In [1]:
# imports
import pandas as pd
import numpy as np
import statsmodels.api as sm
import statsmodels.formula.api as smf

In [2]:
# loading in dataframes from csv files, encoding changed from utf-8 (default) to latin-1 to read the csv files
ancestry_2016 = pd.read_csv('2016-FN-ancestry.CSV', encoding = 'latin-1')
education_2016 = pd.read_csv('2016-FN-education.CSV', encoding = 'latin-1')
home_2016 = pd.read_csv('2016-FN-home.CSV', encoding = 'latin-1')
income_2016 = pd.read_csv('2016-FN-income.CSV', encoding = 'latin-1')
ancestry_2021 = pd.read_csv('2021-FN-ancestry.CSV', encoding = 'latin-1')
education_2021 = pd.read_csv('2021-FN-education.CSV', encoding = 'latin-1')
home_2021 = pd.read_csv('2021-FN-home.CSV', encoding = 'latin-1')
income_2021 = pd.read_csv('2021-FN-income.CSV', encoding = 'latin-1')

In [3]:
# 2016 Ancestry Data

# dropping unnecessary columns and Null ones
ancestry_2016 = ancestry_2016.drop(columns = ['Total - Single and multiple Aboriginal ancestry responses [2]', '  Non-Aboriginal ancestry responses only [5]'])
ancestry_2016 = ancestry_2016.dropna(axis = 'columns')

# dropping rows i don't want
ancestry_2016 = ancestry_2016.drop([0,1,2,72])

# some row values were nested within another row, so i stored them into a variable to delete the nested ones to reappend them afterwards. 
# later found out i could have just removed leading whistespaces to get rid of the nests
metisanc = ancestry_2016.loc[70]
inuitanc = ancestry_2016.loc[71]
ancestry_2016 = ancestry_2016.drop([70,71])
ancestry_2016 = ancestry_2016.append(metisanc)
ancestry_2016 = ancestry_2016.append(inuitanc)

# resetting the index of my dataframe
ancestry_2016 = ancestry_2016.reset_index(drop=True)

# renaming my columns
ancestry_2016 = ancestry_2016.rename(columns = \
{'Aboriginal ancestry responses (73)': 'Indigenous Group', '  Single ancestry responses [3]':'Homogenous Ancestry', '  Multiple ancestry responses [4]':'Heterogenous Ancestry'})

# updating the column to display a comma to separate thousands
ancestry_2016['Homogenous Ancestry'] = ancestry_2016['Homogenous Ancestry'].map('{:,d}'.format)
ancestry_2016['Heterogenous Ancestry'] = ancestry_2016['Heterogenous Ancestry'].map('{:,d}'.format)

# inserting a new column in dataframe named 'Grouping' (Based on First Nation, Inuit, Métis groupings)
ancestry_2016.insert(1, "Grouping", True)

# assigning indigenous groups to their respective grouping
ancestry_2016 = ancestry_2016.assign(Grouping = 'First Nation')
ancestry_2016.at[68, 'Grouping'] = 'Inuit'
ancestry_2016.at[67, 'Grouping'] = 'Métis'

# removing trailing and leading white spaces, numbers and removing specific charaters
ancestry_2016['Indigenous Group'] = ancestry_2016['Indigenous Group'].str.strip()
ancestry_2016['Indigenous Group'] = ancestry_2016['Indigenous Group'].str.replace('\d+', '', regex=True)
ancestry_2016['Indigenous Group'] = ancestry_2016['Indigenous Group'].str.replace('[', '', regex=True)
ancestry_2016['Indigenous Group'] = ancestry_2016['Indigenous Group'].str.replace(']', '', regex=True)
ancestry_2016['Indigenous Group'] = ancestry_2016['Indigenous Group'].str.replace(', n.o.s.', '')

# removing parenthesis and everything inside
ancestry_2016['Indigenous Group'] = ancestry_2016['Indigenous Group'].str.replace(r"\(.*\)","")

# commands used to display dataset
ancestry_2016
print(ancestry_2016.to_markdown())

# converting dataset into a .csv file
ancestry_2016.to_csv('ancestry_2016.csv', encoding='utf-8', index=False)

|    | Indigenous Group               | Grouping     | Homogenous Ancestry   | Heterogenous Ancestry   |
|---:|:-------------------------------|:-------------|:----------------------|:------------------------|
|  0 | Abenaki                        | First Nation | 8,250                 | 1,520                   |
|  1 | Algonquin                      | First Nation | 31,150                | 9,735                   |
|  2 | Anishinaabe                    | First Nation | 2,940                 | 1,560                   |
|  3 | Apache                         | First Nation | 520                   | 300                     |
|  4 | Assiniboine                    | First Nation | 535                   | 1,280                   |
|  5 | Atikamekw                      | First Nation | 6,690                 | 1,310                   |
|  6 | Beaver                         | First Nation | 585                   | 1,115                   |
|  7 | Blackfoot                      | First Nation | 

  ancestry_2016 = ancestry_2016.append(metisanc)
  ancestry_2016 = ancestry_2016.append(inuitanc)
  ancestry_2016['Indigenous Group'] = ancestry_2016['Indigenous Group'].str.replace(', n.o.s.', '')
  ancestry_2016['Indigenous Group'] = ancestry_2016['Indigenous Group'].str.replace(r"\(.*\)","")


In [4]:
# 2016 Education Dataset

# removing trailing and leading white spaces
education_2016.columns = education_2016.columns.str.strip()

# dropping columns and null columns
education_2016 = education_2016.drop(columns = ['Total - Labour force status [3]', 'Participation rate', 'In the labour force'])
education_2016 = education_2016.dropna(axis = 'columns')

# renaming columns
education_2016 = education_2016.rename(columns = {'Highest certificate, diploma or degree (15)': 'Scolarity Level'})

# updating the column to display a comma to separate thousands
education_2016['Employed'] = education_2016['Employed'].map('{:,d}'.format)
education_2016['Unemployed'] = education_2016['Unemployed'].map('{:,d}'.format)
education_2016['Not in the labour force'] = education_2016['Not in the labour force'].map('{:,d}'.format)

# removing trailing and leading white spaces, numbers and removing specific charaters
education_2016['Scolarity Level'] = education_2016['Scolarity Level'].str.replace('\d+', '', regex=True)
education_2016['Scolarity Level'] = education_2016['Scolarity Level'].str.replace('[', '')
education_2016['Scolarity Level'] = education_2016['Scolarity Level'].str.replace(']', '')

# isolating string in specific cell to update a future dataset (for consistency purposes)
education_2016.at[1, 'Scolarity Level']

# commands used to display dataset
education_2016
print(education_2016.to_markdown())

# converting dataset into a .csv file
education_2016.to_csv('education_2016.csv', encoding='utf-8', index=False)

|    | Scolarity Level                                                 | Employed   | Unemployed   | Not in the labour force   |   Employment rate |   Unemployment rate |
|---:|:----------------------------------------------------------------|:-----------|:-------------|:--------------------------|------------------:|--------------------:|
|  0 | No certificate, diploma or degree                               | 119,535    | 41,310       | 251,330                   |              29   |                25.7 |
|  1 | Secondary (high) school diploma or equivalency certificate      | 183,675    | 33,640       | 105,445                   |              56.9 |                15.5 |
|  2 | Apprenticeship or trades certificate or diploma                 | 79,560     | 14,265       | 37,355                    |              60.6 |                15.2 |
|  3 | College, CEGEP or other non-university certificate or diploma   | 155,225    | 17,830       | 53,060                    |              68.

  education_2016['Scolarity Level'] = education_2016['Scolarity Level'].str.replace('[', '')
  education_2016['Scolarity Level'] = education_2016['Scolarity Level'].str.replace(']', '')


In [5]:
# 2016 Home dataset

# removing trailing and leading whitespaces
home_2016.columns = home_2016.columns.str.strip()

# dropping columns and null columns
home_2016 = home_2016.drop(columns = ['Total - Residence on or off reserve [2]'])
home_2016 = home_2016.dropna(axis = 'columns')

# renaming columns
home_2016 = home_2016.rename(columns = {'Membership in a First Nation or Indian Band (663)': 'Indigenous Group', 'On reserve [3]':'On reserve'})

# updating the column to display a comma to separate thousands
# home_2016['On reserve'] = home_2016['On reserve'].map('{:,d}'.format)
# home_2016['Off reserve'] = home_2016['Off reserve'].map('{:,d}'.format)

# removing trailing and leading white spaces, numbers and removing specific charaters
home_2016['Indigenous Group'] = home_2016['Indigenous Group'].str.replace('\d+', '', regex=True)
home_2016['Indigenous Group'] = home_2016['Indigenous Group'].str.replace('[', '')
home_2016['Indigenous Group'] = home_2016['Indigenous Group'].str.replace(']', '')
home_2016['Indigenous Group'] = home_2016['Indigenous Group'].str.replace(', n.o.s.', '')
home_2016['Indigenous Group'] = home_2016['Indigenous Group'].str.strip()

# isolating string in specific cell to update a future dataset (for consistency purposes)
home_2016.at[659, 'Indigenous Group'] = 'First Nations ancestry, n.i.e.'
home_2016['Indigenous Group'] = home_2016['Indigenous Group'].str.replace(r"\(.*\)","")

# commands used to display dataset
home_2016
print(home_2016.to_markdown())

# converting dataset into a .csv file
home_2016.to_csv('home_2016.csv', encoding='utf-8', index=False)

|     | Indigenous Group                                     |   On reserve |   Off reserve |
|----:|:-----------------------------------------------------|-------------:|--------------:|
|   0 | Première Nation des Abénakis de Wôlinak              |          105 |           185 |
|   1 | Odanak                                               |          310 |          1025 |
|   2 | Abenaki                                              |            0 |          1420 |
|   3 | Algonquins of Barriere Lake                          |           90 |           270 |
|   4 | Algonquins of Pikwàkanagàn First Nation              |          370 |          3175 |
|   5 | Communauté Anicinape de Kitcisakik                   |          295 |           145 |
|   6 | Conseil de la Première Nation Abitibiwinni           |          480 |           285 |
|   7 | Eagle Village First Nation - Kipawa                  |          240 |           630 |
|   8 | Kitigan Zibi Anishinabeg                            

  home_2016['Indigenous Group'] = home_2016['Indigenous Group'].str.replace('[', '')
  home_2016['Indigenous Group'] = home_2016['Indigenous Group'].str.replace(']', '')
  home_2016['Indigenous Group'] = home_2016['Indigenous Group'].str.replace(', n.o.s.', '')
  home_2016['Indigenous Group'] = home_2016['Indigenous Group'].str.replace(r"\(.*\)","")


In [6]:
# 2016 Income Dataset

# removing trailing and leading white space
income_2016.columns = income_2016.columns.str.strip()

# dropping columns and null columns
income_2016 = income_2016.drop(columns = ['Total - Aboriginal identity [4]', 'Non-Aboriginal identity', 'Aboriginal identity [5]', 'Single Aboriginal responses [6]'])
income_2016 = income_2016.dropna(axis = 'columns')

# dropping rows and resetting index
income_2016 = income_2016.loc[3:8]
income_2016 = income_2016.drop([5,6])
income_2016 = income_2016.reset_index(drop=True)

# renaming columns
income_2016 = income_2016.rename(columns = \
{'Income statistics (17)': 'Income statistics', 'First Nations (North American Indian) [7]':'First Nations', 'Inuk (Inuit)':'Inuit', \
'Multiple Aboriginal responses [8]':'Heterogenous Ancestry', 'Aboriginal responses not included elsewhere': 'First Nations ancestry, n.i.e.'})

# casting certain columns as floats
income_2016['First Nations'] = income_2016['First Nations'].astype(float)
income_2016['Métis'] = income_2016['Métis'].astype(float)
income_2016['Inuit'] = income_2016['Inuit'].astype(float)
income_2016['Heterogenous Ancestry'] = income_2016['Heterogenous Ancestry'].astype(float)
income_2016['First Nations ancestry, n.i.e.'] = income_2016['First Nations ancestry, n.i.e.'].astype(float)

# making floats display 2 decimal places
income_2016['First Nations'] = income_2016['First Nations'].map('{:,.2f}'.format)
income_2016['Métis'] = income_2016['Métis'].map('{:,.2f}'.format)
income_2016['Inuit'] = income_2016['Inuit'].map('{:,.2f}'.format)
income_2016['Heterogenous Ancestry'] = income_2016['Heterogenous Ancestry'].map('{:,.2f}'.format)
income_2016['First Nations ancestry, n.i.e.'] = income_2016['First Nations ancestry, n.i.e.'].map('{:,.2f}'.format)

# displaying datasets
income_2016
print(income_2016.to_markdown())

# converting dataset to .csv file
income_2016.to_csv('income_2016.csv', encoding='utf-8', index=False)

|    | Income statistics            | First Nations   | Métis     | Inuit     | Heterogenous Ancestry   | First Nations ancestry, n.i.e.   |
|---:|:-----------------------------|:----------------|:----------|:----------|:------------------------|:---------------------------------|
|  0 | Median total income ($)      | 21,875.00       | 31,916.00 | 24,502.00 | 24,800.00               | 28,813.00                        |
|  1 | Average total income ($)     | 31,519.00       | 42,187.00 | 37,871.00 | 41,057.00               | 39,795.00                        |
|  2 | Median after-tax income ($)  | 21,253.00       | 29,068.00 | 23,635.00 | 23,518.00               | 26,393.00                        |
|  3 | Average after-tax income ($) | 28,108.00       | 35,440.00 | 32,647.00 | 33,484.00               | 33,548.00                        |


In [7]:
# 2021 Ancestry Dataset

# dropping columns
ancestry_2021 = ancestry_2021.drop(columns = ['Total - Single or multiple Indigenous ancestry responses 8', 'Non-Indigenous ancestry responses only 11'])

# removing double quotes from column names just to make manipulating them easier
ancestry_2021.columns = ancestry_2021.columns.str.replace('"','')

# renaming columns
ancestry_2021 = ancestry_2021.rename(columns = {'ï»¿Single and multiple Indigenous ancestry responses (4A) 6':'Indigenous Group', \
'Single Indigenous ancestry responses 9':'Homogenous Ancestry', 'Multiple Indigenous ancestry responses 10':'Heterogenous Ancestry'})

# dropping rows
ancestry_2021 = ancestry_2021.drop([0,1,2,4,39])

# removing numbers, leading and trailing whitespaces and select characters
ancestry_2021['Indigenous Group'] = ancestry_2021['Indigenous Group'].str.replace('\d+', '', regex=True)
ancestry_2021['Indigenous Group'] = ancestry_2021['Indigenous Group'].str.strip()
ancestry_2021['Indigenous Group'] = ancestry_2021['Indigenous Group'].str.replace(', n.o.s.', '')

# resetting index
ancestry_2021 = ancestry_2021.reset_index(drop=True)

# renaming a specific cell
ancestry_2021.at[34, 'Indigenous Group'] = 'Métis'

# inserting a 'grouping' column and assigning most to 'first nation' grouping
ancestry_2021.insert(1, "Grouping", True)
ancestry_2021 = ancestry_2021.assign(Grouping = 'First Nation')

# renaming specific cells
ancestry_2021.at[33,'Grouping'] = 'Inuit'
ancestry_2021.at[34,'Grouping'] = 'Métis'
ancestry_2021.at[0,'Grouping'] = 'None'
ancestry_2021.at[0,'Indigenous Group'] = 'Other'
ancestry_2021.at[32,'Indigenous Group'] = 'First Nations ancestry, n.i.e.'

# removing select characters and parenthesis and everything inside
ancestry_2021['Indigenous Group'] = ancestry_2021['Indigenous Group'].str.replace(' origins', '')
ancestry_2021['Indigenous Group'] = ancestry_2021['Indigenous Group'].str.replace(r"\(.*\)","")

# displaying dataset
ancestry_2021
print(ancestry_2021.to_markdown())

# converting dataset to .csv file
ancestry_2021.to_csv('ancestry_2021.csv', encoding='utf-8', index=False)

  ancestry_2021['Indigenous Group'] = ancestry_2021['Indigenous Group'].str.replace(', n.o.s.', '')
  ancestry_2021['Indigenous Group'] = ancestry_2021['Indigenous Group'].str.replace(r"\(.*\)","")


|    | Indigenous Group               | Grouping     | Homogenous Ancestry   | Heterogenous Ancestry   |
|---:|:-------------------------------|:-------------|:----------------------|:------------------------|
|  0 | Other                          | None         | 193,110               | 1,735                   |
|  1 | First Nations                  | First Nation | 613,125               | 19,220                  |
|  2 | Abenaki                        | First Nation | 16,310                | 2,105                   |
|  3 | Anishinaabe                    | First Nation | 152,640               | 37,075                  |
|  4 | Apache                         | First Nation | 995                   | 275                     |
|  5 | Atikamekw                      | First Nation | 7,630                 | 770                     |
|  6 | Blackfoot                      | First Nation | 18,540                | 4,660                   |
|  7 | Cherokee                       | First Nation | 

In [8]:
# 2021 Education Dataset

# shifting the rows to change the header start
education_2021.columns = education_2021.iloc[0]

# dropping columns
education_2021 = education_2021.drop(columns = ['Total - Indigenous identity 8', 'Indigenous identity 9', 'Single Indigenous responses 10', 'Non-Indigenous identity'])

# dropping rows
education_2021 = education_2021.drop([0,1,2,5,6,8,9,12,14])

# resetting index
education_2021 = education_2021.reset_index(drop=True)

# renaming specific cell
education_2021.at[1, 'Indigenous identity (9) 6'] = 'Secondary (high) school diploma or equivalency certificate'

# removing numbers from columns
education_2021['Indigenous identity (9) 6'] = education_2021['Indigenous identity (9) 6'].str.replace('\d+', '', regex=True)

# renaming columns
education_2021 = education_2021.rename(columns = {'Indigenous identity (9) 6':'Scolarity Level', 'First Nations (North American Indian)':'First Nations', 'MÃ©tis':'Métis', \
'Multiple Indigenous responses 11':'Heterogenous Ancestry', 'Indigenous responses not included elsewhere 12':'First Nations ancestry, n.i.e.'})

# displaying columns
education_2021
print(education_2021.to_markdown())

# converting dataset to .csv file
education_2021.to_csv('education_2021.csv', encoding='utf-8', index=False)

|    | Scolarity Level                                                 | First Nations   | Métis   | Inuit   | Heterogenous Ancestry   | First Nations ancestry, n.i.e.   |
|---:|:----------------------------------------------------------------|:----------------|:--------|:--------|:------------------------|:---------------------------------|
|  0 | No certificate, diploma or degree                               | 254,335         | 107,820 | 23,840  | 4,945                   | 7,600                            |
|  1 | Secondary (high) school diploma or equivalency certificate      | 228,045         | 151,370 | 11,300  | 6,525                   | 8,365                            |
|  2 | Apprenticeship or trades certificate or diploma                 | 66,525          | 51,260  | 3,365   | 1,735                   | 2,805                            |
|  3 | College, CEGEP or other non-university certificate or diploma   | 129,600         | 103,430 | 6,465   | 4,220                   | 5,0

In [9]:
# 2021 Home Dataset

# dropping rows and columns 
home_2021 = home_2021.drop([0,1,2,3])
home_2021 = home_2021.drop(columns = ['Total - Residence on or off reserve 6'])

# removing double quotes from column names for easier manipulation
home_2021.columns = home_2021.columns.str.replace('"','')

# renaming columns
home_2021 = home_2021.rename(columns = {'ï»¿Residence on or off reserve (3) 4':'Indigenous Group', 'On reserve 7':'On reserve', 'Off reserve':'Off reserve'})

# removing parenthesis and everything inside
home_2021['Indigenous Group'] = home_2021['Indigenous Group'].str.replace(r"\(.*\)","")

# isolating cell string for other dataset (naming consistency purposes)
home_2021.at[610, 'Indigenous Group'] = 'First Nations ancestry, n.i.e.'

# displaying dataset
home_2021
print(home_2021.to_markdown())

# converting dataset to .csv file
home_2021.to_csv('home.csv', encoding='utf-8', index=False)

|     | Indigenous Group                                     | On reserve   | Off reserve   |
|----:|:-----------------------------------------------------|:-------------|:--------------|
|   4 | ?Akisq'nuk First Nation                              | 110          | 100           |
|   5 | ?aq'am                                               | 95           | 270           |
|   6 | ?Esdilagh First Nation                               | 0            | 145           |
|   7 | Aamjiwnaang First Nation                             | 580          | 975           |
|   8 | Abegweit First Nation                                | 230          | 105           |
|   9 | Acadia First Nation                                  | 280          | 1,220         |
|  10 | Acho Dene Koe First Nation                           | 15           | 650           |
|  11 | Adams Lake                                           | 380          | 320           |
|  12 | Ahousaht                                            

  home_2021['Indigenous Group'] = home_2021['Indigenous Group'].str.replace(r"\(.*\)","")


In [10]:
# 2012 Income Dataset

# dropping rows and columns
income_2021 = income_2021.drop([0,1,2,3,6,7,10,11,12,13,14,15,16,17])
income_2021 = income_2021.drop(columns = ['Total - Indigenous identity 7', 'Indigenous identity 8', 'Single Indigenous responses 9', 'Non-Indigenous identity'])

# removing double quotes for easier manipulation
income_2021.columns = income_2021.columns.str.replace('"','')

# renaming columns
income_2021 = income_2021.rename(columns = {'ï»¿Indigenous identity (9) 6':'Indigenous Group', 'First Nations (North American Indian)':'First Nations', \
'MÃ©tis':'Métis', 'Inuk (Inuit)':'Inuit', 'Multiple Indigenous responses 10':'Heterogenous Ancestry', 'Indigenous responses not included elsewhere 11':'First Nations ancestry, n.i.e.'})

# resetting index
income_2021 = income_2021.reset_index(drop=True)

# displaying dataset
income_2021
print(income_2021.to_markdown())

# converting dataset to .csv file
income_2021.to_csv('income.csv', encoding='utf-8', index=False)

|    | Indigenous Group             | First Nations   | Métis   | Inuit   | Heterogenous Ancestry   | First Nations ancestry, n.i.e.   |
|---:|:-----------------------------|:----------------|:--------|:--------|:------------------------|:---------------------------------|
|  0 | Median total income ($)      | 32,400          | 39,200  | 33,200  | 34,800                  | 36,000                           |
|  1 | Average total income ($)     | 41,880          | 49,400  | 45,640  | 44,440                  | 46,920                           |
|  2 | Median after-tax income ($)  | 30,600          | 35,600  | 31,400  | 32,400                  | 33,200                           |
|  3 | Average after-tax income ($) | 37,800          | 41,800  | 39,640  | 38,480                  | 40,040                           |


In [92]:
# Creating 1 dataframe/dataset for 2016

data_2016 = pd.merge(ancestry_2016, home_2016, left_on = 'Indigenous Group', right_on = 'Indigenous Group', how='left')
data_2016 = data_2016.dropna(axis = 'index')
data_2016.insert(6, "Average total income ($)", 31519.00)
data_2016['Average total income ($)'] = data_2016['Average total income ($)'].map('{:,.2f}'.format)
data_2016 = data_2016.reset_index(drop=True)
data_2016.to_csv('data_2016.csv', encoding='utf-8', index=False)
data_2016 = data_2016.replace(',','', regex=True)

In [86]:
# Creating 1 dataframe/dataset for 2021

data_2021 = pd.merge(ancestry_2021, home_2021, left_on = 'Indigenous Group', right_on = 'Indigenous Group', how='left')
data_2021 = data_2021.dropna(axis = 'index')
data_2021.insert(6, "Average total income ($)", True)
data_2021 = data_2021.reset_index(drop=True)
data_2021.at[0, 'Average total income ($)'] = 41880
data_2021.at[1, 'Average total income ($)'] = 46920
data_2021.to_csv('data_2021.csv', encoding='utf-8', index=False)

In [93]:
print(data_2016.to_markdown())

|    | Indigenous Group   | Grouping     |   Homogenous Ancestry |   Heterogenous Ancestry |   On reserve |   Off reserve |   Average total income ($) |
|---:|:-------------------|:-------------|----------------------:|------------------------:|-------------:|--------------:|---------------------------:|
|  0 | Abenaki            | First Nation |                  8250 |                    1520 |            0 |          1420 |                      31519 |
|  1 | Algonquin          | First Nation |                 31150 |                    9735 |           70 |         12830 |                      31519 |
|  2 | Cayuga             | First Nation |                   355 |                     220 |           10 |           225 |                      31519 |
|  3 | Cree               | First Nation |                275130 |                   81525 |          320 |          3820 |                      31519 |
|  4 | Dakota             | First Nation |                  3500 |                

In [88]:
print(data_2021.to_markdown())

|    | Indigenous Group               | Grouping     | Homogenous Ancestry   | Heterogenous Ancestry   | On reserve   | Off reserve   |   Average total income ($) |
|---:|:-------------------------------|:-------------|:----------------------|:------------------------|:-------------|:--------------|---------------------------:|
|  0 | Heiltsuk                       | First Nation | 1,060                 | 555                     | 1,065        | 1,080         |                      41880 |
|  1 | First Nations ancestry, n.i.e. | First Nation | 2,480                 | 1,125                   | 50           | 95            |                      46920 |


In [96]:
x_2016 = data_2016[['Homogenous Ancestry', 'Heterogenous Ancestry']]
y_2016_on = data_2016['On reserve']
x_2016 = sm.add_constant(x_2016)
model_2016_on = sm.OLS(y_2016_on, x_2016.astype(float)).fit()
predictions_2016_on = model_2016_on.predict(x_2016.astype(float))
print_model_2016_on = model_2016_on.summary()
print(print_model_2016_on)

                            OLS Regression Results                            
Dep. Variable:             On reserve   R-squared:                       0.018
Model:                            OLS   Adj. R-squared:                 -0.112
Method:                 Least Squares   F-statistic:                    0.1408
Date:                Tue, 20 Jun 2023   Prob (F-statistic):              0.870
Time:                        21:56:54   Log-Likelihood:                -139.87
No. Observations:                  18   AIC:                             285.7
Df Residuals:                      15   BIC:                             288.4
Df Model:                           2                                         
Covariance Type:            nonrobust                                         
                            coef    std err          t      P>|t|      [0.025      0.975]
-----------------------------------------------------------------------------------------
const                   356.24



In [98]:
x_2016 = data_2016[['Homogenous Ancestry', 'Heterogenous Ancestry']]
y_2016_off = data_2016['Off reserve']
x_2016 = sm.add_constant(x_2016)
model_2016_off = sm.OLS(y_2016_off, x_2016.astype(float)).fit()
predictions_2016_off = model_2016_off.predict(x_2016.astype(float))
print_model_2016_off = model_2016_off.summary()
print(print_model_2016_off)

                            OLS Regression Results                            
Dep. Variable:            Off reserve   R-squared:                       0.239
Model:                            OLS   Adj. R-squared:                  0.137
Method:                 Least Squares   F-statistic:                     2.355
Date:                Tue, 20 Jun 2023   Prob (F-statistic):              0.129
Time:                        21:58:16   Log-Likelihood:                -167.96
No. Observations:                  18   AIC:                             341.9
Df Residuals:                      15   BIC:                             344.6
Df Model:                           2                                         
Covariance Type:            nonrobust                                         
                            coef    std err          t      P>|t|      [0.025      0.975]
-----------------------------------------------------------------------------------------
const                  1565.42

