# Import Statements

In [1071]:
import pandas as pd
import plotly.graph_objects as go
import plotly.express as px
from plotly.subplots import make_subplots
import plotly.io as pio
from IPython.display import display
import json
import numpy as np
import folium

# Part 1, Task 1

In [1072]:
#Read the data, display first few rows
pop_21 = pd.read_csv("Canada Population 2021 Data.csv")
display(pop_21.head())

Unnamed: 0,Location,Non_Indigenous,First_Nations,Inuit,Métis
0,Canada,34124000,1426950,82010,560335
1,Newfoundland and Labrador,443310,40415,10245,6715
2,Prince Edward Island,144930,3755,255,915
3,Nova Scotia,890045,40715,1860,18310
4,New Brunswick,712025,31850,1025,9445


In [1073]:
#Reformat the data with provided melt
melted = pd.melt(pop_21,id_vars=['Location'],value_vars=['First_Nations','Inuit','Métis','Non_Indigenous'], var_name='Population',value_name='Count')

#Subset to just canada-wide data, remove underscores from the labels
canada = melted[melted['Location'] == 'Canada'].copy()
canada['Population'] = canada['Population'].str.replace('_', ' ')

#Extract labels and values
labels = canada['Population'].value_counts().index
values = canada['Count'].values
canada

Unnamed: 0,Location,Population,Count
0,Canada,First Nations,1426950
14,Canada,Inuit,82010
28,Canada,Métis,560335
42,Canada,Non Indigenous,34124000


In [1074]:
#Figure Parameters
Colors = [
	"#ffcc00", #First Nations
	"#fc0a0a", #Inuit
	"#f1750f", #Metis
    "#9B9B9B", #Non Indigenous
]
Label_Colors = [
	"#d1a000", #First Nations
	"#b80101", #Inuit
	"#cf6610", #Metis
    "#6D6C6C", #Non Indigenous
]
Hole_Size = 0.65 #Center hole size
Center_fs = 50 #Center font size
Label_fs = 36 #Label font size
Div_lw = 1 #Division line width
Div_lc = "#202020" #Division line color
Hover_fs = 40 #Hover font size
Hover_bg = "#F0EFEF" #Hover background color
Hover_fc = "#1F1F1F" #Hover font color

In [1075]:
#Create the initial donut chart
fig = go.Figure(go.Pie(labels=labels, values=values, hole=Hole_Size, marker_colors=Colors))

#Add center text, remove legend, set side
fig.update_layout(annotations=[
    dict(
        text="<b>Canada<br>Total Population:</b><br>38.5 million",
        x=0.5,
        y=0.5,
        showarrow=False,
        font=dict(size=Center_fs, color="#212122") #These text dicts are easier IMO
    )],
    height=1000,
    width=1600,
    showlegend=False,
)

#Format the labels
fig.update_traces(
    textposition="outside",
    texttemplate="<b>%{label}</b><br>%{percent}",
    textfont=dict(size=Label_fs, color=Label_Colors),
    marker_line_width=Div_lw,
    marker_line_color=Div_lc,
)

#Rotate chart to put labels on the sides, define the margins
fig.update_traces(
    rotation=110,
)
fig.update_layout(
	margin=dict(t=50, b=50, l=50, r=50),
    uniformtext_minsize=12, uniformtext_mode="hide"
)

#Format the hover tooltip
fig.update_traces(hovertemplate="%{label}<br>Count: %{value:,}<extra></extra>")
fig.update_layout(
    hoverlabel=dict(
        font_size=Hover_fs,
        bgcolor=Hover_bg,
        font_color=Hover_fc)
)

#Save the figure
fig.write_image("donut.png")
fig.write_html("donut.html")

#Show the figure
fig.show()

# Part 2, Task 1

Did a bit extra for this, wanted it to look good. I have also done stuff like this before so used some of my previous code as a base.

In [1076]:
#Load csv data, remove canada-wide data, show data
df = pd.read_csv("Canada Population 2021 Data.csv")
df = df[df['Location'] != 'Canada'].copy()
display(df)

Unnamed: 0,Location,Non_Indigenous,First_Nations,Inuit,Métis
1,Newfoundland and Labrador,443310,40415,10245,6715
2,Prince Edward Island,144930,3755,255,915
3,Nova Scotia,890045,40715,1860,18310
4,New Brunswick,712025,31850,1025,9445
5,Quebec,13511270,288525,18025,44275
6,Ontario,13511270,332555,6290,131775
7,Manitoba,1067780,140250,1150,88300
8,Saskatchewan,912150,129225,715,56055
9,Alberta,3854700,188315,4235,115455
10,British Columbia,4604060,209320,2340,85205


In [1077]:
#Add Percent Indigenous column, Total Population column
_ = (df['First_Nations'] + df['Inuit'] + df['Métis'])
_2 = _ + df['Non_Indigenous']
prcts = 100*_/(_ + df['Non_Indigenous'])
df['Percent_Indigenous'] = prcts.round(0).astype(int).astype(str) + '%'
df['Total_Population'] = _2

#Create numeric Percent Indigenous column for mapping
df["Percent_Indigenous_num"] = pd.to_numeric(
    df["Percent_Indigenous"].astype(str).str.strip().str.rstrip('%'),
    errors="coerce"
)

#Fix the 'Yukon' name as the json uses 'Yukon Territory', display data
df.loc[df['Location'] == 'Yukon', 'Location'] = 'Yukon Territory'
display(df)

# Create the tooltip df
tooltip_df = df[[
    "Location",
    "Percent_Indigenous",
    "Total_Population",
    "First_Nations",
    "Inuit",
    "Métis"
]].rename(columns={
    "Location": "Province/Territory",
    "Percent_Indigenous": "Percent Indigenous",
    "Total_Population": "Total Population",
    "First_Nations": "First Nations",
})

Unnamed: 0,Location,Non_Indigenous,First_Nations,Inuit,Métis,Percent_Indigenous,Total_Population,Percent_Indigenous_num
1,Newfoundland and Labrador,443310,40415,10245,6715,11%,500685,11
2,Prince Edward Island,144930,3755,255,915,3%,149855,3
3,Nova Scotia,890045,40715,1860,18310,6%,950930,6
4,New Brunswick,712025,31850,1025,9445,6%,754345,6
5,Quebec,13511270,288525,18025,44275,3%,13862095,3
6,Ontario,13511270,332555,6290,131775,3%,13981890,3
7,Manitoba,1067780,140250,1150,88300,18%,1297480,18
8,Saskatchewan,912150,129225,715,56055,17%,1098145,17
9,Alberta,3854700,188315,4235,115455,7%,4162705,7
10,British Columbia,4604060,209320,2340,85205,6%,4900925,6


In [1078]:
#Load the geojson file, display data
with open("Canada Provinces Data.json", "r", encoding="utf-8") as f:
    geo = json.load(f)
display(geo)

{'type': 'FeatureCollection',
 'features': [{'type': 'Feature',
   'geometry': {'type': 'MultiPolygon',
    'coordinates': [[[[-78.019173, 62.59193],
       [-77.867218, 62.589161],
       [-77.837784, 62.556938],
       [-78.103333, 62.559158],
       [-78.107773, 62.582771],
       [-78.019173, 62.59193]]],
     [[[-69.182503, 59.128601],
       [-69.198608, 59.067211],
       [-69.132767, 59.055271],
       [-69.221123, 58.991661],
       [-69.223053, 58.95332],
       [-69.290283, 58.977211],
       [-69.35556, 58.949711],
       [-69.278343, 59.066669],
       [-69.357224, 59.139721],
       [-69.275284, 59.154991],
       [-69.182503, 59.128601]]],
     [[[-79.496384, 56.766941],
       [-79.486107, 56.658039],
       [-79.523621, 56.66721],
       [-79.567497, 56.615269],
       [-79.583893, 56.652771],
       [-79.549438, 56.68111],
       [-79.551941, 56.73193],
       [-79.589996, 56.76833],
       [-79.58168, 56.807209],
       [-79.496384, 56.766941]]],
     [[[-78.900284, 

In [1079]:
#Binning Func, kinda fixated on making it use nice intervals...
def nice_binning(to_bin, k=11):
    #Get numeric values, find min and max
    values = pd.to_numeric(to_bin).values
    minval, maxval = float(np.min(values)), float(np.max(values))

    #Find Bins  
    target = (maxval - minval)/k
    mag = 10**np.floor(np.log10(target))
    for base in (1, 2, 5, 10):
        step = base*mag
        if (maxval - minval)/step <= k:
            break
    start = np.floor(minval/step)*step
    end = np.ceil(maxval/step)*step
    edges = np.arange(start, end + step, step, dtype=float)

    #Pad the edged a bit to ensure all values fit
    pad = max(1.0, abs(edges[-1] - edges[0]))*1e-9
    edges[0] = min(edges[0], minval) - pad
    edges[-1] = max(edges[-1], maxval) + pad
    return edges.tolist()

In [1080]:
#Tooltip GeoJSON merge
_lookup = tooltip_df.set_index("Province/Territory").to_dict(orient="index")
def attach_properties(feature):
    nm = feature["properties"].get('name')
    if nm in _lookup:
        feature["properties"].update(_lookup[nm])
    feature["properties"]["Province/Territory"] = nm
    return feature

geo_with_data = {"type":"FeatureCollection","features":[attach_properties(dict(f)) for f in geo["features"]]}

In [1081]:
#To make tooltip
def make_tooltip():
    return folium.GeoJsonTooltip(
        fields=[
            "Province/Territory",
            "Total Population",
            "Percent Indigenous",
            "First Nations",
            "Inuit",
            "Métis"
        ],
        aliases=[
            "<b>Province/Territory</b>",                               
            "<b>Total Population</b>",
            "<b>Percent Indigenous</b>",
            "<b>First Nations</b>",
            "<b>Inuit</b>",
            "<b>Métis</b>"
        ],
        localize=True,
        sticky=True,
        labels=True,
        style='''
            background-color: #ffffff;
            border: 2px solid black;
			border-radius: 3px;
			box-shadow: 3px;
	   		font-size:16px;
            line-height:1.25;
		''',
        max_width=800
    )

In [1082]:
#Create map, add border pane
m = folium.Map(location=[65.0, -96.0], zoom_start=4, tiles=None, control_scale=True, height=1000, width=1600)
folium.map.CustomPane("border_pane",  z_index=500).add_to(m)

<folium.map.CustomPane at 0x1a41a1a0910>

In [1083]:
#To add layers to map
def add_layer(layer_title, column, palette, k=11, show=False):
    col_map = {"Percent_Indigenous": "Percent_Indigenous_num",}
    df_col = col_map.get(column, column)
    bins = nice_binning(df[df_col], k=k)
    ch = folium.Choropleth(
        geo_data=geo,
        data=df,
        columns=["Location", df_col],
        key_on=f"feature.properties.name",
        fill_color=palette,
        bins=bins,
        fill_opacity=0.85,
        line_opacity=0.30,
        nan_fill_color="#f0f0f0",
        legend_name=layer_title,
        name=layer_title,
        control=True,
        overlay=False,
        show=show
    ).add_to(m)

    #To add tooltip and border to each layer
    folium.GeoJson(
        data=geo_with_data,
        name=f"{layer_title} details",
        smooth_factor=2,
        style_function=lambda _: {"color":"black", "fillColor":"transparent", "weight":0.5},
        tooltip=make_tooltip(),
        highlight_function=lambda _: {"weight":3, "fillColor":"grey"},
        pane="border_pane"
    ).add_to(ch)
    
#Actually adding the layers
add_layer("Percent Indigenous", "Percent_Indigenous", "YlOrRd", show=True)  # % values
add_layer("First Nations", "First_Nations",  "GnBu")
add_layer("Inuit", "Inuit", "PuRd")
add_layer("Métis", "Métis", "YlGn")
folium.LayerControl(collapsed=False, position="topleft").add_to(m)

<folium.map.LayerControl at 0x1a40c68f570>

In [1084]:
#Add title, save
title_text = "Indigenous Populations of Canada (2021)"
m.get_root().html.add_child(folium.Element(
    f'<h3 align="center" style="font-size:40px;color:#333;margin-top:8px;">{title_text}</h3>'
))
m.save("map.html")

# Part 3, Task 1

In [1085]:
#Load data, format
df = pd.read_csv("Canada Merge Data.csv")
df = df[df["Domain"].notna()].copy()
df["Domain"] = df["Domain"].replace({"NonIndigenous [reference]": "Non-Indigenous"})
df_plot = df.dropna(subset=["CR_PERC", "CR_FI"])
df_plot = df_plot[df_plot['Domain'] != 'Missing']

#Replace all rows with domain 'First Nations off reserve' to 'First Nations'
df_plot['Domain'] = df_plot['Domain'].replace({'First Nations off reserve': 'First Nations'})

#Display the data and value counts
display(df_plot)
display(df_plot['Domain'].value_counts())

Unnamed: 0,Geography,Domain,Sex,NUM_HSC,DENOM_HSC,CR_HSC,NUM_MOOD,DENOM_MOOD,CR_MOOD,NUM_ANX,...,CR_PERC,NUM_ST,DENOM_ST,CR_ST,NUM_BELONG,DENOM_BELONG,CR_BELONG,NUM_FI,DENOM_FI,CR_FI
0,Newfoundland and Labrador,First Nations,Both sexes,5397.0,27878.0,19.36,3638.0,28830,12.62,4674.0,...,8.42,2278.0,27714.0,8.219672,20981.0,27932,75.12,1939.0,14354.0,13.51
3,Newfoundland and Labrador,Non-Indigenous,Both sexes,51198.0,365601.0,14.00,37109.0,382278,9.71,42965.0,...,5.90,31047.0,364338.0,8.521483,286585.0,367065,78.07,15574.0,192226.0,8.10
7,Newfoundland and Labrador,Non-Indigenous,Males,18004.0,178154.0,10.11,12786.0,187700,6.81,14686.0,...,5.48,12188.0,177686.0,6.859291,138995.0,177644,78.24,6346.0,93968.0,6.75
8,Newfoundland and Labrador,First Nations,Females,3596.0,14965.0,24.03,2221.0,15221,14.59,3070.0,...,8.80,1550.0,14860.0,10.430686,11558.0,14858,77.79,1112.0,7489.0,14.84
11,Newfoundland and Labrador,Non-Indigenous,Females,33194.0,187447.0,17.71,24324.0,194578,12.50,28279.0,...,6.30,18859.0,186652.0,10.103830,147590.0,189420,77.92,9227.0,98258.0,9.39
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
144,Northwest Territories,First Nations,Females,1253.0,4735.0,26.47,490.0,5325,9.21,567.0,...,17.24,780.0,4735.0,16.473073,4067.0,5245,77.53,1701.0,4964.0,34.26
147,Northwest Territories,Non-Indigenous,Females,1631.0,7938.0,20.54,798.0,7680,10.39,712.0,...,6.60,1111.0,7796.0,14.250898,6362.0,7609,83.62,243.0,7529.0,3.23
149,Nunavut,Inuit,Both sexes,2817.0,20038.0,14.06,1797.0,20483,8.78,1656.0,...,11.18,5375.0,19978.0,26.904595,16594.0,19848,83.60,12719.0,19254.0,66.06
153,Nunavut,Inuit,Males,1353.0,10331.0,13.10,722.0,10459,6.90,600.0,...,10.15,2977.0,10333.0,28.810607,8209.0,10190,80.56,6890.0,9830.0,70.09


Domain
Non-Indigenous    35
First Nations     28
Métis             21
Inuit              3
Name: count, dtype: int64

In [1086]:
#Plotting variables
Colors = {
    "Non-Indigenous": "#9B9B9B",
    "First Nations": "#ffcc00",
    "Métis": "#f1750f",
    "Inuit": "#fc0a0a"
}
Symbols = {
    "Non-Indigenous": "circle",
    "First Nations": "diamond",
    "Métis": "square",
    "Inuit": "hexagon"
}
Marker_Size = 10
Marker_Line = dict(width=1, color="black")

In [1087]:
#Create animated scatter plot
fig = px.scatter(
    df_plot,
    x="CR_PERC",
    y="CR_FI",
    color="Domain",
    symbol="Domain",
    color_discrete_map=Colors,
    symbol_map=Symbols,
    animation_frame="Sex",
    hover_name="Geography",
    hover_data={
        "CR_PERC": ":.2f",
        "CR_FI": ":.2f",
        "DENOM_PERC": True,
        "DENOM_FI": True,
        "Domain": False,
        "Sex": False
    },
    labels={
        "CR_PERC": "Fair or Poor Mental Health (%)",
        "CR_FI": "Food Insecurity (%)",
        "DENOM_PERC": "Survey Size (Mental Health)",
        "DENOM_FI": "Survey Size (Food Insecurity)",
        "Domain": "Population Group",
        "Sex": "Sex"
    },
    height=900,
    width=900
)

#Formatting
fig.update_traces(marker=dict(size=Marker_Size, line=Marker_Line))
fig.update_layout(
    title=(
        "<b>Mental Health and Food Insecurity in<br>Indigenous and Non-Indigenous Populations</b>"
    ),
    title_x=0.5,
    title_font=dict(size=25),
    xaxis=dict(
        range=[0, 30],
        title_font=dict(size=22),
        tickfont=dict(size=18)
    ),
    yaxis=dict(
        range=[0, 75],
        title_font=dict(size=22),
        tickfont=dict(size=18)
    ),
    legend=dict(
        title="Population Group",
        font=dict(size=18),
        bgcolor="#FFFFFF"
    ),
    legend_x=0.75,
    legend_y=0.9,
    margin=dict(l=60, r=60, t=80, b=60),
    plot_bgcolor="white",
    hoverlabel=dict(font_size=16, bgcolor="white", font_color="#1F1E1E"),
)

#Save and show fig
fig.write_html("scatter.html")
fig.write_image("scatter.png", scale=3)
fig.show()

# Part 5

In [1088]:
#Load & process data, display
df = pd.read_csv("Canada Population 2021 Data.csv")
df = df[df['Location'] != 'Canada'].copy()
df['Total Population'] = df['First_Nations'] + df['Inuit'] + df['Métis'] + df['Non_Indigenous']
df['Pct'] = (100*(df['Total Population'] - df['Non_Indigenous'])/df['Total Population']).round(2)
df.sort_values(by='Pct', ascending=False, inplace=True)
df = df.iloc[:3].copy()
Labels = ['First Nations', 'Inuit', 'Métis', 'Non Indigenous']
display(df)

Unnamed: 0,Location,Non_Indigenous,First_Nations,Inuit,Métis,Total Population,Pct
13,Nunavut,5250,610,30855,125,36840,85.75
12,Northwest Territories,20280,14005,4680,2525,41490,51.12
11,Yukon,34835,7405,335,1235,43810,20.49


In [1089]:
#Figure Parameters
Colors = [
	"#ffcc00", #First Nations
	"#fc0a0a", #Inuit
	"#f1750f", #Metis
    "#9B9B9B", #Non Indigenous
]
Label_Colors = [
	"#d1a000", #First Nations
	"#b80101", #Inuit
	"#cf6610", #Metis
    "#6D6C6C", #Non Indigenous
]
Hole_Size = 0.65 #Center hole size
Label_fs = 18 #Label font size
Div_lw = 1 #Division line width
Div_lc = "#202020" #Division line color
Hover_fs = 20 #Hover font size
Hover_bg = "#F0EFEF" #Hover background color
Hover_fc = "#1F1F1F" #Hover font color
Legend_fs = 25 #Legend font size

In [1090]:
#Create figure, subplots
fig = make_subplots(
	rows=1,
	cols=3,
	specs=[[{"type":"domain"}, {"type":"domain"}, {"type":"domain"}]],
	subplot_titles=[f"<b>{row['Location']}</b><br>Indigenous (%): {row['Pct']}%" for _, row in df.iterrows()],
	horizontal_spacing=0.05
)

#Create singular legend
show_legends = [True, False, False]
for i, (idx, row) in enumerate(df.iterrows()):
	fig.add_trace(
		go.Pie(
			labels=Labels,
			values=[
				row['First_Nations'],
				row['Inuit'],
				row['Métis'],
				row['Non_Indigenous']
			],
			hole=Hole_Size,
			marker_colors=Colors,
			texttemplate="%{percent}",
			insidetextfont=dict(color="white",  size=Label_fs),
    	outsidetextfont=dict(color="#1f1f1f", size=Label_fs),
			marker_line_width=Div_lw,
			marker_line_color=Div_lc,
			hovertemplate="%{label}<br>Count: %{value:,}<extra></extra>",
			showlegend=show_legends[i]
		),
		row=1,
		col=i+1
	)
 
#Layout formatting
fig.update_layout(
    height=750, width=1500,
    margin=dict(t=100, b=20, l=50, r=50),
    paper_bgcolor="#fafafa",
    plot_bgcolor="#fafafa",
    title=dict(
        text="<b>Top 3 Provinces by Indigenous Population Share (2021)</b>",
        x=0.5, xanchor="center",
        font=dict(size=35, color="#212122")
    ),
    legend=dict(
        orientation="h",
        x=0.5, xanchor="center",
        y=1.0, yanchor="bottom",
        font=dict(size=Legend_fs),
        bgcolor="#fafafa"
    ),
    uniformtext_minsize=12,
    uniformtext_mode="hide"
)

#Adjust subplot title positions & font sizes
for i in range(3):
    fig.layout.annotations[i].update(
        y=0.9,
        yanchor="bottom",
        font=dict(size=22),
        align="center"
    )

#Rotate pie to make labels on side
fig.update_traces(rotation=110)

#Save figure and show
fig.write_html("subplots.html")
pio.write_image(fig, "subplots.png", scale=3)
fig.show()