In [50]:
import requests
import pandasdmx as sdmx
import pandas as pd
import geopandas as gpd
from itertools import batched
import os

## Census Subdivision - Calgary

In [7]:
#https://www12.statcan.gc.ca/census-recensement/2021/geo/sip-pis/boundary-limites/index2021-eng.cfm?year=21

# Load every census subdivision, keep the one named "Calgary", and work on a
# copy so the geometry edit below does not touch the full layer.
census_subdivisions = gpd.read_file("../../data/CensusSubdivision/lcsd000b21a_e.shp")
calgary_boundary = census_subdivisions.loc[census_subdivisions["CSDNAME"] == "Calgary"].copy()

# Shrink the outline inward so Dissemination Areas that only touch the city
# limit from outside are not picked up by the spatial join.
# NOTE(review): the 100 is in the layer's CRS units — presumably metres; confirm.
calgary_boundary["geometry"] = calgary_boundary.geometry.buffer(-100)

## Dissemination Areas

In [8]:
#https://www12.statcan.gc.ca/census-recensement/2021/geo/sip-pis/boundary-limites/index2021-eng.cfm?year=21

# Keep only the Dissemination Areas that intersect the (shrunken) Calgary boundary.
# Columns present in both layers come back suffixed, e.g. "DGUID_left" for the DA's own DGUID.
DA = gpd.sjoin(
    gpd.read_file("../../data/DisseminationArea/lda_000b21a_e.shp"),
    calgary_boundary,
    how="inner",
)

In [9]:
# Plain Python list of the DGUIDs of the Dissemination Areas kept by the join.
DAs = DA.loc[:, "DGUID_left"].tolist()

## Statistical Data and Metadata eXchange (SDMX) 

In [778]:
#https://www12.statcan.gc.ca/wds-sdw/2021profile-profil2021-eng.cfm

In [11]:
# ADD SOURCE
# Register the Statistics Canada Census Profile SDMX endpoint under the id "STC_CP".
# override=True is passed so an existing "STC_CP" entry (e.g. from re-running this
# cell) is replaced — NOTE(review): confirm against the pandasdmx add_source docs.
sdmx.source.add_source(
    {
        "id": "STC_CP",
        "url": "https://api.statcan.gc.ca/census-recensement/profile/sdmx/rest",
        "documentation": "https://www12.statcan.gc.ca/wds-sdw/2021profile-profil2021-eng",
        "name": "Statistics Canada",
        "supported": {"codelist": False, "preview": True},
    },
    override=True,
)

### Get Code List Mapping for Census Profile Characteristics

In [12]:
# Fetch the CL_CHARACTERISTIC code list as SDMX structure JSON.
headers = {
    "Accept": "application/vnd.sdmx.structure+json;version=1.0"
}
r = requests.get(
    "https://api.statcan.gc.ca/census-recensement/profile/sdmx/rest/codelist/STC_CP/CL_CHARACTERISTIC/",
    headers=headers,
    timeout=60,  # requests waits indefinitely when no timeout is given
)
# Stop here on a 4xx/5xx response instead of failing later while parsing the body.
r.raise_for_status()

In [13]:
# Entries of the first code list in the response.
codelist = r.json()["data"]["codelists"][0]["codes"]

# Parallel lists: the code's "id" and the text of its first annotation.
# NOTE(review): the annotation text is treated as the Census Profile
# characteristic ID (it is looked up with keys such as "1683" later) — confirm.
codes = [entry["id"] for entry in codelist]
charac_id = [entry["annotations"][0]["text"] for entry in codelist]

# Lookup: characteristic ID -> SDMX code.
code_dict = {characteristic: code for characteristic, code in zip(charac_id, codes)}

### Get Data

In [14]:
# Data queries ask for SDMX-ML 2.1 generic data rather than structure JSON.
headers = {"Accept": "application/vnd.sdmx.genericdata+xml;version=2.1"}

# A requests session carrying that Accept header, handed to the SDMX client.
# NOTE(review): verify that pandasdmx actually uses the session passed here.
s = requests.Session()
s.headers.update(headers)
sc = sdmx.Request("STC_CP", session=s)

In [15]:
# da_msg = sc.dataflow("DF_DA", params={"references":"codelist"})

In [16]:
# cl = sdmx.to_pandas(da_msg.codelist.CL_CHARACTERISTIC)
# cl.head()
# cl[cl["name"].str.contains("Total - LIM low-income status in 2020 for the population in private households")].index[0]

In [17]:
# Characteristic ID -> column label used in the output table.
# Insertion order is kept as-is because it determines the order of the
# characteristics in the request key.
chars = {
    # visible minority
    "1683": "Visible_Minority_Total",
    "1684": "Visible_Minority",
    # labour
    "2230": "Unemployment_rate",
    # age
    "8": "Total_age",
    "10": "Total_age_0_4",
    "24": "Total_age_65_older",
    # low-income measure (LIM)
    "338": "Total_LIM_18_64",
    "339": "Total_LIM_65_over",
    "343": "LIM_18_64",
    "344": "LIM_65_over",
    # language
    "383": "Language_Total",
    "387": "Language_NO_Eng_Fr",
}

In [18]:
# Geographic Unique Identifier
# DGUID=["2021S051248060056"]
# DGUIDs of every Dissemination Area kept by the spatial join; these become
# the geography part of the data request.
DGUID = DA.loc[:, "DGUID_left"].tolist()

In [19]:
# Re-key the selected characteristics by SDMX code: code -> output column label.
mapped_chars = {
    code_dict[characteristic]: label
    for characteristic, label in chars.items()
}

In [21]:
def get_key(chars, gui, schema=None):
    """Build the SDMX data key for a batch of geographies.

    Parameters
    ----------
    chars
        Not read by this function; kept so existing calls keep working. The
        characteristic part of the key is built from the keys of the
        module-level ``mapped_chars``.
    gui
        Iterable of DGUID strings; joined with "+" into the geography part.
    schema
        Not read by this function. Now optional so callers do not have to
        supply a value for it.

    Returns
    -------
    str
        Key of the form ``A5.<dguids>.1.<codes>.1``.
        NOTE(review): the positions presumably follow the dataflow's dimension
        order (FREQ.REF_AREA.GENDER.CHARACTERISTIC.STATISTIC) — confirm.
    """
    dguids = "+".join(gui)
    # Iterating a dict yields its keys, i.e. the SDMX characteristic codes.
    stat_cols = "+".join(mapped_chars)
    return f"A5.{dguids}.1.{stat_cols}.1"

In [52]:
# Download the selected characteristics for every Dissemination Area in DGUID
# and save them as a single CSV: one row per REF_AREA, one column per
# characteristic label.
filename = "DA_Calgary_census_data.csv"
batch_frames = []

# The DGUIDs are embedded in the request key, so they are sent 200 at a time.
for batch in batched(DGUID, 200):
    # get_key reads neither its first nor its third argument; None stands in
    # for `schema`, which is not defined anywhere in this notebook.
    key = get_key(mapped_chars, batch, None)
    data = sc.data('STC_CP,DF_DA', key=key)
    df = (
        pd.DataFrame(data.to_pandas())
        .reset_index()
        .drop(['FREQ', 'GENDER', "STATISTIC", "TIME_PERIOD"], axis=1)
    )

    # Pivoting without `values` leaves a two-level column index; rename the
    # characteristic codes to their labels, then drop the outer level.
    df1 = df.pivot(index="REF_AREA", columns="CHARACTERISTIC").rename(columns=mapped_chars)
    df1.columns = df1.columns.droplevel()
    batch_frames.append(df1)

# Write once, replacing any existing file: re-running the cell no longer appends
# duplicate rows, and concat aligns batches by column name, so a batch that
# lacks a characteristic gets an empty cell instead of shifted values.
if batch_frames:
    pd.concat(batch_frames).to_csv(filename)
    

batch...
batch...
batch...
batch...
batch...
batch...
batch...
batch...
batch...
