In [38]:
import numpy as np
import pandas as pd
from os import listdir
from os.path import getsize, splitext
import requests
import math
from itertools import chain
import json

# Metadata Table

## Statistics Canada

1. Download Raw Metadata Table

In [2]:
statscan_raw_metadata_table = pd.read_json("https://www150.statcan.gc.ca/t1/wds/rest/getAllCubesList")

2. Merge Subject Codes

In [5]:
def create_subject(x):
    """Matches Subject Code ID with Subject Code Label
    
    Parameters:
    x (pandas.Table): A grouped table

    Returns:
    pandas.Series: A Series with nested Subject Codes
    """
    d={}
    subject_object=[]
    for index, value in enumerate(x['subjectCode']):
        sub_obj = {'code': value, 'name': x['English'].tolist()[index]}
        subject_object.append(sub_obj)
    d['subject'] = subject_object
    return pd.Series(d, index=['subject'])

In [14]:
lookup_subject_codes = pd.read_csv("data/lookup/lookup-subject-codes.csv")

statscan_metadata_table = (statscan_raw_metadata_table
    .explode('subjectCode')
    .astype({'subjectCode':'int64'})
    .merge(lookup_subject_codes, how='left', left_on='subjectCode', right_on= 'Subject Code')
    .groupby('productId')
    .apply(create_subject)
    .reset_index()
    .merge(statscan_raw_metadata_table, how='inner', on='productId')
    .drop(columns=['subjectCode', 'corrections'])
    .drop_duplicates(subset='productId'))



In [15]:
statscan_metadata_table

Unnamed: 0,productId,subject,cansimId,cubeTitleEn,cubeTitleFr,cubeStartDate,cubeEndDate,releaseTime,archived,surveyCode,frequencyCode,dimensions
0,10100001,"[{'code': 10, 'name': 'Government'}]",183-0021,Federal public sector employment reconciliatio...,Emploi du secteur public fédéral rapprochement...,1999-01-01,2011-01-01,2012-08-01T12:30,1,[1713],12,"[{'dimensionNameEn': 'Geography', 'dimensionNa..."
1,10100002,"[{'code': 10, 'name': 'Government'}]",191-0002,Central government debt,Dette du gouvernement central,2009-04-01,2021-10-01,2021-12-20T13:30,2,[7514],6,"[{'dimensionNameEn': 'Geography', 'dimensionNa..."
2,10100003,"[{'code': 10, 'name': 'Government'}]",176-0079,Government of Canada debt securities: Gross ne...,Titres d’emprunt du gouvernement du Canada : é...,1975-01-01,2021-11-01,2021-12-22T13:30,2,[7502],6,"[{'dimensionNameEn': 'Geography', 'dimensionNa..."
3,10100004,"[{'code': 10, 'name': 'Government'}]",176-0013,"Chartered banks, total claims and liabilities ...","Banques à charte, ensembles des créances et en...",1978-04-01,2021-07-01,2021-12-21T13:30,2,[7502],9,"[{'dimensionNameEn': 'Geography', 'dimensionNa..."
4,10100005,"[{'code': 10, 'name': 'Government'}]",385-0041,Canadian Classification of Functions of Govern...,Classification canadienne des fonctions des ad...,2008-01-01,2020-01-01,2021-11-26T13:30,2,[5218],12,"[{'dimensionNameEn': 'Geography', 'dimensionNa..."
...,...,...,...,...,...,...,...,...,...,...,...,...
5994,46100053,"[{'code': 46, 'name': 'Housing'}]",,Ownership type and property use by residential...,Type de propriétaire et usage de la propriété ...,2018-01-01,2020-01-01,2022-01-06T13:30,2,[5257],12,"[{'dimensionNameEn': 'Geography', 'dimensionNa..."
5995,46100054,"[{'code': 46, 'name': 'Housing'}]",,Residency ownership and property use by reside...,Résidence de la propriété et usage de la propr...,2018-01-01,2020-01-01,2022-01-06T13:30,2,[5257],12,"[{'dimensionNameEn': 'Geography', 'dimensionNa..."
5996,46100055,"[{'code': 46, 'name': 'Housing'}]",,"Persons living with housing problems, by selec...","Personnes éprouvant des problèmes de logement,...",2018-01-01,2018-01-01,2020-10-02T12:30,2,[5269],13,"[{'dimensionNameEn': 'Geography', 'dimensionNa..."
5997,46100056,"[{'code': 46, 'name': 'Housing'}]",,"Core housing need, by tenure including first-t...","Besoins impérieux en matière de logement, selo...",2018-01-01,2018-01-01,2020-10-02T12:30,2,[5269],13,"[{'dimensionNameEn': 'Geography', 'dimensionNa..."


## City of Edmonton Metadata

1. Configuration

In [22]:
edmonton_socrata_url = "http://api.us.socrata.com/api/catalog/v1?domains=data.edmonton.ca"
edmonton_socrata_request_length = requests.get(edmonton_socrata_url, params={"only":"datasets"}).json()["resultSetSize"]
edmonton_socrata_request_paginate = math.ceil(edmonton_socrata_request_length/100)




2. Paginate through metadata return

In [35]:
edmonton_socrata_raw_request = []

for offset_counter in range(edmonton_socrata_request_paginate):
    edmonton_socrata_raw = requests.get(edmonton_socrata_url, params={"only":"datasets", "offset":offset_counter}).json()
    edmonton_socrata_raw_request.append(edmonton_socrata_raw["results"])

edmonton_socrata_raw_metadata = list(chain.from_iterable(edmonton_socrata_raw_request))

In [37]:
len(edmonton_socrata_raw_metadata)

1300

In [39]:
with open("edmonton_socrata_raw_metadata.json", "w") as f:
    json.dump(edmonton_socrata_raw_metadata, f)