## Metadata

In [1]:
import pandas as pd
# Let pandas render up to 500 rows before truncating a displayed frame
pd.options.display.max_rows = 500

In [2]:
# Cached metadata CSV for product 11100045, read with pandas' default parsing
metadata_csv_path = '../data/cache/11100045_MetaData.csv'
meta_df = pd.read_csv(metadata_csv_path)
meta_df

Unnamed: 0,Cube Title,Product Id,CANSIM Id,URL,Cube Notes,Archive Status,Frequency,Start Reference Period,End Reference Period,Total number of dimensions
0,Registered Retirement Savings Plan (RRSP) room...,11100045,111-0040,https://www150.statcan.gc.ca/t1/tbl1/en/tv.act...,1;2;3;4,CURRENT - a cube available to the public and t...,Annual,2000-01-01,2016-01-01,2.0
1,Dimension ID,Dimension name,Dimension Notes,Dimension Definitions,,,,,,
2,1,Geography,12,,,,,,,
3,2,Registered Retirement Savings Plan (RRSP) room,2,,,,,,,
4,Dimension ID,Member Name,Classification Code,Member ID,Parent Member ID,Terminated,Member Notes,Member Definitions,,
5,1,Canada,11124,1,,,,,,
6,1,Newfoundland and Labrador,10,2,1,,,,,
7,1,"St. John's, Newfoundland and Labrador",001,3,2,,,,,
8,1,Prince Edward Island,11,4,1,,,,,
9,1,Nova Scotia,12,5,1,,,,,


## Meta Info

In [3]:
# The first row of the sheet carries the cube-level description
meta_info = meta_df.iloc[:1]
meta_info

Unnamed: 0,Cube Title,Product Id,CANSIM Id,URL,Cube Notes,Archive Status,Frequency,Start Reference Period,End Reference Period,Total number of dimensions
0,Registered Retirement Savings Plan (RRSP) room...,11100045,111-0040,https://www150.statcan.gc.ca/t1/tbl1/en/tv.act...,1;2;3;4,CURRENT - a cube available to the public and t...,Annual,2000-01-01,2016-01-01,2.0


## Dimensions

In [4]:
# 'Dimension ID' appears as a header marker more than once; the last occurrence
# is the header of the member table, so it bounds the dimension list from below.
dimension_header_mask = meta_df['Cube Title'].eq('Dimension ID')
dimension2_row = meta_df.loc[dimension_header_mask].tail(1).index.item()

# Dimension list: rows from position 2 up to that header, first four columns only
dimension1_columns = ['Dimension ID','Dimension name','Dimension Notes','Dimension Definitions']
dimension1 = meta_df.iloc[2:dimension2_row, :4]
dimension1.columns = dimension1_columns
dimension1

Unnamed: 0,Dimension ID,Dimension name,Dimension Notes,Dimension Definitions
2,1,Geography,12,
3,2,Registered Retirement Savings Plan (RRSP) room,2,


In [5]:
# Locate the single row whose first column holds the 'Symbol Legend' marker
symbol_legend_mask = meta_df['Cube Title'].eq('Symbol Legend')
symbol_row = meta_df.loc[symbol_legend_mask].index.item()
symbol_row

211

In [6]:
# Member table: rows after the last 'Dimension ID' header and before the
# 'Symbol Legend' marker, restricted to the first eight columns
dimension2_columns = [
    'Dimension ID',
    'Member Name',
    'Classification Code',
    'Member ID',
    'Parent Member ID',
    'Terminated',
    'Member Notes',
    'Member Definitions',
]
dimension2 = meta_df.iloc[dimension2_row + 1:symbol_row, :8]
dimension2.columns = dimension2_columns
dimension2

Unnamed: 0,Dimension ID,Member Name,Classification Code,Member ID,Parent Member ID,Terminated,Member Notes,Member Definitions
5,1,Canada,11124,1,,,,
6,1,Newfoundland and Labrador,10,2,1.0,,,
7,1,"St. John's, Newfoundland and Labrador",001,3,2.0,,,
8,1,Prince Edward Island,11,4,1.0,,,
9,1,Nova Scotia,12,5,1.0,,,
10,1,"Halifax, Nova Scotia",205,6,5.0,,,
11,1,New Brunswick,13,7,1.0,,,
12,1,"Saint John, New Brunswick",310,8,7.0,,,
13,1,Quebec,24,9,1.0,,,
14,1,"Saguenay, Quebec",408,10,9.0,,11,


## Survey

In [7]:
# The survey record is the single row right below the 'Survey Code' header
survey_header_mask = meta_df['Cube Title'].eq('Survey Code')
survey_row = meta_df.loc[survey_header_mask].index.item()

survey_columns = ['Survey Code', 'Survey Name']
survey = meta_df.iloc[survey_row + 1:survey_row + 2, :2]
survey.columns = survey_columns
survey

Unnamed: 0,Survey Code,Survey Name
228,4106,"Financial Data and Charitable Donations, Preli..."


## Subject 

In [8]:
# The subject record is the single row right below the 'Subject Code' header
subject_header_mask = meta_df['Cube Title'].eq('Subject Code')
subject_row = meta_df.loc[subject_header_mask].index.item()

subject_columns = ['Subject Code', 'Subject Name']
subject = meta_df.iloc[subject_row + 1:subject_row + 2, :2]
subject.columns = subject_columns
subject

Unnamed: 0,Subject Code,Subject Name
230,11,"Income, pensions, spending and wealth"


## Notes

In [9]:
# Notes run from the row after the 'Note ID' header up to (not including)
# the 'Correction ID' header
cube_titles = meta_df['Cube Title']
note_row = meta_df.loc[cube_titles.eq('Note ID')].index.item()
correction_row = meta_df.loc[cube_titles.eq('Correction ID')].index.item()

note_columns = ['Note ID', 'Note']
note = meta_df.iloc[note_row + 1:correction_row, :2]
note.columns = note_columns
note

Unnamed: 0,Note ID,Note
232,1,"Information on the data source, the historical..."
233,2,New Registered Retirement Savings Plan (RRSP) ...
234,3,Persons with Registered Retirement Savings Pla...
235,4,Taxfilers are people who filed a tax return fo...
236,5,The census metropolitan area (CMA) of Ottawa-G...
237,6,Total Registered Retirement Savings Plan (RRSP...
238,7,Unused room is the amount of the Registered Re...
239,8,Characteristics such as age are as of December...
240,9,Median is the middle number in a group of numb...
241,10,Earned income is the income used to determine ...


In [10]:
class DatasetMetadata(object):
    """Split a stacked cube-metadata table into one DataFrame per section.

    The metadata sheet holds several differently shaped tables one below the
    other.  Each table starts with a header row whose first column (named
    'Cube Title' once loaded) contains a marker such as 'Dimension ID',
    'Symbol Legend', 'Survey Code', 'Subject Code', 'Note ID' or
    'Correction ID'.  The constructor locates those marker rows and slices
    out the rows between them.

    Marker rows are located by *position*, so the input may carry any index
    (not only the default 0..n-1 one); the original index labels are kept on
    the extracted rows.  Every section is a copy and does not share data with
    ``meta_df``.

    Attributes:
        meta_info: the first row of ``meta_df`` (cube-level description).
        dimension1: rows between the first and the last 'Dimension ID'
            header, first four columns, relabelled as the dimension list.
        dimension2: rows between the last 'Dimension ID' header and the
            'Symbol Legend' marker, first eight columns, relabelled as the
            member list.
        survey: the single row below the 'Survey Code' header.
        subject: the single row below the 'Subject Code' header, indexed by
            'Subject Code'.
        note: rows between the 'Note ID' and 'Correction ID' headers.

    Raises:
        ValueError: if a required marker is missing from 'Cube Title', or if
            a marker that must be unique occurs more than once.
    """

    _DIMENSION_COLUMNS = ['Dimension ID', 'Dimension name', 'Dimension Notes',
                          'Dimension Definitions']
    _MEMBER_COLUMNS = ['Dimension ID', 'Member Name', 'Classification Code',
                       'Member ID', 'Parent Member ID', 'Terminated',
                       'Member Notes', 'Member Definitions']
    _SURVEY_COLUMNS = ['Survey Code', 'Survey Name']
    _SUBJECT_COLUMNS = ['Subject Code', 'Subject Name']
    _NOTE_COLUMNS = ['Note ID', 'Note']

    def __init__(self, meta_df:pd.DataFrame):
        self.meta_info = meta_df.head(1).copy()

        # 'Dimension ID' heads both the dimension list and the member list:
        # the first hit opens the dimension list, the last hit opens the members.
        dimension_rows = self._marker_positions(meta_df, 'Dimension ID')
        if len(dimension_rows) == 0:
            raise ValueError("no 'Dimension ID' row found in column 'Cube Title'")
        dimension1_row = int(dimension_rows[0])
        dimension2_row = int(dimension_rows[-1])
        self.dimension1 = self._section(
            meta_df, dimension1_row + 1, dimension2_row, self._DIMENSION_COLUMNS)

        symbol_row = self._single_marker(meta_df, 'Symbol Legend')
        self.dimension2 = self._section(
            meta_df, dimension2_row + 1, symbol_row, self._MEMBER_COLUMNS)

        survey_row = self._single_marker(meta_df, 'Survey Code')
        self.survey = self._section(
            meta_df, survey_row + 1, survey_row + 2, self._SURVEY_COLUMNS)

        subject_row = self._single_marker(meta_df, 'Subject Code')
        self.subject = self._section(
            meta_df, subject_row + 1, subject_row + 2, self._SUBJECT_COLUMNS
        ).set_index('Subject Code')

        note_row = self._single_marker(meta_df, 'Note ID')
        correction_row = self._single_marker(meta_df, 'Correction ID')
        self.note = self._section(
            meta_df, note_row + 1, correction_row, self._NOTE_COLUMNS)

    @staticmethod
    def _marker_positions(meta_df, marker):
        """Return the 0-based row positions where 'Cube Title' equals ``marker``."""
        matches = (meta_df['Cube Title'] == marker).to_numpy(dtype=bool)
        return matches.nonzero()[0]

    @classmethod
    def _single_marker(cls, meta_df, marker):
        """Return the row position of a marker that must occur exactly once."""
        positions = cls._marker_positions(meta_df, marker)
        if len(positions) != 1:
            raise ValueError(
                "expected exactly one {!r} row in column 'Cube Title', found {}".format(
                    marker, len(positions)))
        return int(positions[0])

    @staticmethod
    def _section(meta_df, start, stop, columns):
        """Copy rows ``start:stop`` (by position), keeping and relabelling the leading columns."""
        section = meta_df.iloc[start:stop, 0:len(columns)].copy()
        section.columns = columns
        return section
        
# Parse the raw metadata table once, then show the subject section
metadata = DatasetMetadata(meta_df=meta_df)
metadata.subject

Unnamed: 0_level_0,Subject Name
Subject Code,Unnamed: 1_level_1
11,"Income, pensions, spending and wealth"
