# Tax Filers

In [1]:
import numpy as np
import pandas as pd
import requests, zipfile, io
import re
import os

In [2]:
%load_ext autoreload
%autoreload 2
pd.set_option('display.max_colwidth', 200)
pd.set_option('display.max_columns', 200)
pd.set_option('display.max_rows', 400)

data_dir = '../data'
cache_dir = os.path.join(data_dir, 'cache')
import sys
sys.path.append('..')

In [5]:
from canadadata.statscan import to_wide_format, read_statscan_csv, StatscanDataset
from canadadata.io import unzip_data

# Describe the Statistics Canada CSV bundle (table id 11100044 per the URL).
# NOTE(review): `pivot_column` presumably names the long-format column whose
# values become the wide-format columns -- confirm in canadadata.statscan.
taxfilers_dataset = StatscanDataset(
    'https://www150.statcan.gc.ca/n1/tbl/csv/11100044-eng.zip',
    pivot_column='Registered Retirement Savings Plan (RRSP) contributor characteristics',
)

# Build the wide table, indexed by REF_DATE; `cache_dir` is handed to the
# dataset object (presumably to avoid re-downloading -- verify in the library).
tax_filers = taxfilers_dataset.get_wide_data(
    cache_dir=cache_dir,
    index_col='REF_DATE',
)

In [9]:
# Preview only the first rows instead of dumping the whole frame: the table
# holds one row per REF_DATE/GEO pair, so a full display floods the notebook.
tax_filers.head(10)

Unnamed: 0_level_0,GEO,75th percentile employment income of RRSP contributors,Average age of RRSP contributors,"Median RRSP contribution, female contributors","Median RRSP contribution, male contributors",Median RRSP contributions,Median employment income of RRSP contributors,Number of taxfilers,"Percentage of RRSP contributions, contributors aged 0 to 24 years","Percentage of RRSP contributions, contributors aged 25 to 34 years","Percentage of RRSP contributions, contributors aged 35 to 44 years","Percentage of RRSP contributions, contributors aged 45 to 54 years","Percentage of RRSP contributions, contributors aged 55 to 64 years","Percentage of RRSP contributions, contributors aged 65 years and over","Percentage of RRSP contributions, contributors with total income between $20,000 and $39,999","Percentage of RRSP contributions, contributors with total income between $40,000 and $59,999","Percentage of RRSP contributions, contributors with total income between $60,000 and $79,999","Percentage of RRSP contributions, contributors with total income of $80,000 or more","Percentage of RRSP contributions, contributors with total income of less than $20,000",Percentage of RRSP contributors aged 0 to 24 years,Percentage of RRSP contributors aged 25 to 34 years,Percentage of RRSP contributors aged 35 to 44 years,Percentage of RRSP contributors aged 45 to 54 years,Percentage of RRSP contributors aged 55 to 64 years,Percentage of RRSP contributors aged 65 years and over,"Percentage of RRSP contributors with total income between $20,000 and $39,999","Percentage of RRSP contributors with total income between $40,000 and $59,999","Percentage of RRSP contributors with total income between $60,000 and $79,999","Percentage of RRSP contributors with total income of $80,000 or more","Percentage of RRSP contributors with total income of less than $20,000",Percentage of female RRSP contributors,Percentage of female taxfilers,Percentage of male RRSP contributors,Percentage of male 
taxfilers,"Percentage of total RRSP contributions, female contributors","Percentage of total RRSP contributions, male contributors",Total RRSP contributions,Total RRSP contributors
REF_DATE,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1,Unnamed: 32_level_1,Unnamed: 33_level_1,Unnamed: 34_level_1,Unnamed: 35_level_1,Unnamed: 36_level_1,Unnamed: 37_level_1,Unnamed: 38_level_1
2000,Canada,57700.0,43.0,2200.0,3000.0,2700.0,39400.0,21611830.0,2.0,16.0,30.0,31.0,18.0,4.0,,,,31.0,4.0,5.0,21.0,30.0,28.0,14.0,2.0,,,,12.0,11.0,45.0,51.0,55.0,49.0,39.0,61.0,29280163.0,6291170.0
2000,Newfoundland and Labrador,53400.0,43.0,2000.0,2700.0,2400.0,36400.0,383130.0,1.0,14.0,29.0,37.0,17.0,3.0,,,,28.0,5.0,3.0,21.0,30.0,32.0,12.0,1.0,,,,9.0,13.0,40.0,50.0,60.0,50.0,36.0,64.0,284661.0,64480.0
2000,"St. John's, Newfoundland and Labrador",56300.0,42.0,2100.0,2900.0,2500.0,39600.0,123930.0,1.0,15.0,31.0,34.0,17.0,3.0,,,,32.0,3.0,3.0,24.0,31.0,28.0,12.0,2.0,,,,11.0,10.0,46.0,52.0,54.0,48.0,42.0,58.0,137229.0,28480.0
2000,Prince Edward Island,46700.0,43.0,1800.0,2400.0,2000.0,31300.0,98370.0,2.0,13.0,28.0,33.0,20.0,4.0,,,,24.0,5.0,5.0,20.0,29.0,30.0,15.0,2.0,,,,7.0,15.0,45.0,51.0,55.0,49.0,39.0,61.0,87933.0,21400.0
2000,Nova Scotia,52100.0,43.0,1800.0,2500.0,2200.0,36400.0,658460.0,1.0,13.0,30.0,34.0,18.0,4.0,,,,32.0,4.0,4.0,20.0,31.0,29.0,13.0,2.0,,,,10.0,13.0,43.0,51.0,57.0,49.0,37.0,63.0,628727.0,144170.0
2000,"Halifax, Nova Scotia",56500.0,42.0,2000.0,2900.0,2400.0,40200.0,241570.0,1.0,16.0,31.0,31.0,16.0,4.0,,,,34.0,3.0,4.0,23.0,32.0,27.0,12.0,2.0,,,,12.0,10.0,47.0,52.0,53.0,48.0,42.0,58.0,314030.0,67700.0
2000,New Brunswick,51300.0,43.0,1800.0,2400.0,2100.0,35700.0,545100.0,1.0,13.0,27.0,34.0,22.0,3.0,,,,31.0,4.0,4.0,21.0,30.0,30.0,13.0,2.0,,,,9.0,12.0,42.0,51.0,58.0,49.0,36.0,64.0,471277.0,109400.0
2000,"Saint John, New Brunswick",56000.0,42.0,2000.0,3000.0,2400.0,38500.0,88840.0,1.0,14.0,28.0,33.0,21.0,3.0,,,,39.0,3.0,4.0,22.0,31.0,28.0,13.0,2.0,,,,12.0,11.0,42.0,52.0,58.0,48.0,33.0,67.0,109483.0,22700.0
2000,Quebec,53800.0,42.0,2000.0,2900.0,2400.0,36800.0,5406500.0,2.0,16.0,31.0,30.0,17.0,3.0,,,,27.0,4.0,5.0,21.0,31.0,28.0,13.0,2.0,,,,9.0,11.0,44.0,51.0,56.0,49.0,38.0,62.0,6565568.0,1532980.0
2000,"Saguenay, Quebec",59000.0,43.0,2000.0,2800.0,2500.0,41200.0,116390.0,2.0,13.0,32.0,33.0,19.0,2.0,,,,22.0,3.0,4.0,17.0,33.0,32.0,13.0,1.0,,,,8.0,9.0,36.0,49.0,64.0,51.0,32.0,68.0,128025.0,30560.0


In [8]:
# Keep every row whose REF_DATE index value equals 2016 (all geographies).
is_2016 = tax_filers.index == 2016
tax_filers.loc[is_2016]

dtype('int64')

## Canada

In [11]:
# National-level rows only: keep the records whose GEO value is 'Canada'.
tax_filers_ca = tax_filers.query(
    "GEO =='Canada'"
)

In [12]:
# Display the Canada-only subset (one row per REF_DATE).
tax_filers_ca

Unnamed: 0_level_0,GEO,75th percentile employment income of RRSP contributors,Average age of RRSP contributors,"Median RRSP contribution, female contributors","Median RRSP contribution, male contributors",Median RRSP contributions,Median employment income of RRSP contributors,Number of taxfilers,"Percentage of RRSP contributions, contributors aged 0 to 24 years","Percentage of RRSP contributions, contributors aged 25 to 34 years","Percentage of RRSP contributions, contributors aged 35 to 44 years","Percentage of RRSP contributions, contributors aged 45 to 54 years","Percentage of RRSP contributions, contributors aged 55 to 64 years","Percentage of RRSP contributions, contributors aged 65 years and over","Percentage of RRSP contributions, contributors with total income between $20,000 and $39,999","Percentage of RRSP contributions, contributors with total income between $40,000 and $59,999","Percentage of RRSP contributions, contributors with total income between $60,000 and $79,999","Percentage of RRSP contributions, contributors with total income of $80,000 or more","Percentage of RRSP contributions, contributors with total income of less than $20,000",Percentage of RRSP contributors aged 0 to 24 years,Percentage of RRSP contributors aged 25 to 34 years,Percentage of RRSP contributors aged 35 to 44 years,Percentage of RRSP contributors aged 45 to 54 years,Percentage of RRSP contributors aged 55 to 64 years,Percentage of RRSP contributors aged 65 years and over,"Percentage of RRSP contributors with total income between $20,000 and $39,999","Percentage of RRSP contributors with total income between $40,000 and $59,999","Percentage of RRSP contributors with total income between $60,000 and $79,999","Percentage of RRSP contributors with total income of $80,000 or more","Percentage of RRSP contributors with total income of less than $20,000",Percentage of female RRSP contributors,Percentage of female taxfilers,Percentage of male RRSP contributors,Percentage of male 
taxfilers,"Percentage of total RRSP contributions, female contributors","Percentage of total RRSP contributions, male contributors",Total RRSP contributions,Total RRSP contributors
REF_DATE,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1,Unnamed: 32_level_1,Unnamed: 33_level_1,Unnamed: 34_level_1,Unnamed: 35_level_1,Unnamed: 36_level_1,Unnamed: 37_level_1,Unnamed: 38_level_1
2000,Canada,57700.0,43.0,2200.0,3000.0,2700.0,39400.0,21611830.0,2.0,16.0,30.0,31.0,18.0,4.0,,,,31.0,4.0,5.0,21.0,30.0,28.0,14.0,2.0,,,,12.0,11.0,45.0,51.0,55.0,49.0,39.0,61.0,29280163.0,6291170.0
2001,Canada,59100.0,43.0,2200.0,3000.0,2600.0,40400.0,21886860.0,2.0,16.0,29.0,31.0,19.0,4.0,,,,33.0,3.0,5.0,21.0,30.0,28.0,14.0,2.0,,,,13.0,10.0,45.0,51.0,55.0,49.0,38.0,62.0,28438914.0,6241050.0
2002,Canada,61000.0,43.0,2100.0,3000.0,2500.0,41600.0,21979210.0,2.0,15.0,28.0,31.0,20.0,4.0,,,,35.0,3.0,4.0,21.0,29.0,28.0,15.0,2.0,,,,14.0,10.0,46.0,51.0,54.0,49.0,39.0,61.0,27072812.0,5991440.0
2003,Canada,63100.0,43.0,2100.0,3000.0,2600.0,43000.0,22465770.0,2.0,15.0,27.0,31.0,21.0,4.0,,,,37.0,3.0,4.0,20.0,28.0,29.0,16.0,2.0,,,,15.0,9.0,46.0,51.0,54.0,49.0,38.0,62.0,27561305.0,5948340.0
2004,Canada,65800.0,44.0,2200.0,3000.0,2600.0,44600.0,22725310.0,2.0,14.0,26.0,32.0,22.0,4.0,,,,40.0,3.0,4.0,20.0,28.0,29.0,16.0,2.0,,,,17.0,8.0,46.0,52.0,54.0,48.0,38.0,62.0,28788102.0,6002350.0
2005,Canada,68100.0,44.0,2180.0,3070.0,2630.0,45900.0,23311690.0,2.0,14.0,26.0,32.0,23.0,4.0,,,,44.0,2.0,4.0,20.0,27.0,30.0,17.0,3.0,,,,19.0,8.0,46.0,52.0,54.0,48.0,38.0,62.0,30581252.0,6135980.0
2006,Canada,70600.0,44.0,2250.0,3200.0,2730.0,47500.0,23338370.0,2.0,13.0,25.0,33.0,23.0,4.0,,,,47.0,2.0,4.0,20.0,26.0,30.0,18.0,3.0,,,,21.0,7.0,46.0,52.0,54.0,48.0,38.0,62.0,32350792.0,6196050.0
2007,Canada,73210.0,44.0,2300.0,3260.0,2780.0,49210.0,23725970.0,1.0,13.0,24.0,32.0,24.0,6.0,12.0,20.0,18.0,49.0,2.0,4.0,19.0,25.0,30.0,18.0,3.0,25.0,28.0,18.0,23.0,6.0,46.0,52.0,54.0,48.0,39.0,61.0,34057715.0,6292480.0
2008,Canada,76200.0,45.0,2240.0,3220.0,2700.0,51090.0,24035930.0,1.0,13.0,22.0,32.0,25.0,6.0,10.0,19.0,17.0,52.0,2.0,4.0,19.0,25.0,30.0,19.0,3.0,23.0,27.0,18.0,25.0,6.0,47.0,52.0,53.0,48.0,39.0,61.0,33314040.0,6178900.0
2009,Canada,76920.0,45.0,2260.0,3200.0,2680.0,51570.0,24320760.0,1.0,12.0,21.0,32.0,26.0,6.0,10.0,18.0,17.0,53.0,2.0,3.0,19.0,24.0,30.0,20.0,4.0,23.0,28.0,18.0,26.0,5.0,47.0,52.0,53.0,48.0,39.0,61.0,32999435.0,5967710.0


## Ontario

In [15]:
# Provincial rows only: keep the records whose GEO value is 'Ontario'.
# Bound to a name (mirroring `tax_filers_ca` in the Canada section) so the
# subset can be reused by later cells instead of being recomputed.
tax_filers_on = tax_filers.query("GEO =='Ontario'")
tax_filers_on

Unnamed: 0_level_0,GEO,75th percentile employment income of RRSP contributors,Average age of RRSP contributors,"Median RRSP contribution, female contributors","Median RRSP contribution, male contributors",Median RRSP contributions,Median employment income of RRSP contributors,Number of taxfilers,"Percentage of RRSP contributions, contributors aged 0 to 24 years","Percentage of RRSP contributions, contributors aged 25 to 34 years","Percentage of RRSP contributions, contributors aged 35 to 44 years","Percentage of RRSP contributions, contributors aged 45 to 54 years","Percentage of RRSP contributions, contributors aged 55 to 64 years","Percentage of RRSP contributions, contributors aged 65 years and over","Percentage of RRSP contributions, contributors with total income between $20,000 and $39,999","Percentage of RRSP contributions, contributors with total income between $40,000 and $59,999","Percentage of RRSP contributions, contributors with total income between $60,000 and $79,999","Percentage of RRSP contributions, contributors with total income of $80,000 or more","Percentage of RRSP contributions, contributors with total income of less than $20,000",Percentage of RRSP contributors aged 0 to 24 years,Percentage of RRSP contributors aged 25 to 34 years,Percentage of RRSP contributors aged 35 to 44 years,Percentage of RRSP contributors aged 45 to 54 years,Percentage of RRSP contributors aged 55 to 64 years,Percentage of RRSP contributors aged 65 years and over,"Percentage of RRSP contributors with total income between $20,000 and $39,999","Percentage of RRSP contributors with total income between $40,000 and $59,999","Percentage of RRSP contributors with total income between $60,000 and $79,999","Percentage of RRSP contributors with total income of $80,000 or more","Percentage of RRSP contributors with total income of less than $20,000",Percentage of female RRSP contributors,Percentage of female taxfilers,Percentage of male RRSP contributors,Percentage of male 
taxfilers,"Percentage of total RRSP contributions, female contributors","Percentage of total RRSP contributions, male contributors",Total RRSP contributions,Total RRSP contributors
REF_DATE,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1,Unnamed: 32_level_1,Unnamed: 33_level_1,Unnamed: 34_level_1,Unnamed: 35_level_1,Unnamed: 36_level_1,Unnamed: 37_level_1,Unnamed: 38_level_1
2000,Ontario,61700.0,43.0,2400.0,3300.0,2900.0,42100.0,8083210.0,2.0,17.0,30.0,30.0,18.0,4.0,,,,35.0,3.0,5.0,22.0,30.0,27.0,14.0,3.0,,,,14.0,10.0,46.0,51.0,54.0,49.0,40.0,60.0,12348689.0,2491110.0
2001,Ontario,63100.0,43.0,2400.0,3200.0,2800.0,43200.0,8229640.0,2.0,17.0,29.0,30.0,19.0,4.0,,,,37.0,3.0,4.0,22.0,30.0,27.0,14.0,2.0,,,,15.0,9.0,46.0,52.0,54.0,48.0,39.0,61.0,11969530.0,2462280.0
2002,Ontario,65500.0,43.0,2200.0,3100.0,2700.0,44500.0,8265800.0,2.0,16.0,28.0,30.0,20.0,4.0,,,,39.0,3.0,4.0,21.0,30.0,27.0,15.0,3.0,,,,16.0,9.0,46.0,52.0,54.0,48.0,40.0,60.0,11227411.0,2349330.0
2003,Ontario,67500.0,43.0,2300.0,3200.0,2700.0,46000.0,8472870.0,1.0,15.0,28.0,30.0,21.0,4.0,,,,42.0,3.0,4.0,21.0,29.0,28.0,16.0,3.0,,,,18.0,8.0,46.0,52.0,54.0,48.0,39.0,61.0,11470632.0,2322730.0
2004,Ontario,70400.0,44.0,2300.0,3200.0,2800.0,47800.0,8574350.0,1.0,14.0,27.0,31.0,22.0,5.0,,,,45.0,2.0,4.0,20.0,29.0,28.0,17.0,3.0,,,,20.0,8.0,46.0,52.0,54.0,48.0,39.0,61.0,11938130.0,2342390.0
2005,Ontario,72900.0,44.0,2340.0,3240.0,2800.0,49100.0,8804910.0,1.0,14.0,26.0,31.0,23.0,5.0,,,,47.0,2.0,4.0,20.0,28.0,29.0,17.0,3.0,,,,22.0,7.0,47.0,52.0,53.0,48.0,40.0,60.0,12518005.0,2392350.0
2006,Ontario,74600.0,44.0,2400.0,3320.0,2860.0,50300.0,8789320.0,1.0,13.0,26.0,32.0,24.0,5.0,,,,50.0,2.0,4.0,19.0,27.0,29.0,18.0,3.0,,,,24.0,6.0,47.0,52.0,53.0,48.0,39.0,61.0,13064314.0,2392740.0
2007,Ontario,77190.0,44.0,2400.0,3370.0,2880.0,51750.0,8929740.0,1.0,13.0,24.0,32.0,24.0,6.0,10.0,18.0,17.0,52.0,2.0,3.0,19.0,27.0,29.0,18.0,3.0,22.0,27.0,18.0,26.0,6.0,47.0,52.0,53.0,48.0,39.0,61.0,13618980.0,2411460.0
2008,Ontario,79880.0,45.0,2360.0,3250.0,2750.0,53410.0,9029190.0,1.0,12.0,23.0,31.0,25.0,6.0,9.0,17.0,17.0,55.0,2.0,3.0,19.0,26.0,30.0,19.0,4.0,21.0,27.0,18.0,28.0,6.0,47.0,52.0,53.0,48.0,40.0,60.0,13205055.0,2352710.0
2009,Ontario,80380.0,45.0,2370.0,3230.0,2720.0,53560.0,9194810.0,1.0,12.0,22.0,32.0,27.0,7.0,9.0,17.0,16.0,56.0,2.0,3.0,18.0,25.0,30.0,20.0,4.0,21.0,27.0,18.0,28.0,6.0,48.0,52.0,52.0,48.0,40.0,60.0,13199920.0,2263780.0


## Plots

In [14]:
# Select matplotlib's built-in 'fivethirtyeight' style sheet for the figures
# created after this cell runs.
from matplotlib import style

style.use('fivethirtyeight')