In [None]:
from statscantools.table import Table
from statscantools.wds import wds_fetch_table
import pandas as pd

### About the dataset 

This survey looks at registrations of new motor vehicles in the provinces and territories in a given year.

- Estimates for Newfoundland and Labrador, Nova Scotia and Alberta are currently unavailable because of contractual limitations of the existing data sharing agreement. However, they are included in the Canadian total.
- Estimates for British Columbia include the territories.


In [None]:
# Statistics Canada table id used for both the fetch and the Table wrapper.
TABLE_ID = '20100024'

# Fetch the table through the WDS helper first, then open it
# (presumably Table reads what the fetch stored locally -- TODO confirm).
wds_fetch_table(TABLE_ID)
car_table = Table(TABLE_ID)
print('Title : ', car_table.title)

In [None]:
# Start from the table's merged frame and derive integer year / month columns
# from fixed character positions of ref_date (assumes a 'YYYY-MM...' layout -- TODO confirm).
df = car_table.merged_dataframe
df['year'] = df['ref_date'].str.slice(0, 4).astype(int)
df['month'] = df['ref_date'].str.slice(5, 7).astype(int)

In [None]:
"""
Note from Statistics Canada :
Estimates for Newfoundland and Labrador, Nova Scotia and Alberta are currently unavailable because of contractual limitations of the existing data sharing agreement. 
However, they are included in the Canadian total. 
"""

# Remove cubed (aggregate) rows: keep only rows sitting at the deepest level of
# dimensions 2 and 3. Dimension 1 is deliberately left unfiltered so that the
# Canada total survives alongside the individual regions.
is_deepest_dim_2 = df['dim_2_depth'].eq(df['dim_2_depth'].max())
is_deepest_dim_3 = df['dim_3_depth'].eq(df['dim_3_depth'].max())
df = df.loc[is_deepest_dim_2 & is_deepest_dim_3].copy()

In [None]:
# Narrow the frame to the columns the pivot below needs, then preview it.
columns_filter = ['year', 'month', 'geo', 'fuel type', 'vehicle type', 'value']
df = df.loc[:, columns_filter]
df.head()

In [None]:
# Pivot to wide format: one row per (year, month, vehicle type, fuel type),
# one column per geography.
piv_cols = ['geo']
piv_index = ['year', 'month', 'vehicle type', 'fuel type']

# Naming the value column explicitly yields geography labels as the columns
# directly. Leaving `values` out pivots every remaining column into a
# (column, geo) MultiIndex that then has to be flattened by position, which
# silently produces duplicated geo labels if df ever carries an extra column.
piv_df = df.pivot(index=piv_index, columns=piv_cols, values='value')

In [None]:
# Preview the first rows of the pivoted (wide) table.
piv_df.head(n=5)

In [None]:
# Calculate the other provinces from the Canada total minus the provinces we know.
#
# The reported regions are every column except the Canada total and the derived
# column itself. Excluding 'Other Provinces' (errors='ignore' covers the first
# run, when it does not exist yet) keeps this cell idempotent: re-running it no
# longer folds the previous result back into the row sum.
# sum(axis=1) skips NaN by default, so regions without a published estimate
# contribute nothing to the subtraction.
reported_regions = piv_df.drop(columns=['Canada', 'Other Provinces'], errors='ignore')
piv_df['Other Provinces'] = piv_df['Canada'] - reported_regions.sum(axis=1)

In [None]:
# Drop the national total and the three provinces that are not being kept as
# separate columns. Rebinding instead of inplace=True, together with
# errors='ignore', lets the cell be re-run without raising KeyError once the
# columns are already gone.
piv_df = piv_df.drop(
    columns=['Canada', 'Newfoundland and Labrador', 'Alberta', 'Nova Scotia'],
    errors='ignore',
)

In [None]:
# Preview the wide table again to check the remaining geography columns.
piv_df.head(n=5)

In [None]:
# Fold the geography columns back into rows (long format); the stacked values
# are named 'units' so that becomes the value column after reset_index().
stack = piv_df.stack().rename('units')
output_df = stack.reset_index()

In [None]:
# Write the long-format result next to the notebook, without the row index.
output_df.to_csv(path_or_buf='cardata.csv', index=False)