In [1]:
import numpy as np
import pandas as pd
from pathlib import Path
from collections import Counter

In [2]:
file_path = Path('./Tables/job/14100126.csv')
job = pd.read_csv(file_path)
job.head()

Unnamed: 0,REF_DATE,GEO,DGUID,Reason,Characteristics,Sex,Age group,UOM,UOM_ID,SCALAR_FACTOR,SCALAR_ID,VECTOR,COORDINATE,VALUE,STATUS,SYMBOL,TERMINATED,DECIMALS
0,1976,Canada,2016A000011124,"Total, all reasons","Total, unemployed and not in the labour force",Both sexes,15 years and over,Persons,249,thousands,3,v54536566,1.1.1.1.1,7310.6,,,,1
1,1976,Canada,2016A000011124,"Total, all reasons","Total, unemployed and not in the labour force",Both sexes,15 to 24 years,Persons,249,thousands,3,v54536567,1.1.1.1.2,2016.8,,,,1
2,1976,Canada,2016A000011124,"Total, all reasons","Total, unemployed and not in the labour force",Both sexes,25 years and over,Persons,249,thousands,3,v54536568,1.1.1.1.3,5293.7,,,,1
3,1976,Canada,2016A000011124,"Total, all reasons","Total, unemployed and not in the labour force",Both sexes,25 to 54 years,Persons,249,thousands,3,v54536569,1.1.1.1.4,2649.1,,,,1
4,1976,Canada,2016A000011124,"Total, all reasons","Total, unemployed and not in the labour force",Both sexes,55 years and over,Persons,249,thousands,3,v54536570,1.1.1.1.5,2644.6,,,,1


In [3]:
# clean the unnecessary collumns
job = job.drop(['Characteristics','DGUID','UOM','UOM_ID','SCALAR_FACTOR','SCALAR_ID','VECTOR','COORDINATE','STATUS','SYMBOL','TERMINATED','DECIMALS'], axis=1)
job.dropna()
job.head()

Unnamed: 0,REF_DATE,GEO,Reason,Sex,Age group,VALUE
0,1976,Canada,"Total, all reasons",Both sexes,15 years and over,7310.6
1,1976,Canada,"Total, all reasons",Both sexes,15 to 24 years,2016.8
2,1976,Canada,"Total, all reasons",Both sexes,25 years and over,5293.7
3,1976,Canada,"Total, all reasons",Both sexes,25 to 54 years,2649.1
4,1976,Canada,"Total, all reasons",Both sexes,55 years and over,2644.6


In [4]:
#Filter by Sex - Exclude both sexes
sex_filter = job['Sex'] != 'Both sexes'
job = job.loc[sex_filter]

In [5]:
#Filter only Provinces
geo_filter = job['GEO'] != 'Canada'
job = job.loc[geo_filter]

In [6]:
#Filter date between 2002 and 2012
date_filter = (job['REF_DATE']>2001) & (job['REF_DATE']<2013)
job = job.loc[date_filter]
job.head()

Unnamed: 0,REF_DATE,GEO,Reason,Sex,Age group,VALUE
235060,2002,Newfoundland and Labrador,"Total, all reasons",Males,15 years and over,100.7
235061,2002,Newfoundland and Labrador,"Total, all reasons",Males,15 to 24 years,21.4
235062,2002,Newfoundland and Labrador,"Total, all reasons",Males,25 years and over,79.3
235063,2002,Newfoundland and Labrador,"Total, all reasons",Males,25 to 54 years,35.8
235064,2002,Newfoundland and Labrador,"Total, all reasons",Males,55 years and over,43.5


In [7]:
job['Age group'].value_counts()

15 years and over    8580
15 to 24 years       8580
25 years and over    8580
25 to 54 years       8580
55 years and over    8580
55 to 64 years       8580
65 years and over    8580
Name: Age group, dtype: int64

In [8]:
#Filter Age
age_filter = job['Age group'].isin(['15 to 24 years', '25 to 54 years','55 to 64 years','65 years and over'])
job = job.loc[age_filter]
job.head(10)

Unnamed: 0,REF_DATE,GEO,Reason,Sex,Age group,VALUE
235061,2002,Newfoundland and Labrador,"Total, all reasons",Males,15 to 24 years,21.4
235063,2002,Newfoundland and Labrador,"Total, all reasons",Males,25 to 54 years,35.8
235065,2002,Newfoundland and Labrador,"Total, all reasons",Males,55 to 64 years,16.8
235066,2002,Newfoundland and Labrador,"Total, all reasons",Males,65 years and over,26.7
235068,2002,Newfoundland and Labrador,"Total, all reasons",Females,15 to 24 years,21.7
235070,2002,Newfoundland and Labrador,"Total, all reasons",Females,25 to 54 years,43.9
235072,2002,Newfoundland and Labrador,"Total, all reasons",Females,55 to 64 years,20.6
235073,2002,Newfoundland and Labrador,"Total, all reasons",Females,65 years and over,32.6
235082,2002,Newfoundland and Labrador,"Total, all reasons",Males,15 to 24 years,4.9
235084,2002,Newfoundland and Labrador,"Total, all reasons",Males,25 to 54 years,16.8


In [9]:
job['Age group']=job['Age group'].replace({'15 to 24 years': 1})
job['Age group']=job['Age group'].replace({'25 to 54 years': 2})
job['Age group']=job['Age group'].replace({'55 to 64 years': 3})
job['Age group']=job['Age group'].replace({'65 years and over': 4})
job.head(15)

Unnamed: 0,REF_DATE,GEO,Reason,Sex,Age group,VALUE
235061,2002,Newfoundland and Labrador,"Total, all reasons",Males,1,21.4
235063,2002,Newfoundland and Labrador,"Total, all reasons",Males,2,35.8
235065,2002,Newfoundland and Labrador,"Total, all reasons",Males,3,16.8
235066,2002,Newfoundland and Labrador,"Total, all reasons",Males,4,26.7
235068,2002,Newfoundland and Labrador,"Total, all reasons",Females,1,21.7
235070,2002,Newfoundland and Labrador,"Total, all reasons",Females,2,43.9
235072,2002,Newfoundland and Labrador,"Total, all reasons",Females,3,20.6
235073,2002,Newfoundland and Labrador,"Total, all reasons",Females,4,32.6
235082,2002,Newfoundland and Labrador,"Total, all reasons",Males,1,4.9
235084,2002,Newfoundland and Labrador,"Total, all reasons",Males,2,16.8


In [10]:
job['VALUE'] = job['VALUE']*1000

In [11]:
#Filter Reason 'Permanent layoff'
job_final = job.groupby(['REF_DATE','GEO','Sex','Age group','Reason'])['VALUE'].sum()
job_final = pd.DataFrame(job_final)
job_final.head(15)

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,Unnamed: 4_level_0,VALUE
REF_DATE,GEO,Sex,Age group,Reason,Unnamed: 5_level_1
2002,Alberta,Females,1,Dissatisfied,12300.0
2002,Alberta,Females,1,Going to school,42600.0
2002,Alberta,Females,1,Have not worked in last year,18600.0
2002,Alberta,Females,1,Job leavers,71200.0
2002,Alberta,Females,1,Jobs losers,21500.0
2002,Alberta,Females,1,Never worked,49700.0
2002,Alberta,Females,1,Other reasons,4000.0
2002,Alberta,Females,1,Own illness or disability,0.0
2002,Alberta,Females,1,Permanent layoff,21300.0
2002,Alberta,Females,1,Personal or family reasons,8200.0


In [12]:
#Pivot Reason variable
job_final = job_final.pivot_table(index=['REF_DATE','GEO','Sex','Age group'],columns='Reason',values='VALUE')
job_final.head(10)

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Reason,Dissatisfied,Going to school,Have not worked in last year,Job leavers,Jobs losers,Never worked,Other reasons,Own illness or disability,Permanent layoff,Personal or family reasons,Retired,Temporary layoff,"Total, all reasons"
REF_DATE,GEO,Sex,Age group,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1
2002,Alberta,Females,1,12300.0,42600.0,18600.0,71200.0,21500.0,49700.0,4000.0,0.0,21300.0,8200.0,0.0,0.0,163700.0
2002,Alberta,Females,2,8800.0,9200.0,174900.0,66200.0,38100.0,20700.0,12700.0,12900.0,36300.0,19900.0,0.0,0.0,300400.0
2002,Alberta,Females,3,0.0,0.0,94200.0,8800.0,5000.0,9600.0,0.0,0.0,4900.0,0.0,4700.0,0.0,119700.0
2002,Alberta,Females,4,0.0,0.0,245600.0,4600.0,0.0,50200.0,0.0,0.0,0.0,0.0,3600.0,0.0,301800.0
2002,Alberta,Males,1,14200.0,54100.0,15400.0,79600.0,29800.0,44200.0,2100.0,1600.0,27100.0,1900.0,0.0,0.0,169200.0
2002,Alberta,Males,2,8800.0,10300.0,46900.0,36500.0,55800.0,6800.0,4700.0,7000.0,48900.0,0.0,0.0,7000.0,146200.0
2002,Alberta,Males,3,0.0,0.0,48000.0,9500.0,8200.0,0.0,0.0,0.0,7700.0,0.0,5400.0,0.0,69000.0
2002,Alberta,Males,4,0.0,0.0,213700.0,7700.0,0.0,6200.0,0.0,0.0,0.0,0.0,5900.0,0.0,229900.0
2002,British Columbia,Females,1,9000.0,45000.0,32300.0,71800.0,31000.0,104500.0,6400.0,3200.0,29800.0,6000.0,0.0,0.0,239600.0
2002,British Columbia,Females,2,10000.0,8900.0,306400.0,69400.0,75200.0,49000.0,20900.0,9600.0,71000.0,16700.0,0.0,4400.0,500000.0


In [13]:
#Filter Reason 'Permanent layoff'
#reason_filter = job['Reason'] == 'Permanent layoff'
#job = job.loc[reason_filter]
#job = job.drop(['Reason'], axis=1)
#job.head()

In [14]:
job_final = job_final.drop(['Total, all reasons'], axis=1)

In [15]:
job_final.info()

<class 'pandas.core.frame.DataFrame'>
MultiIndex: 880 entries, (2002, 'Alberta', 'Females', 1) to (2012, 'Saskatchewan', 'Males', 4)
Data columns (total 12 columns):
 #   Column                        Non-Null Count  Dtype  
---  ------                        --------------  -----  
 0   Dissatisfied                  880 non-null    float64
 1   Going to school               880 non-null    float64
 2   Have not worked in last year  880 non-null    float64
 3   Job leavers                   880 non-null    float64
 4   Jobs losers                   880 non-null    float64
 5   Never worked                  880 non-null    float64
 6   Other reasons                 880 non-null    float64
 7   Own illness or disability     880 non-null    float64
 8   Permanent layoff              880 non-null    float64
 9   Personal or family reasons    880 non-null    float64
 10  Retired                       880 non-null    float64
 11  Temporary layoff              880 non-null    float64
dtypes: 

In [16]:
#job_final.to_csv('./Tables/job_final_Reason.csv', index=True)