In [1]:
import numpy as np
import pandas as pd
from pathlib import Path
from collections import Counter

In [2]:
file_path = Path('./Tables/mental/13100465.csv')
mental = pd.read_csv(file_path)
mental.head()

Unnamed: 0,REF_DATE,GEO,DGUID,Age group,Sex,Indicators,Characteristics,UOM,UOM_ID,SCALAR_FACTOR,SCALAR_ID,VECTOR,COORDINATE,VALUE,STATUS,SYMBOL,TERMINATED,DECIMALS
0,2002,Canada,2016A000011124,"Total, 15 years and over",Both sexes,"Major depressive episode, life",Number of persons,Number,223,units,0,v73953257,1.1.1.5.1,3037049.0,,,,0
1,2002,Canada,2016A000011124,"Total, 15 years and over",Both sexes,"Major depressive episode, life","Low 95% confidence interval, number of persons",Number,223,units,0,v73953258,1.1.1.5.2,2913405.0,,,,0
2,2002,Canada,2016A000011124,"Total, 15 years and over",Both sexes,"Major depressive episode, life","High 95% confidence interval, number of persons",Number,223,units,0,v73953259,1.1.1.5.3,3160692.0,,,,0
3,2002,Canada,2016A000011124,"Total, 15 years and over",Both sexes,"Major depressive episode, life",Coefficient of variation for number of persons,Number,223,units,0,v73953260,1.1.1.5.4,2.1,,,,1
4,2002,Canada,2016A000011124,"Total, 15 years and over",Both sexes,"Major depressive episode, life",Percent,Percent,239,units,0,v73953261,1.1.1.5.5,12.2,,,,1


In [3]:
# clean the unnecessary collumns
mental = mental.drop(['DGUID','UOM','UOM_ID','SCALAR_FACTOR','SCALAR_ID','VECTOR','COORDINATE','STATUS','SYMBOL','TERMINATED','DECIMALS'], axis=1)
mental.head()

Unnamed: 0,REF_DATE,GEO,Age group,Sex,Indicators,Characteristics,VALUE
0,2002,Canada,"Total, 15 years and over",Both sexes,"Major depressive episode, life",Number of persons,3037049.0
1,2002,Canada,"Total, 15 years and over",Both sexes,"Major depressive episode, life","Low 95% confidence interval, number of persons",2913405.0
2,2002,Canada,"Total, 15 years and over",Both sexes,"Major depressive episode, life","High 95% confidence interval, number of persons",3160692.0
3,2002,Canada,"Total, 15 years and over",Both sexes,"Major depressive episode, life",Coefficient of variation for number of persons,2.1
4,2002,Canada,"Total, 15 years and over",Both sexes,"Major depressive episode, life",Percent,12.2


In [4]:
#Filter by Characteristics
caract_filter = mental['Characteristics'] == 'Number of persons'
mental = mental.loc[caract_filter]
mental = mental.drop(['Characteristics'], axis=1)

In [5]:
#Filter by Sex - Exclude both sexes
sex_filter = mental['Sex'] != 'Both sexes'
mental = mental.loc[sex_filter]

In [6]:
#Filter only Provinces
geo_filter = mental['GEO'] != 'Canada'
mental = mental.loc[geo_filter]

In [7]:
#Filter date between 2002 and 2012
date_filter = (mental['REF_DATE']>2001) & (mental['REF_DATE']<2013)
mental = mental.loc[date_filter]
mental.head()

Unnamed: 0,REF_DATE,GEO,Age group,Sex,Indicators,VALUE
2128,2002,Atlantic Provinces,"Total, 15 years and over",Males,"Major depressive episode, life",69712.0
2136,2002,Atlantic Provinces,"Total, 15 years and over",Males,"Major depressive episode, 12 months",25589.0
2144,2002,Atlantic Provinces,"Total, 15 years and over",Males,"Cannabis use, life",450776.0
2152,2002,Atlantic Provinces,"Total, 15 years and over",Males,"Cannabis use, 12 months",152564.0
2160,2002,Atlantic Provinces,"Total, 15 years and over",Males,"Suicidal thoughts, life",94268.0


In [8]:
mental['Age group'].value_counts()

Total, 15 years and over    1272
15 to 24 years              1272
25 to 64 years              1272
25 to 44 years              1272
45 to 64 years              1272
65 years and over           1272
Name: Age group, dtype: int64

In [9]:
#Filter Age
age_filter = mental['Age group'].isin(['15 to 24 years', '25 to 44 years','45 to 64 years','65 years and over'])
mental = mental.loc[age_filter]
mental.head(20)

Unnamed: 0,REF_DATE,GEO,Age group,Sex,Indicators,VALUE
2464,2002,Atlantic Provinces,15 to 24 years,Males,"Major depressive episode, life",5809.0
2472,2002,Atlantic Provinces,15 to 24 years,Males,"Major depressive episode, 12 months",4195.0
2480,2002,Atlantic Provinces,15 to 24 years,Males,"Cannabis use, life",89412.0
2488,2002,Atlantic Provinces,15 to 24 years,Males,"Cannabis use, 12 months",56853.0
2496,2002,Atlantic Provinces,15 to 24 years,Males,"Suicidal thoughts, life",16883.0
2504,2002,Atlantic Provinces,15 to 24 years,Males,"Suicidal thoughts, 12 months",7199.0
2512,2002,Atlantic Provinces,15 to 24 years,Males,"Perceived mental health, very good or excellent",125714.0
2520,2002,Atlantic Provinces,15 to 24 years,Males,"Perceived mental health, good",27568.0
2528,2002,Atlantic Provinces,15 to 24 years,Males,"Perceived mental health, fair or poor",7022.0
2536,2002,Atlantic Provinces,15 to 24 years,Males,"Self-rated work stress, days quite a bit or ex...",11787.0


In [10]:
mental['Age group']=mental['Age group'].replace({'15 to 24 years': 1})
mental['Age group']=mental['Age group'].replace({'25 to 44 years': 2})
mental['Age group']=mental['Age group'].replace({'45 to 64 years': 3})
mental['Age group']=mental['Age group'].replace({'65 years and over': 4})
mental.head(10)

Unnamed: 0,REF_DATE,GEO,Age group,Sex,Indicators,VALUE
2464,2002,Atlantic Provinces,1,Males,"Major depressive episode, life",5809.0
2472,2002,Atlantic Provinces,1,Males,"Major depressive episode, 12 months",4195.0
2480,2002,Atlantic Provinces,1,Males,"Cannabis use, life",89412.0
2488,2002,Atlantic Provinces,1,Males,"Cannabis use, 12 months",56853.0
2496,2002,Atlantic Provinces,1,Males,"Suicidal thoughts, life",16883.0
2504,2002,Atlantic Provinces,1,Males,"Suicidal thoughts, 12 months",7199.0
2512,2002,Atlantic Provinces,1,Males,"Perceived mental health, very good or excellent",125714.0
2520,2002,Atlantic Provinces,1,Males,"Perceived mental health, good",27568.0
2528,2002,Atlantic Provinces,1,Males,"Perceived mental health, fair or poor",7022.0
2536,2002,Atlantic Provinces,1,Males,"Self-rated work stress, days quite a bit or ex...",11787.0


In [11]:
#Filter Reason 'Permanent layoff'
mental_final = mental.groupby(['REF_DATE','GEO','Sex','Age group','Indicators'])['VALUE'].sum()
mental_final = pd.DataFrame(mental_final)
mental_final.head(20)

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,Unnamed: 4_level_0,VALUE
REF_DATE,GEO,Sex,Age group,Indicators,Unnamed: 5_level_1
2002,Alberta,Females,1,"Cannabis use, 12 months",63387.0
2002,Alberta,Females,1,"Cannabis use, life",106926.0
2002,Alberta,Females,1,"Eating disorder, current diagnosed condition",0.0
2002,Alberta,Females,1,"Major depressive episode, 12 months",16289.0
2002,Alberta,Females,1,"Major depressive episode, life",29945.0
2002,Alberta,Females,1,"Perceived mental health, fair or poor",23539.0
2002,Alberta,Females,1,"Perceived mental health, good",42278.0
2002,Alberta,Females,1,"Perceived mental health, very good or excellent",154101.0
2002,Alberta,Females,1,"Post-traumatic stress disorder, current diagnosed condition",0.0
2002,Alberta,Females,1,"Self-rated work stress, days a bit stressful",68978.0


In [12]:
mental_final.info()

<class 'pandas.core.frame.DataFrame'>
MultiIndex: 5088 entries, (2002, 'Alberta', 'Females', 1, 'Cannabis use, 12 months') to (2012, 'Saskatchewan', 'Males', 4, 'Suicidal thoughts, life')
Data columns (total 1 columns):
 #   Column  Non-Null Count  Dtype  
---  ------  --------------  -----  
 0   VALUE   5088 non-null   float64
dtypes: float64(1)
memory usage: 65.3+ KB


In [13]:
#mental_final.to_csv('./Tables/mental_final.csv', index=True)