In [120]:
#Required packages
import pandas as pd
import numpy as np

#Read in November data.
nov_2019 = pd.read_stata("time_use_2019-11.dta", convert_categoricals=True)

#Read in the April data.
apr_2020 = pd.read_csv("time_use_2020-04.csv")

#Get the data type of each column in dataframe
def get_type(dataframe):
    return dataframe.dtypes

In [121]:
#Add date column to dataframes
np.repeat(np.array('nov-2019'), 88, axis = 0)
nov_2019['date'] = np.repeat(np.array('nov_2019'), 88, axis = 0) 
#nov_2019


np.repeat(np.array('apr-2020'), 106, axis = 0)
apr_2020['date'] = np.repeat(np.array('apr-2020'), 106, axis = 0) 
#apr_2020

In [122]:
# Make all columns have a sensible data type
nov_2019["ind_id"] = nov_2019.nomem_encr.astype(int)
nov_2019["hh_id"] = nov_2019.nohouse_encr.astype(int)
apr_2020["geslacht"] = apr_2020.geslacht.astype('category')
apr_2020["ind_id"] = apr_2020.nomem_encr
apr_2020["hh_id"] = apr_2020.nohouse_encr

In [123]:
#Rename columns with sensible names
nov_2019 = nov_2019.rename(columns = {"geslacht":"gender", "v1q1_v1col1":"total_working_hours", 
                                      "v1q5_v1col1":"childcare_hours"})
apr_2020 = apr_2020.rename(columns = {"geslacht":"gender","v1q1_v1col1":"workplace_working_hours", 
                                      "v1q1a_v1col1":"home_office_no_kid_hours",
                                      "v1q1b_v1col1":"home_office_kid_responsibility_hours", 
                                      "v1q1c_v1col1":"home_office_kid_no_responsibility_hours",
                                      "v1q5_v1col1":"childcare_residual_hours",
                                      "v1q5a_v1col1":"homeschooling_hours"})


In [124]:
#creating total working hours column for apr 2020
apr_2020['total_working_hours'] = apr_2020[["workplace_working_hours", "home_office_no_kid_hours", "home_office_kid_responsibility_hours", "home_office_kid_no_responsibility_hours"]].sum(axis = 1, skipna = True)
apr_2020

Unnamed: 0,gender,workplace_working_hours,home_office_no_kid_hours,home_office_kid_responsibility_hours,home_office_kid_no_responsibility_hours,childcare_residual_hours,homeschooling_hours,nohouse_encr,nomem_encr,date,ind_id,hh_id,total_working_hours
0,Man,0.0,,0.0,0.0,0.0,0.0,1049420,1687033,apr-2020,1687033,1049420,0.0
1,Vrouw,3.0,,0.0,0.0,0.0,0.0,1049420,1662353,apr-2020,1662353,1049420,3.0
2,Man,47.0,,4.0,0.0,4.0,0.0,1011033,1631191,apr-2020,1631191,1011033,51.0
3,Vrouw,8.0,,0.0,8.0,0.0,0.0,1011033,1687630,apr-2020,1687630,1011033,16.0
4,Man,36.0,,0.0,0.0,0.0,0.0,1047651,1746405,apr-2020,1746405,1047651,36.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...
101,Man,0.0,0.0,,,,,1156059,1696174,apr-2020,1696174,1156059,0.0
102,Vrouw,16.0,,0.0,0.0,15.0,20.0,1144468,1678816,apr-2020,1678816,1144468,16.0
103,Man,0.0,,8.0,24.0,10.0,10.0,1144468,1668177,apr-2020,1668177,1144468,32.0
104,Man,0.0,0.0,,,,,1159704,1655142,apr-2020,1655142,1159704,0.0


In [125]:
#Modify the gender variable to have male and female categories
nov_2019_cat = pd.Categorical(nov_2019["gender"])
nov_2019_cat =nov_2019_cat.rename_categories(["Male", "Female"])
nov_2019["gender"] = nov_2019_cat
apr_2020_cat = pd.Categorical(apr_2020["gender"])
apr_2020_cat = apr_2020_cat.rename_categories(["Male", "Female"])
apr_2020["gender"] = apr_2020_cat

In [126]:
#Dropping the useless columns
nov_2019 = nov_2019.drop(columns=["nomem_encr", "nohouse_encr"])
apr_2020 = apr_2020.drop(columns=["nomem_encr", "nohouse_encr"])

In [127]:
#Appending apr data to nov data
time_use_panel = apr_2020.append(nov_2019, ignore_index=True)

time_use_panel

Unnamed: 0,gender,workplace_working_hours,home_office_no_kid_hours,home_office_kid_responsibility_hours,home_office_kid_no_responsibility_hours,childcare_residual_hours,homeschooling_hours,date,ind_id,hh_id,total_working_hours,childcare_hours
0,Male,0.0,,0.0,0.0,0.0,0.0,apr-2020,1687033,1049420,0.0,
1,Female,3.0,,0.0,0.0,0.0,0.0,apr-2020,1662353,1049420,3.0,
2,Male,47.0,,4.0,0.0,4.0,0.0,apr-2020,1631191,1011033,51.0,
3,Female,8.0,,0.0,8.0,0.0,0.0,apr-2020,1687630,1011033,16.0,
4,Male,36.0,,0.0,0.0,0.0,0.0,apr-2020,1746405,1047651,36.0,
...,...,...,...,...,...,...,...,...,...,...,...,...
189,Male,,,,,,,nov_2019,1700319,1132053,40.0,
190,Male,,,,,,,nov_2019,1696174,1156059,0.0,
191,Female,,,,,,,nov_2019,1678816,1144468,24.0,25.0
192,Male,,,,,,,nov_2019,1668177,1144468,36.0,4.0


In [128]:
# Make 4dimensional panel with three indices
time_use_panel_4d = time_use_panel.groupby(["hh_id","ind_id","date"]).mean()
time_use_panel_4d.unstack()

Unnamed: 0_level_0,Unnamed: 1_level_0,workplace_working_hours,workplace_working_hours,home_office_no_kid_hours,home_office_no_kid_hours,home_office_kid_responsibility_hours,home_office_kid_responsibility_hours,home_office_kid_no_responsibility_hours,home_office_kid_no_responsibility_hours,childcare_residual_hours,childcare_residual_hours,homeschooling_hours,homeschooling_hours,total_working_hours,total_working_hours,childcare_hours,childcare_hours
Unnamed: 0_level_1,date,apr-2020,nov_2019,apr-2020,nov_2019,apr-2020,nov_2019,apr-2020,nov_2019,apr-2020,nov_2019,apr-2020,nov_2019,apr-2020,nov_2019,apr-2020,nov_2019
hh_id,ind_id,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2,Unnamed: 16_level_2,Unnamed: 17_level_2
1011033,1631191,47.0,,,,4.0,,0.0,,4.0,,0.0,,51.0,67.0,,1.0
1011033,1687630,8.0,,,,0.0,,8.0,,0.0,,0.0,,16.0,17.0,,6.0
1027909,1649615,0.0,,,,0.0,,0.0,,0.0,,0.0,,0.0,0.0,,0.0
1027909,1724325,,,,,,,,,,,,,,20.0,,0.0
1029103,1663791,28.0,,0.0,,,,,,,,,,28.0,30.0,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1159704,1655142,0.0,,0.0,,,,,,,,,,0.0,0.0,,
1159704,1743847,0.0,,0.0,,,,,,,,,,0.0,,,
1164056,1692111,10.0,,0.0,,,,,,,,,,10.0,,,
1173782,1715265,40.0,,0.0,,,,,,,,,,40.0,,,


In [129]:
#Average total working hours sorted by months
time_use_panel_4d.groupby(["date"]).total_working_hours.mean()

date
apr-2020    13.273585
nov_2019    17.988636
Name: total_working_hours, dtype: float64

In [130]:
#Related to task 3
time_use_panel_time_constant = time_use_panel[['hh_id', 'ind_id', 'gender']]
time_use_panel_time_constant

Unnamed: 0,hh_id,ind_id,gender
0,1049420,1687033,Male
1,1049420,1662353,Female
2,1011033,1631191,Male
3,1011033,1687630,Female
4,1047651,1746405,Male
...,...,...,...
189,1132053,1700319,Male
190,1156059,1696174,Male
191,1144468,1678816,Female
192,1144468,1668177,Male


In [131]:
### Assing childcare hours to the data frame_Task 4
time_use_panel['total_childcare_hours'] = time_use_panel[["childcare_residual_hours", "childcare_hours"]].sum(axis = 1, skipna = True)
time_use_panel.tail(50)

Unnamed: 0,gender,workplace_working_hours,home_office_no_kid_hours,home_office_kid_responsibility_hours,home_office_kid_no_responsibility_hours,childcare_residual_hours,homeschooling_hours,date,ind_id,hh_id,total_working_hours,childcare_hours,total_childcare_hours
144,Female,,,,,,,nov_2019,1610801,1098168,0.0,,0.0
145,Male,,,,,,,nov_2019,1689343,1098168,0.0,,0.0
146,Male,,,,,,,nov_2019,1767692,1084395,40.0,0.0,0.0
147,Female,,,,,,,nov_2019,1637845,1084395,28.0,0.0,0.0
148,Female,,,,,,,nov_2019,1654971,1112539,40.0,46.0,46.0
149,Male,,,,,,,nov_2019,1691231,1112539,40.0,20.0,20.0
150,Male,,,,,,,nov_2019,1708774,1114848,0.0,,0.0
151,Female,,,,,,,nov_2019,1767479,1128343,0.0,,0.0
152,Male,,,,,,,nov_2019,1733194,1128343,0.0,,0.0
153,Female,,,,,,,nov_2019,1673458,1110506,18.0,,0.0


In [132]:
#Keep data where both households answered and where childcare hours are larger than zero
time_use_panel_childcare_hours_positive = time_use_panel[time_use_panel["total_childcare_hours"] > 0]
time_use_panel_childcare_hours_positive

time_use_panel_childcare_couples = time_use_panel_childcare_hours_positive[time_use_panel_childcare_hours_positive.duplicated(subset = ["hh_id"], keep = False)]
time_use_panel_childcare_couples

Unnamed: 0,gender,workplace_working_hours,home_office_no_kid_hours,home_office_kid_responsibility_hours,home_office_kid_no_responsibility_hours,childcare_residual_hours,homeschooling_hours,date,ind_id,hh_id,total_working_hours,childcare_hours,total_childcare_hours
2,Male,47.0,,4.0,0.0,4.0,0.0,apr-2020,1631191,1011033,51.0,,4.0
6,Female,0.0,,1.0,17.0,20.0,20.0,apr-2020,1703625,1059082,18.0,,20.0
7,Male,0.0,,0.0,0.0,5.0,5.0,apr-2020,1755058,1059082,0.0,,5.0
18,Male,0.0,,20.0,0.0,20.0,10.0,apr-2020,1702907,1088259,20.0,,20.0
19,Female,0.0,,0.0,0.0,15.0,15.0,apr-2020,1693606,1088259,0.0,,15.0
23,Female,0.0,,0.0,0.0,28.0,25.0,apr-2020,1650759,1069277,0.0,,28.0
47,Male,40.0,,0.0,0.0,10.0,0.0,apr-2020,1691231,1112539,40.0,,10.0
48,Female,37.0,,0.0,0.0,80.0,0.0,apr-2020,1654971,1112539,37.0,,80.0
63,Male,0.0,,5.0,0.0,12.0,8.0,apr-2020,1651039,1083141,5.0,,12.0
64,Female,0.0,,0.0,0.0,7.0,5.0,apr-2020,1703938,1083141,0.0,,7.0


In [133]:
# male and female average childcare hours
time_use_panel_task_4 = time_use_panel_childcare_hours_positive.groupby(["gender","date"]).total_childcare_hours.mean()
time_use_panel_task_4.unstack()

date,apr-2020,nov_2019
gender,Unnamed: 1_level_1,Unnamed: 2_level_1
Female,21.5,28.916667
Male,11.090909,10.692308


In [134]:
#Tas_3 merge = pd.merge(nov_2019, apr_2020, how = 'outer', left_index = True, right_index = True) 
merge = pd.merge(nov_2019, apr_2020, how = 'outer')
merge

Unnamed: 0,gender,total_working_hours,childcare_hours,date,ind_id,hh_id,workplace_working_hours,home_office_no_kid_hours,home_office_kid_responsibility_hours,home_office_kid_no_responsibility_hours,childcare_residual_hours,homeschooling_hours
0,Male,0.0,2.0,nov_2019,1687033,1049420,,,,,,
1,Female,9.0,0.0,nov_2019,1662353,1049420,,,,,,
2,Male,67.0,1.0,nov_2019,1631191,1011033,,,,,,
3,Female,17.0,6.0,nov_2019,1687630,1011033,,,,,,
4,Male,40.0,12.0,nov_2019,1746405,1047651,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...
189,Male,0.0,,apr-2020,1696174,1156059,0.0,0.0,,,,
190,Female,16.0,,apr-2020,1678816,1144468,16.0,,0.0,0.0,15.0,20.0
191,Male,32.0,,apr-2020,1668177,1144468,0.0,,8.0,24.0,10.0,10.0
192,Male,0.0,,apr-2020,1655142,1159704,0.0,0.0,,,,
