In [1]:
%matplotlib inline

In [2]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

In [8]:
from atusfunclib import load_data
from activitylib import ACTINFO
from wlmetrics import *

In [9]:
# Import all data
# load_data is the project helper from atusfunclib; loc='data' is presumably a
# path relative to the notebook's working directory — confirm in atusfunclib.
# The returned bundle is unpacked into individual dataframes in the next cell.
data_import = load_data(loc='data')

In [10]:
# Unpack the bundle returned by load_data into named dataframes.
# The assignment is positional: data_import must hold exactly eleven items,
# the main dataframe first, followed by the ten df*codes objects in this order.
(
    df,
    dfactcodes,
    dfeducodes,
    dfinccodes,
    dfagecodes,
    dfempcodes,
    dfindcodes,
    dfraccodes,
    dfloccodes,
    dfwhocodes,
    dfdemocodes,
) = data_import

In [11]:
# Convert category columns to float
# Both columns are overwritten in place on df. Presumably this is needed so the
# category values can be used numerically later — TODO confirm against
# downstream use.
df['TEAGE_CAT'] = df['TEAGE_CAT'].astype(float)
df['TRERNWA_CAT'] = df['TRERNWA_CAT'].astype(float)

Metrics to calculate:
1. Weighted `sum(life) / sum(work)`
1. More than 10h work a day
1. % of day spent on Personal Care
1. % of day spent on Leisure
1. More than 5h childcare and housework
1. Average time spent on Career, Social, Financial, Physical, and Community activities

In [12]:
# Preview the first 20 values of the housework metric (one boolean per row).
# Presumably the "more than 5h childcare and housework" flag from the list
# above — confirm the threshold in wlmetrics.
w_l_balance_housework(df).head(20)

0     False
1     False
2     False
3     False
4     False
5      True
6     False
7     False
8      True
9     False
10    False
11    False
12    False
13    False
14    False
15     True
16    False
17    False
18    False
19    False
dtype: bool

In [13]:
# Preview the first 20 values of the personal-care metric (one float per row).
# Presumably the fraction of the day spent on Personal Care — confirm the
# denominator in wlmetrics.
w_l_balance_personalcare(df).head(20)

0     0.635417
1     0.534722
2     0.496528
3     0.541667
4     0.383333
5     0.538194
6     0.451389
7     0.375000
8     0.548611
9     0.187500
10    0.409722
11    0.475694
12    0.454167
13    0.583333
14    0.461806
15    0.500000
16    0.468750
17    0.451389
18    0.486111
19    0.475694
dtype: float64

In [14]:
# Preview the first 20 values of the leisure/social metric (one float per row).
# Presumably the fraction of the day spent on Leisure — confirm the
# denominator in wlmetrics.
w_l_balance_leisuresocial(df).head(20)

0     0.364583
1     0.409722
2     0.236111
3     0.184028
4     0.083333
5     0.211806
6     0.001389
7     0.218750
8     0.083333
9     0.352083
10    0.083333
11    0.121528
12    0.093056
13    0.340278
14    0.078472
15    0.125000
16    0.409722
17    0.000000
18    0.240278
19    0.317361
dtype: float64

In [15]:
# Preview the first 20 values of the workday metric (one boolean per row).
# Presumably the "more than 10h work a day" flag from the list above — confirm
# the threshold in wlmetrics.
w_l_balance_workday(df).head(20)

0     False
1     False
2     False
3     False
4     False
5     False
6     False
7     False
8     False
9     False
10     True
11    False
12    False
13    False
14    False
15    False
16    False
17    False
18    False
19    False
Name: t0501, dtype: bool

In [16]:
# Unit weight vectors, one entry per item in the corresponding ACTINFO list.
# Presumably these are per-activity weights for the "life" (positiveWL) and
# "work" (negoccWL) sides of the ratio — confirm in wlmetrics.
weights_p, weights_n = (
    np.ones(len(ACTINFO[key])) for key in ('positiveWL', 'negoccWL')
)

# Preview the first 20 values of the weighted ratio metric.
w_l_balance_weighted_ratio(
    df,
    ACTINFO['positiveWL'],
    ACTINFO['negoccWL'],
    weights_p,
    weights_n,
).head(20)

0     7.273093
1     7.215975
2     6.962243
3     1.467932
4    -0.411883
5     6.985642
6    -0.454766
7    -0.195667
8     6.814543
9     0.370728
10   -0.676125
11    6.758095
12    0.060070
13    7.193686
14   -0.255726
15    2.292646
16    7.143618
17    1.603312
18    6.953684
19    7.041412
dtype: float64

In [17]:
# Store the weighted ratio on df as 'metric1'. This repeats the call previewed
# in the previous cell and relies on weights_p / weights_n defined there.
df['metric1'] = w_l_balance_weighted_ratio(
    df,
    ACTINFO['positiveWL'],
    ACTINFO['negoccWL'],
    weights_p,
    weights_n,
)

In [18]:
# Store the workday metric on df as 'metric2' (item 2 in the metric list,
# presumably "more than 10h work a day" — confirm in wlmetrics).
df['metric2'] = w_l_balance_workday(df)

In [19]:
# Store the personal-care metric on df as 'metric3' (item 3 in the metric list,
# presumably the share of the day spent on Personal Care — confirm in wlmetrics).
df['metric3'] = w_l_balance_personalcare(df)

In [20]:
# Store the leisure/social metric on df as 'metric4' (item 4 in the metric list,
# presumably the share of the day spent on Leisure — confirm in wlmetrics).
df['metric4'] = w_l_balance_leisuresocial(df)

In [21]:
# Store the housework metric on df as 'metric5' (item 5 in the metric list,
# presumably "more than 5h childcare and housework" — confirm in wlmetrics).
df['metric5'] = w_l_balance_housework(df)

In [22]:
# Show the first rows of df to check that the metric1..metric5 columns were
# appended alongside the original columns.
df.head()

Unnamed: 0,TUCASEID,TEAGE,TESEX,TUYEAR,TRHOLIDAY,TUDIARYDAY,GEMETSTA,GTMETSTA,TEHRUSLT,TELFS,...,t14_W,t15_W,t16_W,t18_W,t50_W,metric1,metric2,metric3,metric4,metric5
0,20030100013280,60,1,2003,0,6,1.0,,30.0,2,...,0.0,0.0,0.0,0.0,0.0,7.273093,False,0.635417,0.364583,False
1,20030100013344,41,2,2003,0,7,2.0,,30.0,1,...,0.0,0.0,0.0,34706450.0,0.0,7.215975,False,0.534722,0.409722,False
2,20030100013352,26,2,2003,0,7,1.0,,12.0,2,...,0.0,0.0,229831600.0,38305270.0,0.0,6.962243,False,0.496528,0.236111,False
3,20030100013848,36,2,2003,0,5,2.0,,,4,...,0.0,0.0,0.0,0.0,0.0,1.467932,False,0.541667,0.184028,False
4,20030100014165,51,1,2003,0,5,2.0,,80.0,1,...,0.0,0.0,0.0,239334200.0,0.0,-0.411883,False,0.383333,0.083333,False
