## Daysim Person and Household Models

*Summaries:*
    - Total People & Workers
    - Household Size
    - County Population
    - Age Distribution
    - Worker Types
    - Distance to Work
    - Transit Pass Ownership
    - Vehicle Ownership
---

In [721]:
from IPython.display import HTML

HTML('''<script>
code_show=true; 
function code_toggle() {
 if (code_show){
 $('div.input').hide();
 } else {
 $('div.input').show();
 }
 code_show = !code_show
} 
$( document ).ready(code_toggle);
</script>
<form action="javascript:code_toggle()"><input type="submit" value="Click here to toggle on/off the raw code."></form>''')

In [722]:
import os
import numpy as np
import pandas as pd
import h5py
import pylab as P
from IPython.display import display, display_pretty, Javascript, HTML
from pandas_highcharts.core import serialize
from pandas_highcharts.display import display_charts
import matplotlib.pyplot as plt

# Show charts in notebook
# %pylab inline

In [723]:
# Set main model directory to parent directory

# 2006 survey
model_dir = r'R:\SoundCast\releases\TransportationFutures2010\scripts\summarize'

# 2014 survey
base_dir = r'R:\SoundCast\releases\TransportationFutures2010\scripts\summarize' 

In [724]:
# Read Model Scenario Results
scen = h5py.File(model_dir + r'/survey.h5','r+')
scen_name = '2006 Survey'

In [725]:
# Read Base Data
base_file = r'/survey14.h5'

base = h5py.File(base_dir + base_file ,'r+')
base_name = '2014 Survey'

In [726]:
# Get person data into a single dataframe for base and scen
data_table = 'Person'

person_scen = pd.DataFrame(data={'Household ID': [i[0] for i in scen[data_table]['hhno'][:]],
                               'Person Number': [i[0] for i in scen[data_table]['pno'][:]],
                               'Transit Pass': [i[0] for i in scen[data_table]['ptpass'][:]],
                                'Auto Time to Work': [i[0] for i in scen[data_table]['pwautime'][:]],
                                'Auto Distance to Work': [i[0] for i in scen[data_table]['pwaudist'][:]],
                                'Worker Type': [i[0] for i in scen[data_table]['pwtyp'][:]],
                                'Student Type': [i[0] for i in scen[data_table]['pstyp'][:]],
                                'Usual Commute Mode': [i[0] for i in scen[data_table]['pwtaz'][:]],
                                'Workplace TAZ': [i[0] for i in scen[data_table]['pwtaz'][:]],
                                'Age': [i[0] for i in scen[data_table]['pagey'][:]],
                                'Expansion Factor': [i[0] for i in scen[data_table]['psexpfac'][:]]})

person_base = pd.DataFrame(data={'Household ID': [i[0] for i in base[data_table]['hhno'][:]],
                                 'Person Number': [i[0] for i in base[data_table]['pno'][:]],
                                 'Transit Pass': [i[0] for i in base[data_table]['ptpass'][:]],
                                 'Auto Time to Work': [i[0] for i in base[data_table]['ptpass'][:]],
                                 'Auto Distance to Work': [i[0] for i in base[data_table]['pwautime'][:]],
                                 'Worker Type': [i[0] for i in base[data_table]['pwtyp'][:]],
                                 'Student Type': [i[0] for i in base[data_table]['pstyp'][:]],
                                 'Usual Commute Mode': [i[0] for i in base[data_table]['puwmode'][:]],
                                 'Workplace TAZ': [i[0] for i in base[data_table]['pwtaz'][:]],
                                 'Age': [i[0] for i in base[data_table]['pagey'][:]],
                                 'Expansion Factor': [i[0] for i in base[data_table]['psexpfac'][:]]})

In [727]:
# Create unique ID for person by concatenating household ID and person number 
person_scen['personID'] = (person_scen['Household ID'].astype('str')+person_scen['Person Number'].astype('str')).astype('int')
person_base['personID'] = (person_base['Household ID'].astype('str')+person_base['Person Number'].astype('str')).astype('int')

In [728]:
# Get household data into a single dataframe for base and scen
data_table = 'Household'

hh_scen = pd.DataFrame(data={'Household ID': [i[0] for i in scen[data_table]['hhno'][:]],
                                'Household Size': [i[0] for i in scen[data_table]['hhsize'][:]],
                                'Household Vehicles': [i[0] for i in scen[data_table]['hhvehs'][:]],
                                 'Household Workers': [i[0] for i in scen[data_table]['hhwkrs'][:]],
                                 'Household Income': [i[0] for i in scen[data_table]['hhincome'][:]],
                                 'Household TAZ': [i[0] for i in scen[data_table]['hhtaz'][:]],
                                 'Expansion Factor': [i[0] for i in scen[data_table]['hhexpfac'][:]]})

hh_base = pd.DataFrame(data={'Household ID': [i[0] for i in base[data_table]['hhno'][:]],
                                 'Household Size': [i[0] for i in base[data_table]['hhsize'][:]],
                                 'Household Vehicles': [i[0] for i in base[data_table]['hhvehs'][:]],
                                 'Household Workers': [i[0] for i in base[data_table]['hhwkrs'][:]],
                                 'Household Income': [i[0] for i in base[data_table]['hhincome'][:]],
                                 'Household TAZ': [i[0] for i in base[data_table]['hhtaz'][:]],
                                 'Expansion Factor': [i[0] for i in base[data_table]['hhexpfac'][:]]})

In [729]:
# Add labels for worker type
labels = {
  0: "Not a worker",  
  1: "Full-time worker",
  2: "Part-time worker",
}

person_base['Worker Type'] = ([labels[x] for x in person_base['Worker Type']])
person_scen['Worker Type'] = ([labels[x] for x in person_scen['Worker Type']])

In [730]:
# # Add labels for student type
# labels = {
#   0: "Not a student",  
#   1: "Full-time student",
#   2: "Part-time student",
# }

# person_base['Student Type'] = ([labels[x] for x in person_base['Student Type']])
# person_scen['Student Type'] = ([labels[x] for x in person_scen['Student Type']])

In [731]:
# Join household records to person records
hh_per_scen = person_scen.join(hh_scen,on='Household ID',lsuffix='_p',rsuffix='_h')
hh_per_base = person_base.join(hh_base,on='Household ID',lsuffix='_p',rsuffix='_h')

In [732]:
# Join geography information to these cells

## Total People

In [733]:
print scen_name + ": "+ str(person_scen['personID'].count()).split('.')[0]
print base_name + ": "+ str(person_base['personID'].count()).split('.')[0]

2006 Survey: 10510
2014 Survey: 14250


## Average Household Size

In [734]:
print scen_name + ": "+ str(hh_scen['Household Size'].mean())
print base_name + ": "+ str(hh_base['Household Size'].mean())

2006 Survey: 2.21683189201
2014 Survey: 1.76130467013


## Household Size Distribution

In [735]:
df = pd.DataFrame([hh_scen.groupby('Household Size').count()['Household ID'],
                   hh_base.groupby('Household Size').count()['Household ID']]).T
df.columns=([scen_name,base_name])
df

Unnamed: 0_level_0,2006 Survey,2014 Survey
Household Size,Unnamed: 1_level_1,Unnamed: 2_level_1
1,1544,4253
2,1746,2444
3,621,723
4,613,497
5,158,123
6,49,37
7,5,15
8,5,2


In [736]:
# Distribution
df_dist = pd.DataFrame([df[scen_name]/person_scen.count()['Household ID'],
                   df[base_name]/person_base.count()['Household ID']]).T
df_dist
display_charts(df_dist, kind='bar', title='Household Size Distribution')

---

## Age Distribution

In [737]:
# 2006 survey age is continuous variable
# 2014 is categorical
from collections import OrderedDict


# lookup for 2014 names (which are averaged based on min and max of bin)
age_cat = OrderedDict([(2, '   <5'),
           (8, ' 5-11'),
           (14, '12-15'),
           (17, '16-17'),
          (21, '18-24'),
          (30, '25-34'),
          (40, '35-44'),
          (50, '44-54'),
          (60, '55-64'),
          (70, '65-74'),
          (80, '75-84'),
          (90, '85+')])


person_base['Age'] = [age_cat[x] for x in person_base['Age'].as_matrix()]

In [738]:
# convert 2006 into categorical
age_bins = [0,4,11,15,17,24,34,44,54,64,74,84,1000]
person_scen['Age'] = pd.cut(person_scen['Age'],age_bins,labels=age_cat.values())

In [739]:
df = pd.DataFrame([person_scen.groupby('Age').count()['Household ID'],
                   person_base.groupby('Age').count()['Household ID']]).T
df.columns=([scen_name,base_name])
df

Unnamed: 0_level_0,2006 Survey,2014 Survey
Age,Unnamed: 1_level_1,Unnamed: 2_level_1
<5,569,667
5-11,860,691
12-15,492,382
16-17,259,192
18-24,390,1911
25-34,878,2759
35-44,1572,1931
44-54,2002,1765
55-64,1840,2004
65-74,918,1306


In [740]:
# Distribution
df_new = pd.DataFrame([df[scen_name]/df[scen_name].sum(),
             df[base_name]/df[base_name].sum()]).T
df_new

display_charts(df_new, title='Age', kind='bar')

---

## Workers Types

In [741]:
df = pd.DataFrame([person_scen.groupby('Worker Type').count()['personID'],
                   person_base.groupby('Worker Type').count()['personID']]).T
df.columns=([scen_name,base_name])
df

Unnamed: 0_level_0,2006 Survey,2014 Survey
Worker Type,Unnamed: 1_level_1,Unnamed: 2_level_1
Full-time worker,4285,6805
Not a worker,5160,6192
Part-time worker,1065,1253


---

** Distribution of Worker Types**

In [742]:
# Distribution
df_dist = pd.DataFrame([df[scen_name]/person_scen.count()['personID'],
                   df[base_name]/person_base.count()['personID']]).T

In [743]:
display_charts(df_dist, kind='bar', title='Worker Type Distribution')

---

## Workers by County

In [744]:
# # Workers by County

# scen_workers = hh_per_scen[hh_per_scen['Worker Type'] <> 'Not a worker']
# base_workers = hh_per_base[hh_per_base['Worker Type'] <> 'Not a worker']

# df = scen_workers.groupby('County').count()['personID']
# df=df[df.index<>0]
# df=pd.DataFrame(df)
# df.columns=[scen_name]

# df_base = base_workers.groupby('County').count()['personID']
# df_base=df_base[df_base.index<>0]
# df[base_name]=df_base
# df

# # Also add in the CTPP here
# # 2006-2010 CTPP

# ctpp_workers = {'King':53625,'Kitsap':6475,'Pierce':15705,'Snohomish':15810}

# df['CTPP 06-10'] = pd.DataFrame(ctpp_workers.values(),index=ctpp_workers.keys())

In [745]:
# # Distribution
# df_dist = pd.DataFrame([df[scen_name]/df[scen_name].sum(),
#                        df[base_name]/df[base_name].sum(),
#                        df['CTPP 06-10']/df['CTPP 06-10'].sum()]).T

In [746]:
# display_charts(df_dist, kind='bar', title='Worker Distribution by County')

---

## Distance to Work

In [747]:
# Filter out non-workers
scen_df = (person_scen[person_scen['Worker Type'] <> 'Not a worker'])
base_df = (person_base[person_base['Worker Type'] <> 'Not a worker'])

df = pd.DataFrame([scen_df.groupby('Age').mean()['Auto Distance to Work'],
              base_df.groupby('Age').mean()['Auto Distance to Work']]).T
df.columns=([scen_name,base_name])
df.fillna(0,inplace=True)
df

Unnamed: 0,2006 Survey,2014 Survey
<5,0.0,0
5-11,0.0,0
12-15,0.0,0
16-17,3.865185,1
18-24,8.31975,1
25-34,10.625236,1
35-44,10.303467,1
44-54,10.10478,1
55-64,9.807923,1
65-74,8.225459,1


In [748]:
display_charts(df, title='Distance to Work', kind='bar')

---

## Transit Pass Ownership

In [749]:
df = pd.DataFrame([person_scen.groupby('Transit Pass').mean()['Auto Distance to Work'],
              person_base.groupby('Transit Pass').mean()['Auto Distance to Work']]).T
df.columns=([scen_name,base_name])

In [750]:
df.index = ['No Pass', 'Transit Pass']
df

Unnamed: 0,2006 Survey,2014 Survey
No Pass,4.14286,
Transit Pass,7.484511,1.0


---

## Auto Ownership

In [751]:
new_hh_scen = hh_scen[hh_scen.index>=0]
df = pd.DataFrame([new_hh_scen.groupby('Household Vehicles').count()['Household ID'],
                   hh_base.groupby('Household Vehicles').count()['Household ID']]).T
df.columns=([scen_name,base_name])
df=df[df.index>=0]
df

Unnamed: 0_level_0,2006 Survey,2014 Survey
Household Vehicles,Unnamed: 1_level_1,Unnamed: 2_level_1
0,219,1412.0
1,1576,3331.0
2,1914,2063.0
3,712,613.0
4,207,200.0
5,76,58.0
6,24,9.0
7,6,9.0
8,5,4.0
9,1,


In [752]:
# Distribution
df_new = pd.DataFrame([df[scen_name]/df[scen_name].sum(),
             df[base_name]/df[base_name].sum()]).T

In [753]:
display_charts(df_new, title='Vehicles per Household', kind='bar', xlim=(0,6))

---