- add new expansion weights to survey.h5 to create calibration dataset
- expansion weights are created at the household level, but applied to all datasets based on hh ID
    - e.g., HH ID 14100001 has an expansion factor of 45.545, so its hhexpfac is 45.545, and the 'trexpfac' field of every trip record for that household is also 45.545.

In [11]:
import pandas as pd
import h5py

In [9]:
# Read the new household weights (space-delimited text file) into a DataFrame
weights_path = r'data/hh2014_ipfweights_3.dat'
wt = pd.read_csv(weights_path, sep=' ')

In [12]:
# Load latest 2014 survey file.
# Open explicitly read-only: this notebook only reads from the source survey,
# and older h5py versions open files in read/write mode when no mode is given.
h5file = h5py.File(r'R:\SoundCast\Inputs\2014\etc\survey.h5', 'r')

In [122]:
# Name of the expansion-weight column in each survey table (h5 group)
expwt_dict = {'Trip': 'trexpfac',
              'Tour': 'toexpfac',
              'Household': 'hhexpfac',
              'HouseholdDay': 'hdexpfac',
              'Person': 'psexpfac',
              'PersonDay': 'pdexpfac'}

# Short table codes mapped to h5 group names (not referenced in this cell)
group_dict = {
    'hday': 'HouseholdDay',
    'hrec': 'Household',
    'pday': 'PersonDay',
    'prec': 'Person',
    'tour': 'Tour',
    'trip': 'Trip',
}

# Columns written as float64; every other column is written as int32.
# Each name needs its own comma: without one after 'hdexpfac', Python joins
# it with the next literal into 'hdexpfacpwautime', and both the 'hdexpfac'
# and 'pwautime' columns end up truncated to integers.
float_columns = {
    'travdist', 'travcost', 'travtime', 'trexpfac',
    'tautotime', 'tautocost', 'tautodist', 'toexpfac', 'hdexpfac',
    'pwautime', 'pwaudist', 'psautime', 'psaudist', 'psexpfac',
    'pdexpfac', 'hhexpfac',
}

# results h5 file
f = h5py.File('survey_updated.h5', 'w')

# Updated tables, keyed by h5 group name, kept for inspection in later cells
h5_dict = {}
try:
    for table in h5file.keys():
        # Rebuild the table as a DataFrame, one column per h5 dataset
        df = pd.DataFrame()
        for field in h5file[table].keys():
            df[field] = h5file[table][field][:]

        weight_col = expwt_dict[table]

        # Attach the new household-level weight to every record by household ID;
        # the left join keeps records whose household has no new weight
        df = pd.merge(df, wt[['hhid', 'expwt_final_3']],
                      left_on='hhno', right_on='hhid', how='left')

        # Fill missing fields (from college survey) with old values.
        # The filled result is assigned back to the column instead of calling
        # fillna(inplace=True) on a column selection, which may act on a
        # temporary copy and leave df unchanged.
        df[weight_col] = df['expwt_final_3'].fillna(df[weight_col])
        df.drop(['expwt_final_3', 'hhid'], axis=1, inplace=True)

        h5_dict[table] = df

        # Write results to new h5 container
        grp = f.create_group(table)
        for column in df.columns:
            dtype = 'float64' if column in float_columns else 'int32'
            grp.create_dataset(column, data=df[column].astype(dtype).values)
finally:
    # Always release the output file, even if a table fails part-way through
    f.close()

In [121]:
# Close the results file handle `f` opened in the cell above.
# NOTE(review): presumably re-run by hand when that cell stopped before
# reaching its own f.close() — confirm whether this cell is still needed.
f.close()

In [111]:
# Spot-check: show the updated trip records of one sample household
samp = 14106065
trip_df = h5_dict['Trip']
trip_df[trip_df['hhno'] == samp]

Unnamed: 0,arrtm,dadtyp,day,deptm,dorp,dpcl,dpurp,dtaz,endacttm,half,...,otaz,pathtype,pno,tour,travcost,travdist,travtime,trexpfac,tseg,tsvid
53852,425,2,1,395,1,280652,1,3518,900,1,...,3617,1,1,1,0,12.58,23.83,348.8189,1,1
53853,920,4,1,900,1,330830,4,3615,925,2,...,3518,1,1,1,0,12.26,23.46,348.8189,1,2
53854,930,1,1,925,1,329612,0,3617,975,2,...,3615,1,1,1,0,0.47,5.64,348.8189,2,3
53855,1025,4,1,975,1,346928,3,3642,1030,1,...,3617,1,1,2,0,10.62,30.92,348.8189,1,4
53856,1055,4,1,1030,1,319160,3,3563,1055,2,...,3642,1,1,2,0,13.37,24.27,348.8189,1,5
53857,1065,1,1,1055,1,329612,0,3617,1080,2,...,3563,1,1,2,0,5.74,14.19,348.8189,2,6
53858,1160,4,1,1150,1,319497,3,3563,1160,1,...,3617,1,1,3,0,5.85,14.64,348.8189,1,9
53859,1180,4,1,1160,1,347569,5,3657,1200,2,...,3563,1,1,3,0,13.44,22.3,348.8189,1,10
53860,1205,4,1,1200,1,346928,3,3642,1205,2,...,3657,1,1,3,0,1.9,6.9,348.8189,2,11
53861,1230,1,1,1205,1,329612,0,3617,1250,2,...,3642,1,1,3,0,10.31,22.84,348.8189,3,12


In [112]:
# Show the weight-file row for the sample household, for comparison
wt.loc[wt['hhid'] == samp]

Unnamed: 0,hhid,expwt_initial,expwt_final_3
6006,14106065,327.156,348.8189


In [116]:
# Display the trip table with missing values shown as -1
# (fillna returns a new frame here; the table in h5_dict is not modified)
trip_table = h5_dict['Trip']
trip_table.fillna(value=-1)

Unnamed: 0,arrtm,dadtyp,day,deptm,dorp,dpcl,dpurp,dtaz,endacttm,half,...,otaz,pathtype,pno,tour,travcost,travdist,travtime,trexpfac,tseg,tsvid
0,820,4,1,780,-1,1256966,4,815,900,1,...,553,3,1,1,2.25,8.11,26.51,16.5100,1,1
1,915,4,1,900,-1,1223110,5,629,1020,1,...,815,3,1,1,2.25,5.78,21.02,16.5100,2,2
2,1080,1,1,1020,-1,750257,0,553,180,2,...,629,3,1,1,2.25,2.65,10.65,16.5100,1,3
3,570,4,1,545,1,751756,4,304,630,1,...,281,1,1,1,0.00,0.62,6.74,3.9100,1,1
4,640,1,1,630,1,1223541,0,281,180,2,...,304,1,1,1,0.00,0.68,6.54,3.9100,1,2
5,700,2,1,630,-1,1222518,2,552,1050,1,...,752,3,1,1,2.25,7.97,25.53,4.1600,1,1
6,1070,4,1,1050,-1,828899,4,503,1080,2,...,552,3,1,1,2.25,0.95,3.36,4.1600,1,2
7,1140,1,1,1080,-1,937172,0,752,180,2,...,503,3,1,1,2.25,6.87,19.26,4.1600,2,3
8,440,4,1,405,-1,1098718,1,8,1020,1,...,1840,3,1,1,2.75,5.42,22.27,3.9100,1,1
9,1050,2,1,1020,-1,932775,2,72,1280,1,...,8,3,1,1,2.25,2.57,8.66,3.9100,2,2


In [84]:
# Fill missing fields (from college survey) with old values.
# The filled result is assigned straight back to 'hhexpfac' instead of calling
# fillna(inplace=True) on a column selection, which may act on a temporary
# copy and leave df unchanged.
df['hhexpfac'] = df['expwt_final_3'].fillna(df['hhexpfac'])
df.drop(['expwt_final_3', 'hhid'], axis=1, inplace=True)

In [91]:
# Show the stored household record for the sample household
hh_df = h5_dict['Household']
hh_df[hh_df['hhno'] == samp]

Unnamed: 0,hh515,hhcu5,hhexpfac,hhftw,hhhsc,hhincome,hhno,hhoad,hhparcel,hhptw,hhret,hhsize,hhtaz,hhuni,hhvehs,hhwkrs,hownrent,hrestype,samptype
1929,0,0,83.972,2,0,40000,14100006,0,718302,0,0,2,464,0,0,2,2,5,0


In [92]:
# Show the sample household's row in the working DataFrame
df.loc[df['hhno'] == samp]

Unnamed: 0,hh515,hhcu5,hhexpfac,hhftw,hhhsc,hhincome,hhno,hhoad,hhparcel,hhptw,hhret,hhsize,hhtaz,hhuni,hhvehs,hhwkrs,hownrent,hrestype,samptype
1929,0,0,54.5609,2,0,40000,14100006,0,718302,0,0,2,464,0,0,2,2,5,0


In [93]:
# Show the weight-file row for the sample household, for comparison
wt.loc[wt['hhid'] == samp]

Unnamed: 0,hhid,expwt_initial,expwt_final_3
5,14100006,77.2094,54.5609


In [94]:
# Show the stored tour records for the sample household
tour_df = h5_dict['Tour']
tour_df[tour_df['hhno'] == samp]

Unnamed: 0,c,d,day,fhtindx1,fhtindx2,hhno,id,jtindex,parent,pdpurp,...,tlvorig,tmodetp,toadtyp,toexpfac,topcl,totaz,tour,tpathtp,tripsh1,tripsh2
2473,225,69,1,0,0,14100006,1410000611,0,0,1,...,505,6,1,83.97,718297,464,1,3,1,2
2474,275,1240,1,0,0,14100006,1410000621,0,0,1,...,480,6,1,83.97,718297,464,1,3,1,1


In [46]:
# Display the household weights table loaded from the weights file
wt

Unnamed: 0,hhid,expwt_initial,expwt_final_3
0,14100001,87.2388,75.9405
1,14100002,87.2388,32.4822
2,14100003,71.0773,49.4452
3,14100004,71.0773,61.4777
4,14100005,71.0773,95.7208
5,14100006,77.2094,54.5609
6,14100007,71.0773,81.5452
7,14100008,77.2094,83.6118
8,14100009,87.2388,47.4075
9,14100010,87.2388,67.6125
