In [6]:
# Evaluate trip characteristics for panel households

In [3]:
import pandas as pd
import numpy as np

In [5]:
# Load 2014 and 2015 survey trip data.
# The worksheet name is passed positionally: read_excel has no `sheet` keyword
# (older pandas silently ignored it, newer pandas raises TypeError), and the real
# keyword has been spelled differently across pandas releases.
trip14 = pd.read_excel(r'data\2014\2014-pr3-hhsurvey-trips.xlsx', 'Sheet1')
trip15 = pd.read_excel(r'data\2015\4_PSRC2015_Trip_2015-07-02.xlsx', 'Sheet1')

In [44]:
# Keep only the 2015 trips that belong to panel households.
# A panel household has an hhid starting with 14..., i.e. below 15000000.
from_panel_hh = trip15['hhid'] < 15000000
panel_trips15 = trip15[from_panel_hh]

In [45]:
# One entry per panel household: hhid repeats on every trip row of the trip table,
# so de-duplicate before using the ids as a membership list.
panel_hhs = panel_trips15['hhid'].unique().tolist()

In [46]:
# 2014 trips whose household id also appears among the 2015 panel trips
in_panel = trip14['hhid'].isin(panel_hhs)
panel_trips14 = trip14.loc[in_panel]

In [52]:
# Tag each trip table with its survey year; df_sample reads `.year` to label its columns.
panel_trips15.year = '2015'
panel_trips14.year = '2014'

# Columns that df_sample counts for each survey.
# No weights yet for 2015, so an arbitrary column is used just to sum up counts.
expwt14, expwt15 = 'expwt_final', 'hhid'

In [51]:
def df_sample(df1, df2, field, agg1, agg2, report_avg=True):
    '''Print sample counts and shares of `field` for two surveys side by side.

    Parameters
    ----------
    df1, df2 : pandas.DataFrame
        Survey records to compare. Each must carry a string ``year``
        attribute (set by the caller); it is used to label the columns.
    field : str
        Column whose distinct values become the rows of the table.
    agg1, agg2 : str
        Column of df1 / df2 whose non-null entries are counted per group.
    report_avg : bool, default True
        If True, also print the mean of `field` for each survey. This is
        only meaningful when `field` is numeric rather than a category code.

    Returns
    -------
    None
        The comparison table is printed, not returned.
    '''
    label1, label2 = df1.year, df2.year

    # Group once per survey and reuse the result for both samples and shares.
    counts1 = df1.groupby(field)[agg1].count()
    counts2 = df2.groupby(field)[agg2].count()

    # Index the table by the union of values seen in either survey. Seeding it
    # from df1 alone would silently drop values that only occur in df2.
    df = pd.DataFrame(index=counts1.index.union(counts2.index))
    df[label1 + ' Samples'] = counts1
    df[label2 + ' Samples'] = counts2
    df[" "*5] = ""    # empty col to separate samples from shares
    # Shares are relative to all non-null entries of the counted column.
    df[label1 + ' Shares'] = (counts1 / df1[agg1].count()).round(2)
    df[label2 + ' Shares'] = (counts2 / df2[agg2].count()).round(2)

    # Single-argument print(...) behaves the same on Python 2 and Python 3.
    print(df)
    print("---------" * 10)

    if report_avg:
        print("Sample average " + label1 + ": " + str(np.round(df1[field].mean(), 2)))
        print("Sample average " + label2 + ": " + str(np.round(df2[field].mean(), 2)))

In [49]:
# Compare panel trips

**Trip Purpose**

In [53]:
# d_purpose holds purpose category codes, so a mean of the codes carries no meaning;
# suppress the "Sample average" lines for this comparison.
df_sample(panel_trips14, panel_trips15, 'd_purpose', expwt14, expwt15, report_avg=False)

           2014 Samples  2015 Samples        2014 Shares  2015 Shares
d_purpose                                                            
1                  4234          4064               0.34         0.32
2                  1620          1625               0.13         0.13
3                   288           306               0.02         0.02
4                   814           829               0.07         0.07
5                   750           702               0.06         0.06
6                   375           630               0.03         0.05
7                   265           253               0.02         0.02
8                   901           689               0.07         0.05
9                   710           528               0.06         0.04
10                  705           819               0.06         0.06
11                  744           696               0.06         0.06
12                  366           325               0.03         0.03
13                  

**Mode**

In [54]:
# mode holds category codes (including a -99 row in 2014), so a mean of the codes
# carries no meaning; suppress the "Sample average" lines for this comparison.
df_sample(panel_trips14, panel_trips15, 'mode', expwt14, expwt15, report_avg=False)

      2014 Samples  2015 Samples        2014 Shares  2015 Shares
mode                                                            
-99              6           NaN               0.00          NaN
 1            4580          4559               0.37         0.36
 2            2958          2926               0.24         0.23
 3             656           803               0.05         0.06
 4              49            14               0.00         0.00
 5              39            51               0.00         0.00
 6             291           226               0.02         0.02
 7            2554          2689               0.21         0.21
 8             898          1000               0.07         0.08
 9              55            54               0.00         0.00
 10             31            31               0.00         0.00
 11              5             2               0.00         0.00
 12            108           134               0.01         0.01
 13             19       

In [55]:
# Commute-only trips: origin purpose 1 and destination purpose 2
com14 = panel_trips14.query('d_purpose == 2 and o_purpose == 1')
com15 = panel_trips15.query('d_purpose == 2 and o_purpose == 1')
# query() returns new frames that do not carry the ad-hoc `year` attribute,
# so copy it over from the parent tables before calling df_sample.
com14.year = panel_trips14.year
com15.year = panel_trips15.year
# mode holds category codes, so skip the average of the codes.
df_sample(com14, com15, 'mode', expwt14, expwt15, report_avg=False)

      2014 Samples  2015 Samples        2014 Shares  2015 Shares
mode                                                            
1              559           549               0.53         0.57
2               50            47               0.05         0.05
3               14            11               0.01         0.01
4                7             6               0.01         0.01
5               13             5               0.01         0.01
6               65            54               0.06         0.06
7               77            90               0.07         0.09
8              231           190               0.22         0.20
9               16             3               0.02         0.00
10               6           NaN               0.01          NaN
11               1           NaN               0.00          NaN
13               3             4               0.00         0.00
15              13             3               0.01         0.00
-------------------------

In [56]:
# Compare reported trip durations between the two panel years.
# NOTE(review): the -99 rows in 2014 look like a missing-value code and are
# included in the printed sample average — confirm whether they should be.
df_sample(
    df1=panel_trips14,
    df2=panel_trips15,
    field='trip_dur_reported',
    agg1=expwt14,
    agg2=expwt15,
)

                   2014 Samples  2015 Samples        2014 Shares  2015 Shares
trip_dur_reported                                                            
-99                           7           NaN               0.00          NaN
 5                         2814          2714               0.23         0.22
 10                        2650          2654               0.21         0.21
 15                        2058          2282               0.17         0.18
 20                        1193          1367               0.10         0.11
 25                         761           745               0.06         0.06
 30                         946           941               0.08         0.07
 35                         365           394               0.03         0.03
 40                         338           357               0.03         0.03
 45                         334           304               0.03         0.02
 50                         164           142               0.01