# Compare base model results to observed

Compare the results of the base ActivitySim run to observed values in the Los Angeles area from the 2017 NHTS (National Household Travel Survey).

In [None]:
%matplotlib inline
%config InlineBackend.figure_format = 'retina'

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import openmatrix as omx
import tables
from matplotlib import ticker
import nhts

In [None]:
# NOTE(review): 'asu-light' is not one of the style sheets bundled with matplotlib; presumably a
# custom style available in the author's matplotlib configuration — confirm it is installed.
plt.style.use('asu-light')

In [None]:
# Open the skim matrices read-only; the handle is kept open for the rest of the notebook.
skims = omx.open_file('../model_inputs/skims.omx', mode='r')

In [None]:
# Path to the pipeline store of the base run. It is defined once so that the PyTables handle
# and the pandas reads that use `abmfn` cannot drift apart.
abmfn = "/Volumes/Pheasant Ridge/diss_data/model_output/abm/base/pipeline.h5"
# Read-only PyTables handle, used to inspect which nodes the store contains.
abm_res = tables.open_file(abmfn, mode="r")

In [None]:
# Load the trips table as stored under the trip_mode_choice node of the pipeline.
trip_mode_choice = pd.read_hdf(abmfn, key="/trips/trip_mode_choice")

In [None]:
# save some memory: first record per-column memory use (deep=True, so object columns are
# measured in full) as the baseline for the before/after comparison
total_mem_before = trip_mode_choice.memory_usage(deep=True)

In [None]:
# Convert the string-valued columns to categoricals and report how much memory that saved.
total_mem_before_mb = total_mem_before.sum() // (1024**2)
for text_col in ('primary_purpose', 'purpose', 'trip_mode'):
    trip_mode_choice[text_col] = trip_mode_choice[text_col].astype('category')
# every object column should have been converted by now
assert not (trip_mode_choice.dtypes == 'object').any()
# deep=True should not matter without object columns; it is kept so the two measurements are comparable
total_mem_after = trip_mode_choice.memory_usage(deep=True)
total_mem_after_mb = total_mem_after.sum() // (1024**2)
print(f'data type conversion saved {total_mem_before_mb - total_mem_after_mb:,d}mb RAM (before {total_mem_before_mb:,d}mb, now {total_mem_after_mb:,d}mb)')

In [None]:
# list the nodes directly under the root of the pipeline store
abm_res.list_nodes('/')

In [None]:
# load the tour participation information (joint tour participants, as stored under the
# trip_mode_choice node)
tour_participation = pd.read_hdf(abmfn, key='/joint_tour_participants/trip_mode_choice')

In [None]:
# display the joint tour participants table for inspection
tour_participation

In [None]:
# number of rows in tour_participation for each tour_id
# (presumably one row per participant, i.e. the party size of the tour — TODO confirm)
n_on_tour = tour_participation.groupby('tour_id').size()

In [None]:
# smallest group size among the tours that appear in tour_participation
n_on_tour.min()

In [None]:
# Attach the per-tour participant count to each trip. It is a left join, so trips whose tour
# is not listed in tour_participation get a missing n_on_tour.
tour_sizes = n_on_tour.rename('n_on_tour').to_frame()
trip_mode_choice = trip_mode_choice.merge(
    tour_sizes,
    left_on='tour_id',
    right_index=True,
    how='left',
)

In [None]:
# no 1 person tours appear in tour_participation, so a missing count means a party of one
trip_mode_choice['n_on_tour'] = trip_mode_choice['n_on_tour'].fillna(1)

## Read NHTS

In [None]:
# Directory holding the NHTS survey CSVs; defined once because every read below uses it.
nhts_survey_dir = '../data/canhts/nhts17_caltrans_tsdc_download/survey_data/'

nhts_tr = pd.read_csv(nhts_survey_dir + 'survey_trips.csv')
# use 5-day weights since we simulated a weekday
nhts_pwgt = pd.read_csv(nhts_survey_dir + 'weights_person_5day.csv')
nhts_hhwgt = pd.read_csv(nhts_survey_dir + 'weights_household_5day.csv')
nhts_hh = pd.read_csv(nhts_survey_dir + 'survey_households.csv')
nhts_hh = nhts_hh.dropna(how='all')  # get rid of blank lines
nhts_loc = (
    pd.read_csv(
        nhts_survey_dir + 'survey_location.csv',
        low_memory=False,
        # read the FIPS codes as text so they can be zero-padded and compared as strings
        dtype={'statefips': 'str', 'cntyfips': 'str'},
    )
    # one row per (household sample number, location number)
    .set_index(['sampno', 'locno'])
)

In [None]:
# Left-pad the FIPS codes with zeros to their fixed widths (county: 3 characters, state: 2).
for fips_col, fips_width in (('cntyfips', 3), ('statefips', 2)):
    nhts_loc[fips_col] = nhts_loc[fips_col].str.zfill(fips_width)

In [None]:
# inspect the rows with location number 100 for every household; later in the notebook these
# rows are used as the home locations (home_locs)
nhts_loc.loc[pd.IndexSlice[:,100],:]

In [None]:
# Attach weights, household attributes and location attributes to every NHTS trip record.
# Every merge is a left join validated as many-to-one, so the number of trip rows is unchanged.

# one row per household: the location with locno 100, indexed by sampno only
home_locs = nhts_loc.loc[pd.IndexSlice[:, 100], :].copy()
home_locs.index = home_locs.index.droplevel(1)

left_m1 = dict(how='left', validate='m:1')
nhts_tr = (
    nhts_tr
    .merge(nhts_pwgt, on=['sampno', 'perno'], **left_m1)
    .merge(nhts_hhwgt, on='sampno', **left_m1)
    .merge(nhts_hh, on='sampno', **left_m1)
    # destination location attributes, columns prefixed d_
    .merge(nhts_loc.add_prefix('d_'), left_on=['sampno', 'locno'], right_index=True, **left_m1)
    # origin location attributes, columns prefixed o_
    .merge(nhts_loc.add_prefix('o_'), left_on=['sampno', 'o_locno'], right_index=True, **left_m1)
    # home location attributes, columns prefixed h_
    .merge(home_locs.add_prefix('h_'), left_on='sampno', right_index=True, **left_m1)
)

In [None]:
# Travel-day distribution of the trips without a 5-day weight. Most are weekends, where a
# missing weight is expected; the few 2.0 and 5.0 values are probably holidays.
nhts_tr.loc[nhts_tr.wttrdfin5d.isnull(), 'travday'].value_counts()

In [None]:
# bring this in line with our simulation: keep only weighted trips whose origin, destination
# and household home location are all in California (state FIPS 06) and in one of the study counties
study_counties = ['025', '037', '059', '065', '071', '111']
keep_trip = nhts_tr.wttrdfin5d.notnull()
for loc_prefix in ('o', 'd', 'h'):
    in_state = nhts_tr[f'{loc_prefix}_statefips'] == '06'
    in_county = nhts_tr[f'{loc_prefix}_cntyfips'].isin(study_counties)
    keep_trip = keep_trip & in_state & in_county
nhts_tr = nhts_tr[keep_trip].copy()


In [None]:
# n trip records currently in nhts_tr
len(nhts_tr)

In [None]:
# n households: number of distinct sampno values among the trips in nhts_tr
len(nhts_tr.sampno.unique())

In [None]:
# n persons: number of distinct (sampno, perno) pairs among the trips in nhts_tr
len(nhts_tr[['sampno', 'perno']].drop_duplicates())

## Trip length distribution

Note that this is for _person_ trips.

In [None]:
# Turn the DIST skim matrix into a long Series indexed by (row position, column position).
# NOTE(review): the labels are 0-based matrix positions — confirm they match the origin and
# destination ids used in the trips table.
dist_matrix = np.array(skims['DIST'])
n_rows, n_cols = dist_matrix.shape
dist = pd.DataFrame(dist_matrix, index=np.arange(n_rows), columns=np.arange(n_cols)).stack()
# begone imperial units (presumably the skim is in miles; 1.609 km per mile)
dist *= 1.609

In [None]:
# Look up the skim distance for each trip's (origin, destination) pair. It is a left join
# validated as many-to-one, so trips without a matching pair get a missing distance.
trip_dist_km = dist.rename('trip_dist_km').to_frame()
trip_mode_choice = trip_mode_choice.merge(
    trip_dist_km,
    left_on=['origin', 'destination'],
    right_index=True,
    how='left',
    validate='m:1',
)

In [None]:
# Empirical cumulative distribution of simulated trip lengths: sort all distances, keep every
# 1000th value to thin the curve, and pair each kept value with its percentile rank.
sorted_trip_dist = np.sort(trip_mode_choice.trip_dist_km.to_numpy())
cumulative_trip_dist = sorted_trip_dist[::1000]
n_curve_points = len(cumulative_trip_dist)
cumulative_trip_dist_prop = np.arange(n_curve_points) / n_curve_points * 100

In [None]:
# now, compute the weighted cumulative trip distance distribution from nhts
# Negative trpmiles values mark unknown distances. argsort puts them first, so slicing off
# that many leading positions leaves only the known-distance trips, in ascending order.
n_unknown_dist = (nhts_tr.trpmiles < 0).sum()
tr_miles_sort = np.argsort(nhts_tr.trpmiles.to_numpy())[n_unknown_dist:]
assert not (nhts_tr.trpmiles.iloc[tr_miles_sort] < 0).any()
nhts_ctrpdist = (nhts_tr.trpmiles.iloc[tr_miles_sort] * 1.609)
# Normalise by the weight of the trips that are actually on the curve. Dividing by the weight
# of all trips (including unknown-distance ones) would stop the curve short of 100%.
nhts_known_dist_wgt = nhts_tr.wttrdfin5d.iloc[tr_miles_sort]
nhts_ctrpdist_prop = np.cumsum(nhts_known_dist_wgt) / nhts_known_dist_wgt.sum() * 100

In [None]:
# Cumulative trip-length curves, NHTS versus simulation, saved as a dissertation figure.
f, ax = plt.subplots()
ax.plot(nhts_ctrpdist, nhts_ctrpdist_prop, label='NHTS')
ax.plot(cumulative_trip_dist, cumulative_trip_dist_prop, label='Simulated')
ax.set_xlim(0, 100)  # show only the first 100 km
ax.legend()
ax.set_xlabel('Trip length (km)')
ax.set_ylabel('Cumulative proportion')
# the plotted values are already percentages, so the formatter only appends the sign
ax.yaxis.set_major_formatter(ticker.FuncFormatter('{:.0f}%'.format))
f.savefig('../../dissertation/fig/abm/trip_length_fit.pdf', bbox_inches='tight')

## Mode choice

Again for person trips.

In [None]:
# aggregate mode choices in ActivitySim output
# Each simplified mode lists the detailed trip modes that are folded into it.
sim_mode_groups = {
    'Bike': ['BIKE'],
    'Drive alone': ['DRIVEALONEFREE', 'DRIVEALONEPAY'],
    'Transit': [
        'DRIVE_COM', 'DRIVE_EXP', 'DRIVE_HVY', 'DRIVE_LOC', 'DRIVE_LRF',
        'WALK_COM', 'WALK_EXP', 'WALK_HVY', 'WALK_LOC', 'WALK_LRF',
    ],
    'Carpool': ['SHARED2FREE', 'SHARED2PAY', 'SHARED3FREE', 'SHARED3PAY'],
    'Taxi/TNC': ['TAXI', 'TNC_SHARED', 'TNC_SINGLE'],
    'Walk': ['WALK'],
}
# invert to the detailed -> simplified lookup that Series.map needs
sim_mode_lookup = {
    detailed_mode: simple_mode
    for simple_mode, detailed_modes in sim_mode_groups.items()
    for detailed_mode in detailed_modes
}
trip_mode_choice['smplmode'] = trip_mode_choice.trip_mode.map(sim_mode_lookup).astype('category')

In [None]:
# Collapse the NHTS trptrans codes into the same simplified modes used for the simulation.
# Each simplified mode lists the trptrans codes that are folded into it.
nhts_mode_groups = {
    "Walk": [1],
    "Bike": [2],
    # reclassified to "Carpool" in a later step when the trip qualifies
    "Drive alone": [3, 4, 5, 6],
    "Transit": [11, 13, 15, 16],
    "Taxi/TNC": [17],
    # includes the negative codes and code 10, which may be school bus
    # (or maybe transit? not sure how ActivitySim handles school bus trips)
    "Other": [-9, -8, -7, -1, 7, 8, 9, 10, 12, 14, 18, 19, 20, 97],
}
# invert to the code -> simplified mode lookup that Series.map needs
nhts_mode_lookup = {
    code: simple_mode
    for simple_mode, codes in nhts_mode_groups.items()
    for code in codes
}
nhts_smplmode = nhts_tr.trptrans.map(nhts_mode_lookup).astype('category')
# "Carpool" is not produced by the lookup, so register it as a category to allow assigning it later
nhts_tr['smplmode'] = nhts_smplmode.cat.add_categories(["Carpool"])

In [None]:
# Reclassify car trips as "Carpool" when numontrp minus nonhhcnt is at least 2
# (presumably: at least two household members were on the trip — TODO confirm against the codebook).
hh_on_trip = nhts_tr.numontrp - nhts_tr.nonhhcnt
is_carpool = (hh_on_trip >= 2) & (nhts_tr.smplmode == 'Drive alone')
nhts_tr.loc[is_carpool, 'smplmode'] = "Carpool"

In [None]:
# Simulated shares: fraction of trip records in each simplified mode.
# NOTE(review): n_on_tour is merged onto trip_mode_choice but not used here; if a joint-tour
# trip is stored as a single record, person-trip shares may need weighting by it — confirm.
sim_shares = trip_mode_choice.groupby('smplmode').size() / len(trip_mode_choice)
# NHTS shares: weighted fraction among trips whose mode is not "Other", put in the same
# order as the simulated shares so the two can be plotted side by side.
nhts_not_other = nhts_tr[nhts_tr.smplmode != 'Other']
nhts_shares = (
    nhts_not_other.groupby('smplmode').wttrdfin5d.sum() / nhts_not_other.wttrdfin5d.sum()
).reindex(sim_shares.index)

In [None]:
# Side-by-side bars of NHTS and simulated mode shares, saved as a dissertation figure.
f, ax = plt.subplots()
mode_pos = np.arange(len(sim_shares))
# each pair of bars straddles its tick: NHTS on the left, simulated on the right
ax.bar(mode_pos - 0.2, nhts_shares * 100, width=0.4, label='NHTS')
ax.bar(mode_pos + 0.2, sim_shares * 100, width=0.4, label='Simulated')
ax.set_xticks(mode_pos)
ax.set_xticklabels(sim_shares.index)
ax.legend()
ax.yaxis.set_major_formatter(ticker.FuncFormatter('{:.0f}%'.format))
f.savefig('../../dissertation/fig/abm/mode_choice_comparison.pdf', bbox_inches='tight')

In [None]:
# percentage of simulated trip records for each value of `depart`
# NOTE(review): `depart` is compared with NHTS start hours below, so it is presumably an
# hour of day — confirm
simhr = trip_mode_choice.groupby('depart').size() / len(trip_mode_choice) * 100

In [None]:
# Floor-dividing strttime by 100 keeps its leading digits as the start hour
# (presumably strttime is an HHMM clock value — TODO confirm).
nhts_tr['starthr'] = nhts_tr.strttime // 100
# weighted percentage of NHTS trips starting in each hour
wgt_by_starthr = nhts_tr.groupby('starthr').wttrdfin5d.sum()
nhtshr = wgt_by_starthr / nhts_tr.wttrdfin5d.sum() * 100

In [None]:
# Side-by-side bars of NHTS and simulated departure-hour shares, saved as a dissertation figure.
# Both series are put on a fixed 0-23 hour axis and the bars are placed at the hour itself.
# Placing them by position in nhtshr's index would shift every later bar under the wrong tick
# label whenever some hour has no NHTS trips. Hours without trips get a zero-height bar.
hours = np.arange(24)
nhtshr_by_hour = nhtshr.reindex(hours, fill_value=0)
simhr = simhr.reindex(hours, fill_value=0)
f, ax = plt.subplots()
plt.bar(hours - 0.2, nhtshr_by_hour, width=0.4, label='NHTS')
plt.bar(hours + 0.2, simhr, width=0.4, label='Simulated')
# tick positions are hours of the day, labelled on a 12-hour clock
plt.xticks([0, 3, 6, 9, 12, 15, 18, 21], ['12a', '3', '6', '9', '12p', '3p', '6p', '9p'])
plt.legend()
ax.yaxis.set_major_formatter(ticker.FuncFormatter('{:.0f}%'.format))
plt.savefig('../../dissertation/fig/abm/departure_time_comparison.pdf', bbox_inches='tight')

In [None]:
# number of simulated SHARED3FREE trips whose `depart` value is between 5 and 9
is_shared3_free = trip_mode_choice.trip_mode == 'SHARED3FREE'
departs_5_to_9 = trip_mode_choice.depart.isin([5, 6, 7, 8, 9])
(is_shared3_free & departs_5_to_9).sum()

In [None]:
# (scratch cell removed) The bare name `s` is not defined anywhere above in this notebook, so
# evaluating it raised NameError and broke "Restart Kernel -> Run All".