# Preprocessing Data

In [126]:
import os
import sys
sys.path.append('../')

# import geoplot as gplt
import holidays
us_holidays = holidays.UnitedStates()
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from matplotlib import dates
import math
import datetime
import rtree
import sys

import geopandas as gpd
from shapely import wkt
from shapely.geometry import Polygon, Point
from pyproj import CRS

from setup import *

%matplotlib inline
%load_ext autoreload
%autoreload 2


The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [326]:
period = 'after'
start_date = pd.to_datetime('20200302')
end_date = pd.to_datetime('20201031')

# period = 'before'
# start_date = pd.to_datetime('20190801')
# end_date = pd.to_datetime('20200301')


# Census Tract Shapefiles

In [128]:
# shapefile of census blocks 2010 
gblk = pd.read_csv(data_dir+'data_raw/CensusBlockTIGER2010.csv')
# craete a geometric object
gblk['the_geom'] = gblk['the_geom'].apply(wkt.loads)
# create the geo dataframe
block_gdf = gpd.GeoDataFrame(gblk, geometry='the_geom')
# Calculate the area of the blocks/polygons
block_gdf.crs = CRS('epsg:4326')
block_gdf = block_gdf.to_crs("epsg:26916")

In [177]:
tract_gdf = block_gdf.dissolve(by=['STATEFP10','COUNTYFP10','TRACTCE10'], 
                               as_index=False)[['STATEFP10','COUNTYFP10','TRACTCE10','the_geom']]
tract_gdf['area'] = tract_gdf['the_geom'].map(lambda p:p.area)
tract_gdf['GEOID10'] = tract_gdf['STATEFP10'].astype(str)+"_"+tract_gdf['COUNTYFP10'].astype(str)+"_"+tract_gdf['TRACTCE10'].astype(str)

In [162]:
print(len(tract_gdf), "census tracts.")

811 census tracts.


In [130]:
print('Census Tracts Columns: ', tract_gdf.columns.to_list())

Census Tracts Columns:  ['STATEFP10', 'COUNTYFP10', 'TRACTCE10', 'the_geom', 'area']


# Public Transit

In [327]:
d = pd.DataFrame()
df_iter = pd.read_csv(doe_data_dir+"raw_data/mobility/pt_aug19_oct20.csv", iterator=True, chunksize=1000000)
for df_chunk in df_iter:
    df_chunk['day'] = pd.to_datetime(df_chunk['day'])
    df_chunk = df_chunk[((df_chunk['day']>=start_date)&(df_chunk['day']<=end_date))]
    df_chunk['dow'] = df_chunk['dow'].str.strip()
    d = pd.concat((d, df_chunk), ignore_index=True)
    
d['day'] = pd.to_datetime(d['day']).dt.date
d['dow'] = d['dow'].str.strip()
print('Columns:', d.columns.tolist())

Columns: ['week', 'day', 'dow', 'quarter_hour', 'mode', 'stop_name', 'lat', 'lon', 'count']


In [328]:
# edit the temporal dimension.
days = d.groupby('day').size().reset_index().iloc[:, :-1].reset_index()
days['holiday'] = [int(date in us_holidays) for date in days['day']]
days.to_csv(project_data_dir+period+"_dates.csv", index=False)
daydict = pd.Series(days.index.values,index=days.day).to_dict()

In [329]:
print(days.iloc[0]['day'], days.iloc[-1]['day'])

2020-03-02 2020-10-31


In [239]:
d['day_index'] = d['day'].map(daydict)
# create ts as quarter_hour index
d['ts'] = (d['day_index'])*24*4 + d['quarter_hour']
d.dropna(inplace=True)

In [240]:
# rail counts wrt census tracts
d_points = d[['stop_name','mode','lat','lon']].drop_duplicates().dropna()
point_geom = [Point(xy) for xy in zip(d_points['lon'],d_points['lat'])]
crs = CRS("epsg:4326")
pointdf = gpd.GeoDataFrame(d_points, crs=crs, geometry=point_geom)
pointdf = pointdf.to_crs("epsg:26916")

stop_census = gpd.sjoin(tract_gdf, pointdf, how="inner", op='intersects')

d_census = d.merge(stop_census, on=['stop_name', 'mode', 'lat', 'lon'])

  if (await self.run_code(code, result,  async_=asy)):


In [241]:
rail_df = d_census.loc[d_census['mode']=='CTA Rail']
rail_df = rail_df.sort_values(by=['GEOID10','day_index','quarter_hour']) 

In [242]:
rail_df = rail_df.groupby(['GEOID10', 'ts', 'day_index','day', 'dow', 'quarter_hour', 'mode'], as_index=False).sum()\
                [['GEOID10', 'ts', 'day_index', 'day', 'dow', 'quarter_hour', 'mode', 'count']]

In [243]:
bus_df = d_census.loc[d_census['mode']=='CTA Bus']
bus_df = bus_df.sort_values(by=['GEOID10','day_index','day','quarter_hour']) 

In [244]:
bus_df = bus_df.groupby(['GEOID10', 'ts', 'day_index', 'day', 'dow', 'quarter_hour', 'mode'], as_index=False).sum()\
                [['GEOID10', 'ts','day_index', 'day', 'dow', 'quarter_hour', 'mode', 'count']]
bus_df.columns = ['GEOID10', 'ts','day_index', 'day', 'dow', 'quarter_hour', 'mode', 'bus_count']

In [245]:
# save data
bus_df.to_csv(project_data_dir+'data_processed/census_tract/bus_df_'+start_date.strftime("%y%m%d")+'_'+end_date.strftime("%y%m%d")+'.csv', index=False)
rail_df.to_csv(project_data_dir+'data_processed/census_tract/rail_df_'+start_date.strftime("%y%m%d")+'_'+end_date.strftime("%y%m%d")+'.csv', index=False)

# TNC

In [330]:
tnc_files = [name[12:-4].split("_")+[name] for name in os.listdir(doe_data_dir+"processed_data/mobility/TNC") if name[:11]=='chicago_tnc']
tnc_files = pd.DataFrame(tnc_files, columns=['start_date','end_date','name'])
tnc_files['start_date'] = pd.to_datetime(tnc_files['start_date']).dt.date
tnc_files['end_date'] = pd.to_datetime(tnc_files['end_date']).dt.date
tnc_files.sort_values(by='start_date', inplace=True)

tnc = pd.DataFrame()
for s,e,name in zip(tnc_files['start_date'],tnc_files['end_date'],tnc_files['name']):        
    if (end_date >= s) | (start_date <= e):
        df_chunk = pd.read_csv(doe_data_dir+"processed_data/mobility/TNC/"+name)
        df_chunk['Trip Start Timestamp'] = pd.to_datetime(df_chunk['Trip Start Timestamp'])
        df_chunk = df_chunk.dropna(subset=['Pickup Census Tract'])
        if (end_date >= e) & (start_date <= s):
            print(name, 'all')
            tnc = tnc.append(df_chunk)
        else:
            print(name, 'part')
            tnc = tnc.append(df_chunk[(df_chunk['Trip Start Timestamp']>=start_date)&(df_chunk['Trip Start Timestamp']<=end_date)])


  if (end_date >= s) | (start_date <= e):
  if (end_date >= e) & (start_date <= s):
  tnc = tnc.append(df_chunk[(df_chunk['Trip Start Timestamp']>=start_date)&(df_chunk['Trip Start Timestamp']<=end_date)])
  if (end_date >= s) | (start_date <= e):


chicago_tnc_20190601_20190831.csv part


  if (end_date >= e) & (start_date <= s):
  tnc = tnc.append(df_chunk[(df_chunk['Trip Start Timestamp']>=start_date)&(df_chunk['Trip Start Timestamp']<=end_date)])
  if (end_date >= s) | (start_date <= e):


chicago_tnc_20190901_20190930.csv part


  if (end_date >= e) & (start_date <= s):
  tnc = tnc.append(df_chunk[(df_chunk['Trip Start Timestamp']>=start_date)&(df_chunk['Trip Start Timestamp']<=end_date)])
  if (end_date >= s) | (start_date <= e):


chicago_tnc_20191001_20191231.csv part


  if (end_date >= e) & (start_date <= s):
  tnc = tnc.append(df_chunk[(df_chunk['Trip Start Timestamp']>=start_date)&(df_chunk['Trip Start Timestamp']<=end_date)])
  if (end_date >= s) | (start_date <= e):


chicago_tnc_20200101_20200229.csv part


  if (end_date >= e) & (start_date <= s):
  tnc = tnc.append(df_chunk[(df_chunk['Trip Start Timestamp']>=start_date)&(df_chunk['Trip Start Timestamp']<=end_date)])


chicago_tnc_20200301_20200331.csv part


  if (end_date >= s) | (start_date <= e):
  if (end_date >= e) & (start_date <= s):
  tnc = tnc.append(df_chunk)
  if (end_date >= s) | (start_date <= e):


chicago_tnc_20200401_20200630.csv all


  if (end_date >= e) & (start_date <= s):
  tnc = tnc.append(df_chunk)


chicago_tnc_20200701_20200930.csv all


  if (end_date >= s) | (start_date <= e):
  if (end_date >= e) & (start_date <= s):
  tnc = tnc.append(df_chunk)


chicago_tnc_20201001_20201031.csv all


  if (end_date >= s) | (start_date <= e):
  if (end_date >= e) & (start_date <= s):
  tnc = tnc.append(df_chunk[(df_chunk['Trip Start Timestamp']>=start_date)&(df_chunk['Trip Start Timestamp']<=end_date)])


chicago_tnc_20201101_20201130.csv part


  if (end_date >= s) | (start_date <= e):
  if (end_date >= e) & (start_date <= s):
  tnc = tnc.append(df_chunk[(df_chunk['Trip Start Timestamp']>=start_date)&(df_chunk['Trip Start Timestamp']<=end_date)])


chicago_tnc_20201201_20201231.csv part


  if (end_date >= s) | (start_date <= e):
  if (end_date >= e) & (start_date <= s):
  tnc = tnc.append(df_chunk[(df_chunk['Trip Start Timestamp']>=start_date)&(df_chunk['Trip Start Timestamp']<=end_date)])


chicago_tnc_20210101_20210131.csv part


  if (end_date >= s) | (start_date <= e):
  if (end_date >= e) & (start_date <= s):
  tnc = tnc.append(df_chunk[(df_chunk['Trip Start Timestamp']>=start_date)&(df_chunk['Trip Start Timestamp']<=end_date)])


chicago_tnc_20210201_20210228.csv part


  if (end_date >= s) | (start_date <= e):
  if (end_date >= e) & (start_date <= s):
  tnc = tnc.append(df_chunk[(df_chunk['Trip Start Timestamp']>=start_date)&(df_chunk['Trip Start Timestamp']<=end_date)])


chicago_tnc_20210301_20210331.csv part


  if (end_date >= s) | (start_date <= e):
  if (end_date >= e) & (start_date <= s):
  tnc = tnc.append(df_chunk[(df_chunk['Trip Start Timestamp']>=start_date)&(df_chunk['Trip Start Timestamp']<=end_date)])


chicago_tnc_20210401_20210430.csv part


  if (end_date >= s) | (start_date <= e):
  if (end_date >= e) & (start_date <= s):
  tnc = tnc.append(df_chunk[(df_chunk['Trip Start Timestamp']>=start_date)&(df_chunk['Trip Start Timestamp']<=end_date)])


chicago_tnc_20210501_20210531.csv part


  if (end_date >= s) | (start_date <= e):
  if (end_date >= e) & (start_date <= s):
  tnc = tnc.append(df_chunk[(df_chunk['Trip Start Timestamp']>=start_date)&(df_chunk['Trip Start Timestamp']<=end_date)])


chicago_tnc_20210601_20210630.csv part


  if (end_date >= s) | (start_date <= e):
  if (end_date >= e) & (start_date <= s):
  tnc = tnc.append(df_chunk[(df_chunk['Trip Start Timestamp']>=start_date)&(df_chunk['Trip Start Timestamp']<=end_date)])


chicago_tnc_20210701_20210731.csv part


  if (end_date >= s) | (start_date <= e):
  if (end_date >= e) & (start_date <= s):
  tnc = tnc.append(df_chunk[(df_chunk['Trip Start Timestamp']>=start_date)&(df_chunk['Trip Start Timestamp']<=end_date)])


chicago_tnc_20210801_20210831.csv part


  if (end_date >= s) | (start_date <= e):
  if (end_date >= e) & (start_date <= s):
  tnc = tnc.append(df_chunk[(df_chunk['Trip Start Timestamp']>=start_date)&(df_chunk['Trip Start Timestamp']<=end_date)])


chicago_tnc_20210901_20210930.csv part


  if (end_date >= s) | (start_date <= e):
  if (end_date >= e) & (start_date <= s):
  tnc = tnc.append(df_chunk[(df_chunk['Trip Start Timestamp']>=start_date)&(df_chunk['Trip Start Timestamp']<=end_date)])


chicago_tnc_20211001_20211031.csv part


  if (end_date >= s) | (start_date <= e):
  if (end_date >= e) & (start_date <= s):
  tnc = tnc.append(df_chunk[(df_chunk['Trip Start Timestamp']>=start_date)&(df_chunk['Trip Start Timestamp']<=end_date)])


chicago_tnc_20211101_20211130.csv part


  if (end_date >= s) | (start_date <= e):
  if (end_date >= e) & (start_date <= s):
  tnc = tnc.append(df_chunk[(df_chunk['Trip Start Timestamp']>=start_date)&(df_chunk['Trip Start Timestamp']<=end_date)])


chicago_tnc_20211201_20211231.csv part


  if (end_date >= s) | (start_date <= e):
  if (end_date >= e) & (start_date <= s):
  tnc = tnc.append(df_chunk[(df_chunk['Trip Start Timestamp']>=start_date)&(df_chunk['Trip Start Timestamp']<=end_date)])


chicago_tnc_20220101_20220131.csv part


  if (end_date >= s) | (start_date <= e):
  if (end_date >= e) & (start_date <= s):
  tnc = tnc.append(df_chunk[(df_chunk['Trip Start Timestamp']>=start_date)&(df_chunk['Trip Start Timestamp']<=end_date)])


chicago_tnc_20220201_20220228.csv part


In [331]:
tnc['start_hour'] = tnc['Trip Start Timestamp'].dt.hour
tnc['start_min'] = tnc['Trip Start Timestamp'].dt.minute
tnc['quarter_hour'] = tnc['start_hour']*4 + tnc['start_min']//15
tnc['day'] = tnc['Trip Start Timestamp'].dt.date
tnc['day_index'] = tnc['Trip Start Timestamp'].dt.date.map(daydict)
tnc['ts'] = tnc['day_index']*96 + tnc['quarter_hour']

tnc['Pickup Census Tract'] = tnc['Pickup Census Tract'].astype(int).astype(str)

In [332]:
tnc_out = tnc.groupby(['ts','day','day_index','quarter_hour','Pickup Census Tract'], as_index=False).count()\
[['ts','day','day_index','quarter_hour','Pickup Census Tract','Fare']]

In [333]:
tnc_out.columns = ['ts','day','day_index','quarter_hour','GEOID10','tnc_count']

In [334]:
tnc_out['STATEFP10'] = tnc_out['GEOID10'].str[:2].astype(int)
tnc_out['COUNTYFP10'] = tnc_out['GEOID10'].str[2:5].astype(int)
tnc_out['TRACTCE10'] = tnc_out['GEOID10'].str[5:].astype(int)

tnc_out['GEOID10'] = tnc_out['STATEFP10'].astype(str)+"_"+tnc_out['COUNTYFP10'].astype(str)+"_"+tnc_out['TRACTCE10'].astype(str)

tnc_out = tnc_out[['ts','day','day_index','quarter_hour','GEOID10','tnc_count']]

In [335]:
tnc_out.to_csv(project_data_dir+'data_processed/census_tract/tnc_df_'+start_date.strftime("%y%m%d")+'_'+end_date.strftime("%y%m%d")+'.csv', index=False)

# Demographic Data
2018 demographic data's resolution is block groups


This block groups is different from the census tract 10 block groups!!

In [146]:
blkgrps = gpd.read_file(data_dir+'data_raw/tl_2018_17_bg/tl_2018_17_bg.shp')
blkgrps['COUNTYFP'] = blkgrps['COUNTYFP'].astype(int)
blkgrps['TRACTCE'] = blkgrps['TRACTCE'].astype(int)
blkgrps['BLKGRPCE'] = blkgrps['BLKGRPCE'].astype(int)

blkgrps = blkgrps.to_crs("epsg:26916")
blkgrps['area'] = blkgrps['geometry'].map(lambda p: p.area)

In [147]:
# demographic information of census blocks
demos = pd.read_csv(project_data_dir+'data_raw/nhgis0004_ds239_20185_2018_blck_grp.csv')

In [148]:
data_cols = [x for x in list(demos.columns) if len(x) == 8 and x[0:2]=='AJ' and x[4]=='E']
# This will tell us if we have any non-numeic data types
# print(demo_to_grid[data_cols].dtypes.value_counts())
# Run this to find out which columns are non-numeric. Then can feed them to the next line
# demo_to_grid[data_cols].select_dtypes(include=['int64']).columns
data_cols.remove('AJ0EE001')

In [149]:
travel_time_cols = ['AJXEE0'+str(i).zfill(2) for i in range(2,14)]
tt_medians = [2.5,7,12,17,22,27,32,37,42,52,74.5,100]

# aggregate values for each station.
demo_tract = demos.groupby(['STATEA','COUNTYA','TRACTA'])[data_cols].sum().reset_index()

In [150]:
# create demographics for each cell.
demo_tract['pct25_34yrs'] = (demo_tract[['AJWBE011','AJWBE012',
                                         'AJWBE035','AJWBE036']].sum(axis = 1))/demo_tract['AJWBE001']
demo_tract['pct35_50yrs'] = (demo_tract[['AJWBE013','AJWBE014','AJWBE015',
                                         'AJWBE037','AJWBE038','AJWBE039']].sum(axis = 1))/demo_tract['AJWBE001']
demo_tract['pctover65yrs'] = (demo_tract[['AJWBE020','AJWBE021','AJWBE022','AJWBE023','AJWBE024','AJWBE025',
                                          'AJWBE044','AJWBE045','AJWBE046','AJWBE047','AJWBE048','AJWBE049']].sum(axis = 1))/demo_tract['AJWBE001']
demo_tract['pctwhite_alone'] = demo_tract['AJWNE002']/demo_tract['AJWNE001']
demo_tract['pct_nonwhite'] = (demo_tract['AJWNE001']-demo_tract['AJWNE002'])/demo_tract['AJWNE001']
demo_tract['pctblack_alone'] = demo_tract['AJWNE003']/demo_tract['AJWNE001']
demo_tract['pct_hs_grad'] = (demo_tract[['AJYPE017','AJYPE018','AJYPE019','AJYPE020','AJYPE021','AJYPE022',
                                         'AJYPE023','AJYPE024','AJYPE025']].sum(axis = 1))/demo_tract['AJYPE001']
demo_tract['pct_col_grad'] = (demo_tract[['AJYPE022','AJYPE023','AJYPE024','AJYPE025']].sum(axis = 1))/demo_tract['AJYPE001']
demo_tract['pctPTcommute'] = demo_tract['AJXCE010']/demo_tract['AJXCE001']

demo_tract['25_34yrs'] = (demo_tract[['AJWBE011','AJWBE012',
                                         'AJWBE035','AJWBE036']].sum(axis = 1))
demo_tract['35_50yrs'] = (demo_tract[['AJWBE013','AJWBE014','AJWBE015',
                                         'AJWBE037','AJWBE038','AJWBE039']].sum(axis = 1))
demo_tract['over65yrs'] = (demo_tract[['AJWBE020','AJWBE021','AJWBE022','AJWBE023','AJWBE024','AJWBE025',
                                          'AJWBE044','AJWBE045','AJWBE046','AJWBE047','AJWBE048','AJWBE049']].sum(axis = 1))
demo_tract['white_alone'] = demo_tract['AJWNE002']/demo_tract['AJWNE001']
demo_tract['nonwhite'] = demo_tract['AJWNE001']-demo_tract['AJWNE002']
demo_tract['black_alone'] = demo_tract['AJWNE003']
demo_tract['hs_grad'] = (demo_tract[['AJYPE017','AJYPE018','AJYPE019','AJYPE020','AJYPE021','AJYPE022',
                                         'AJYPE023','AJYPE024','AJYPE025']].sum(axis = 1))
demo_tract['col_grad'] = (demo_tract[['AJYPE022','AJYPE023','AJYPE024','AJYPE025']].sum(axis = 1))
demo_tract['PTcommute'] = demo_tract['AJXCE010']

demo_tract['avg_tt_to_work'] = (demo_tract[travel_time_cols].multiply(tt_medians, axis = 1).sum(axis =1))/demo_tract['AJXEE001']

# create income info
# demo_tract['pct60plusmincommute'] = (demo_tract[['AJXEE012','AJXEE013']].sum(axis=1))/demo_tract['AJXEE001']
cell_inc = demos.groupby(['STATEA','COUNTYA','TRACTA']).apply(lambda x: 
                    (x['AJWME001']*x['AJ0EE001']).sum(axis = 0)/x['AJWME001'].sum(axis=0)).reset_index()
cell_inc.columns = ['STATEA','COUNTYA','TRACTA','inc_per_capita']
demo_tract = pd.merge(demo_tract, cell_inc, how = 'left', on = ['STATEA','COUNTYA','TRACTA'])

  (x['AJWME001']*x['AJ0EE001']).sum(axis = 0)/x['AJWME001'].sum(axis=0)).reset_index()


In [151]:
# extract demo data
demo_df = demo_tract[['STATEA','COUNTYA','TRACTA','AJWME001',
                       'pct25_34yrs','pct35_50yrs','pctover65yrs',
                       'pctwhite_alone','pct_nonwhite','pctblack_alone',
                       'pct_col_grad','pctPTcommute',
                       '25_34yrs','35_50yrs','over65yrs',
                       'white_alone','nonwhite','black_alone',
                       'col_grad','PTcommute',
                       'avg_tt_to_work','inc_per_capita']].fillna(0)

demo_df = demo_df.rename(columns = {'AJWME001':'tot_population'})

# demo_colnames = ['tot_population','pct25_34yrs','pct35_50yrs','pctover65yrs',
#          'pctwhite_alone','pct_nonwhite','pctblack_alone',
#          'pct_col_grad','pctPTcommute','25_34yrs','35_50yrs','over65yrs',
#          'white_alone','nonwhite','black_alone',
#          'col_grad','PTcommute','avg_tt_to_work','inc_per_capita','area']

demo_df['GEOID10'] = demo_df['STATEA'].astype(str)+"_"+demo_df['COUNTYA'].astype(str)+"_"+demo_df['TRACTA'].astype(str)

In [178]:
poi = gpd.read_file(project_data_dir+'data_raw/POI_OSM/gis_osm_pois_free_1.shp')

non_attractions = ['water_tower','toilet','post_box','pitch','drinking_water','atm','comms_tower','prison',
                   'recycling','camera_surveillance','windmill','bench','waste_basket','water_well','telephone',
                  'vending_machine','recycling_metal','wastewater_plant','reclycling_glass','chemist',
                  'recycling_paper','wayside_cross','recycling_clothes']

### Use category data from Patrick
poi_cat = pd.read_csv(project_data_dir+'data_raw/POIcat.csv')

poi_cat.columns = ['fclass','cat','start_active_time','end_active_time']

poi_map = pd.Series(poi_cat.cat.values,index=poi_cat.fclass).to_dict()

poi['category'] = poi['fclass'].map(poi_map).fillna('other')

### further aggregate
cat2 = {'bar':'restaurant','entertainment':'entertainment', 'hotel':'entertainment', 
        'public attraction':'entertainment', 'restaurant':'restaurant',
        'school':'school', 'services':'shop', 'shop':'shop'}
poi['category_2'] = poi['category'].map(cat2)
poi.dropna(inplace=True)

categories = list(poi['category_2'].unique())

poi = poi[~poi['fclass'].isin(non_attractions)]
poi.crs = CRS('epsg:4326')
poi = poi.to_crs("epsg:26916")

In [184]:
poi_census = gpd.sjoin(poi, tract_gdf, how="inner", op='intersects')
p = poi_census.groupby(['GEOID10','category_2']).size().unstack().reset_index().fillna(0)
p['all'] = p[categories].sum(axis = 1)

# save
demo_df = pd.merge(demo_df, p, on=['GEOID10'])

  if (await self.run_code(code, result,  async_=asy)):


In [152]:
# job
jobs = pd.read_csv(project_data_dir+'data_raw/il_wac_S000_JT00_2017.csv', usecols = [0,1])
jobs.columns = ['GEOID10','jobs']

jobs['GEOID10'] = jobs['GEOID10'].astype(str)
jobs['STATEA'] = jobs['GEOID10'].str[:2].astype(int)
jobs['COUNTYA'] = jobs['GEOID10'].str[2:5].astype(int)
jobs['TRACTA'] = jobs['GEOID10'].str[5:11].astype(int)
jobs = jobs.groupby(['STATEA','COUNTYA','TRACTA'])['jobs'].sum().reset_index()

demo_df = pd.merge(demo_df, jobs, on=['STATEA','COUNTYA','TRACTA'])

In [186]:
# save demographics data
demo_df.to_csv(project_data_dir+"data_processed/census_tract/other/spatial.csv", index=False)

# Weather Data

In [154]:
weather = pd.read_csv(project_data_dir+'data_raw/weather.csv', parse_dates = ['DATE'])
weather = weather[weather['NAME']=='CHICAGO OHARE INTERNATIONAL AIRPORT, IL US']

weather.fillna(0, inplace=True)
weather.sort_values(by = 'DATE', inplace=True)
weather = weather[(weather['DATE']>=start_date)&(weather['DATE']<=end_date)]

weather['week'] = weather.DATE.dt.isocalendar().week
weather['year'] = weather.DATE.dt.isocalendar().year
weather['TAVGAVG'] = weather.groupby(['week','year']).transform(np.mean)['TAVG']

weather['PRCP'] = weather['PRCP'] + weather['SNOW']

weather['DAY_INDEX'] = weather['DATE'].map(daydict)
weather[['DATE','DAY_INDEX','PRCP','TAVGAVG','TAVG']].to_csv(project_data_dir+'data_processed/census_tract/weather_'+start_date.strftime("%y%m%d")+'_'+end_date.strftime("%y%m%d")+'.csv', index=False)


  weather['TAVGAVG'] = weather.groupby(['week','year']).transform(np.mean)['TAVG']


# Level of Service: Bus Frequency

In [155]:
# Get GFTS Versions
gtfs_versions = [name.split("-") for name in os.listdir(project_data_dir+"data_raw/CTA_GTFS")]
gtfs_versions = pd.DataFrame(gtfs_versions, columns=['start_date','end_date'])
gtfs_versions['start_date'] = pd.to_datetime(gtfs_versions['start_date']).dt.date
gtfs_versions['end_date'] = pd.to_datetime(gtfs_versions['end_date']).dt.date
gtfs_versions = gtfs_versions.sort_values('end_date', ascending=False).reset_index(drop=True)

In [156]:
def get_service_id(path, cur_date):
    
    path = project_data_dir + "data_raw/CTA_GTFS/"+folder
#     s = pd.read_csv(path+'/stops.txt', dtype={'trip_id':str})
#     r = pd.read_csv(path+'/routes.txt') 
#     t = pd.read_csv(path+'/trips.txt', dtype={'service_id':str, 'trip_id':str, 'shape_id':str, 'schd_trip_id':str})
#     st = pd.read_csv(path+'/stop_times.txt', dtype={'trip_id':str})
    c = pd.read_csv(path+'/calendar.txt', dtype={'service_id':str, 'start_date':str, 'end_date':str})
    cd = pd.read_csv(path+'/calendar_dates.txt', dtype={'service_id':str, 'date':str})

    c['start_date'] = pd.to_datetime(c['start_date'])
    c['end_date'] = pd.to_datetime(c['end_date'])
    cd['date'] = pd.to_datetime(cd['date'])

    # get service_ids from calendar.txt
    dow_col = cur_date.day_name().lower()
    service_ids = c[(c[dow_col] == 1) & (c['start_date']<=cur_date) & (c['end_date']>=cur_date)]['service_id']

    # modify according to calendar_dates.txt
    service_ids = pd.concat([service_ids, cd[(cd['date']==cur_date) & (cd['exception_type']==1)]['service_id']])
    service_ids = service_ids[~service_ids.isin(cd[(cd['date']==cur_date) & (cd['exception_type']==2)]['service_id'])]

    return service_ids.to_list()

In [157]:
def get_counts_from_gtfs(path, cur_date):
    
    path = project_data_dir + "data_raw/CTA_GTFS/"+folder
    s = pd.read_csv(path+'/stops.txt', dtype={'trip_id':str})
    r = pd.read_csv(path+'/routes.txt') 
    t = pd.read_csv(path+'/trips.txt', dtype={'service_id':str, 'trip_id':str, 'shape_id':str, 'schd_trip_id':str})
    st = pd.read_csv(path+'/stop_times.txt', dtype={'trip_id':str})
    c = pd.read_csv(path+'/calendar.txt', dtype={'service_id':str, 'start_date':str, 'end_date':str})
    cd = pd.read_csv(path+'/calendar_dates.txt', dtype={'service_id':str, 'date':str})

    c['start_date'] = pd.to_datetime(c['start_date'])
    c['end_date'] = pd.to_datetime(c['end_date'])
    cd['date'] = pd.to_datetime(cd['date'])

    # get service_ids from calendar.txt
    dow_col = cur_date.day_name().lower()
    service_ids = c[(c[dow_col] == 1) & (c['start_date']<=cur_date) & (c['end_date']>=cur_date)]['service_id']

    # modify according to calendar_dates.txt
    service_ids = pd.concat([service_ids, cd[(cd['date']==cur_date) & (cd['exception_type']==1)]['service_id']])
    service_ids = service_ids[~service_ids.isin(cd[(cd['date']==cur_date) & (cd['exception_type']==2)]['service_id'])]

    # get ids of scheduled trips from trips.txt
    schd_trips = t[t['service_id'].isin(service_ids.tolist())]

    # filter to bus routes
    r = r[r['route_type']==3]
   
    # count stop times for identified trips and stops
    stt = st.merge(s, on='stop_id')
    stt = stt.merge(schd_trips, how = 'inner', on = 'trip_id')
    stt = stt.merge(r, how = 'inner', on = 'route_id')
    stt['arrival_time'] = stt['arrival_time'].astype(str)
    stt['arrival_hour'] = stt['arrival_time'].str[:2].astype(int)
    stt['arrival_min'] = stt['arrival_time'].str[3:5].astype(int)
    stt['quarter_hour'] = stt['arrival_hour'] * 4 + stt['arrival_min']//15
    stt['ts'] = ts_day_base + stt['quarter_hour']
    
    # spatial join with census tracts
    point_geom = [Point(xy) for xy in zip(stt['stop_lon'],stt['stop_lat'])]
    stt = gpd.GeoDataFrame(stt, crs=CRS("epsg:4326"), geometry=point_geom)
    stt = stt.to_crs(CRS("epsg:26916"))    
    
    stt = gpd.sjoin(tract_gdf, stt, how='left', op='contains')
 
    # 
    counts = stt.groupby(['ts','quarter_hour','GEOID10'], as_index=False).count()\
                    [['ts','quarter_hour','GEOID10','trip_id']]
    
    return counts

In [158]:
def daterange(start_date, end_date):
    for n in range(int((end_date - start_date).days)):
        yield start_date + datetime.timedelta(n)

ts_day_base = 0
bus_freq = pd.DataFrame([])    
for cur_date in daterange(start_date, end_date):
    day_index = daydict[cur_date.date()]
    ver = gtfs_versions[(cur_date >= pd.to_datetime(gtfs_versions['start_date'])) \
                        & (cur_date <= pd.to_datetime(gtfs_versions['end_date']))]
    print(day_index, cur_date, end='\t')
    if len(ver)==0:
        ts_day_base += 96
        print(cur_date, 'not found!')
        continue
    else:
        folder = datetime.datetime.strftime(gtfs_versions['start_date'].iloc[ver.index[0]],"%Y%m%d") + '-' + \
                datetime.datetime.strftime(gtfs_versions['end_date'].iloc[ver.index[0]],"%Y%m%d")
        new = get_counts_from_gtfs(folder, cur_date)
        
        new['day_index'] = day_index
        new['day'] = cur_date
        bus_freq = pd.concat([bus_freq, new])
        ts_day_base += 96


0 2020-03-02 00:00:00	

  exec(code_obj, self.user_global_ns, self.user_ns)


1 2020-03-03 00:00:00	

  exec(code_obj, self.user_global_ns, self.user_ns)


2 2020-03-04 00:00:00	

  exec(code_obj, self.user_global_ns, self.user_ns)


3 2020-03-05 00:00:00	

  exec(code_obj, self.user_global_ns, self.user_ns)


4 2020-03-06 00:00:00	

  exec(code_obj, self.user_global_ns, self.user_ns)


5 2020-03-07 00:00:00	

  exec(code_obj, self.user_global_ns, self.user_ns)


6 2020-03-08 00:00:00	

  exec(code_obj, self.user_global_ns, self.user_ns)


7 2020-03-09 00:00:00	

  exec(code_obj, self.user_global_ns, self.user_ns)


8 2020-03-10 00:00:00	

  exec(code_obj, self.user_global_ns, self.user_ns)


9 2020-03-11 00:00:00	

  exec(code_obj, self.user_global_ns, self.user_ns)


10 2020-03-12 00:00:00	

  exec(code_obj, self.user_global_ns, self.user_ns)


11 2020-03-13 00:00:00	

  exec(code_obj, self.user_global_ns, self.user_ns)


12 2020-03-14 00:00:00	

  exec(code_obj, self.user_global_ns, self.user_ns)


13 2020-03-15 00:00:00	

  exec(code_obj, self.user_global_ns, self.user_ns)


14 2020-03-16 00:00:00	

  exec(code_obj, self.user_global_ns, self.user_ns)


15 2020-03-17 00:00:00	

  exec(code_obj, self.user_global_ns, self.user_ns)


16 2020-03-18 00:00:00	

  exec(code_obj, self.user_global_ns, self.user_ns)


17 2020-03-19 00:00:00	

  exec(code_obj, self.user_global_ns, self.user_ns)


18 2020-03-20 00:00:00	

  exec(code_obj, self.user_global_ns, self.user_ns)


19 2020-03-21 00:00:00	

  exec(code_obj, self.user_global_ns, self.user_ns)


20 2020-03-22 00:00:00	

  exec(code_obj, self.user_global_ns, self.user_ns)


21 2020-03-23 00:00:00	

  exec(code_obj, self.user_global_ns, self.user_ns)


22 2020-03-24 00:00:00	

  exec(code_obj, self.user_global_ns, self.user_ns)


23 2020-03-25 00:00:00	

  exec(code_obj, self.user_global_ns, self.user_ns)


24 2020-03-26 00:00:00	

  exec(code_obj, self.user_global_ns, self.user_ns)


25 2020-03-27 00:00:00	

  exec(code_obj, self.user_global_ns, self.user_ns)


26 2020-03-28 00:00:00	

  exec(code_obj, self.user_global_ns, self.user_ns)


27 2020-03-29 00:00:00	

  exec(code_obj, self.user_global_ns, self.user_ns)


28 2020-03-30 00:00:00	

  exec(code_obj, self.user_global_ns, self.user_ns)


29 2020-03-31 00:00:00	

  exec(code_obj, self.user_global_ns, self.user_ns)


30 2020-04-01 00:00:00	

  exec(code_obj, self.user_global_ns, self.user_ns)


31 2020-04-02 00:00:00	

  exec(code_obj, self.user_global_ns, self.user_ns)


32 2020-04-03 00:00:00	

  exec(code_obj, self.user_global_ns, self.user_ns)


33 2020-04-04 00:00:00	

  exec(code_obj, self.user_global_ns, self.user_ns)


34 2020-04-05 00:00:00	

  exec(code_obj, self.user_global_ns, self.user_ns)


35 2020-04-06 00:00:00	

  exec(code_obj, self.user_global_ns, self.user_ns)


36 2020-04-07 00:00:00	

  exec(code_obj, self.user_global_ns, self.user_ns)


37 2020-04-08 00:00:00	

  exec(code_obj, self.user_global_ns, self.user_ns)


38 2020-04-09 00:00:00	

  exec(code_obj, self.user_global_ns, self.user_ns)


39 2020-04-10 00:00:00	

  exec(code_obj, self.user_global_ns, self.user_ns)


40 2020-04-11 00:00:00	

  exec(code_obj, self.user_global_ns, self.user_ns)


41 2020-04-12 00:00:00	

  exec(code_obj, self.user_global_ns, self.user_ns)


42 2020-04-13 00:00:00	

  exec(code_obj, self.user_global_ns, self.user_ns)


43 2020-04-14 00:00:00	

  exec(code_obj, self.user_global_ns, self.user_ns)


44 2020-04-15 00:00:00	

  exec(code_obj, self.user_global_ns, self.user_ns)


45 2020-04-16 00:00:00	

  exec(code_obj, self.user_global_ns, self.user_ns)


46 2020-04-17 00:00:00	

  exec(code_obj, self.user_global_ns, self.user_ns)


47 2020-04-18 00:00:00	

  exec(code_obj, self.user_global_ns, self.user_ns)


48 2020-04-19 00:00:00	

  exec(code_obj, self.user_global_ns, self.user_ns)


49 2020-04-20 00:00:00	

  exec(code_obj, self.user_global_ns, self.user_ns)


50 2020-04-21 00:00:00	

  exec(code_obj, self.user_global_ns, self.user_ns)


51 2020-04-22 00:00:00	

  exec(code_obj, self.user_global_ns, self.user_ns)


52 2020-04-23 00:00:00	

  exec(code_obj, self.user_global_ns, self.user_ns)


53 2020-04-24 00:00:00	

  exec(code_obj, self.user_global_ns, self.user_ns)


54 2020-04-25 00:00:00	

  exec(code_obj, self.user_global_ns, self.user_ns)


55 2020-04-26 00:00:00	

  exec(code_obj, self.user_global_ns, self.user_ns)


56 2020-04-27 00:00:00	

  exec(code_obj, self.user_global_ns, self.user_ns)


57 2020-04-28 00:00:00	

  exec(code_obj, self.user_global_ns, self.user_ns)


58 2020-04-29 00:00:00	

  exec(code_obj, self.user_global_ns, self.user_ns)


59 2020-04-30 00:00:00	

  exec(code_obj, self.user_global_ns, self.user_ns)


60 2020-05-01 00:00:00	

  exec(code_obj, self.user_global_ns, self.user_ns)


61 2020-05-02 00:00:00	

  exec(code_obj, self.user_global_ns, self.user_ns)


62 2020-05-03 00:00:00	

  exec(code_obj, self.user_global_ns, self.user_ns)


63 2020-05-04 00:00:00	

  exec(code_obj, self.user_global_ns, self.user_ns)


64 2020-05-05 00:00:00	

  exec(code_obj, self.user_global_ns, self.user_ns)


65 2020-05-06 00:00:00	

  exec(code_obj, self.user_global_ns, self.user_ns)


66 2020-05-07 00:00:00	

  exec(code_obj, self.user_global_ns, self.user_ns)


67 2020-05-08 00:00:00	

  exec(code_obj, self.user_global_ns, self.user_ns)


68 2020-05-09 00:00:00	

  exec(code_obj, self.user_global_ns, self.user_ns)


69 2020-05-10 00:00:00	

  exec(code_obj, self.user_global_ns, self.user_ns)


70 2020-05-11 00:00:00	

  exec(code_obj, self.user_global_ns, self.user_ns)


71 2020-05-12 00:00:00	

  exec(code_obj, self.user_global_ns, self.user_ns)


72 2020-05-13 00:00:00	

  exec(code_obj, self.user_global_ns, self.user_ns)


73 2020-05-14 00:00:00	

  exec(code_obj, self.user_global_ns, self.user_ns)


74 2020-05-15 00:00:00	

  exec(code_obj, self.user_global_ns, self.user_ns)


75 2020-05-16 00:00:00	

  exec(code_obj, self.user_global_ns, self.user_ns)


76 2020-05-17 00:00:00	

  exec(code_obj, self.user_global_ns, self.user_ns)


77 2020-05-18 00:00:00	

  exec(code_obj, self.user_global_ns, self.user_ns)


78 2020-05-19 00:00:00	

  exec(code_obj, self.user_global_ns, self.user_ns)


79 2020-05-20 00:00:00	

  exec(code_obj, self.user_global_ns, self.user_ns)


80 2020-05-21 00:00:00	

  exec(code_obj, self.user_global_ns, self.user_ns)


81 2020-05-22 00:00:00	

  exec(code_obj, self.user_global_ns, self.user_ns)


82 2020-05-23 00:00:00	

  exec(code_obj, self.user_global_ns, self.user_ns)


83 2020-05-24 00:00:00	

  exec(code_obj, self.user_global_ns, self.user_ns)


84 2020-05-25 00:00:00	

  exec(code_obj, self.user_global_ns, self.user_ns)


85 2020-05-26 00:00:00	

  exec(code_obj, self.user_global_ns, self.user_ns)


86 2020-05-27 00:00:00	

  exec(code_obj, self.user_global_ns, self.user_ns)


87 2020-05-28 00:00:00	

  exec(code_obj, self.user_global_ns, self.user_ns)


88 2020-05-29 00:00:00	

  exec(code_obj, self.user_global_ns, self.user_ns)


89 2020-05-30 00:00:00	

  exec(code_obj, self.user_global_ns, self.user_ns)


90 2020-05-31 00:00:00	

  exec(code_obj, self.user_global_ns, self.user_ns)


91 2020-06-01 00:00:00	

  exec(code_obj, self.user_global_ns, self.user_ns)


92 2020-06-02 00:00:00	

  exec(code_obj, self.user_global_ns, self.user_ns)


93 2020-06-03 00:00:00	

  exec(code_obj, self.user_global_ns, self.user_ns)


94 2020-06-04 00:00:00	

  exec(code_obj, self.user_global_ns, self.user_ns)


95 2020-06-05 00:00:00	

  exec(code_obj, self.user_global_ns, self.user_ns)


96 2020-06-06 00:00:00	

  exec(code_obj, self.user_global_ns, self.user_ns)


97 2020-06-07 00:00:00	

  exec(code_obj, self.user_global_ns, self.user_ns)


98 2020-06-08 00:00:00	

  exec(code_obj, self.user_global_ns, self.user_ns)


99 2020-06-09 00:00:00	

  exec(code_obj, self.user_global_ns, self.user_ns)


100 2020-06-10 00:00:00	

  exec(code_obj, self.user_global_ns, self.user_ns)


101 2020-06-11 00:00:00	

  exec(code_obj, self.user_global_ns, self.user_ns)


102 2020-06-12 00:00:00	

  exec(code_obj, self.user_global_ns, self.user_ns)


103 2020-06-13 00:00:00	

  exec(code_obj, self.user_global_ns, self.user_ns)


104 2020-06-14 00:00:00	

  exec(code_obj, self.user_global_ns, self.user_ns)


105 2020-06-15 00:00:00	

  exec(code_obj, self.user_global_ns, self.user_ns)


106 2020-06-16 00:00:00	

  exec(code_obj, self.user_global_ns, self.user_ns)


107 2020-06-17 00:00:00	

  exec(code_obj, self.user_global_ns, self.user_ns)


108 2020-06-18 00:00:00	

  exec(code_obj, self.user_global_ns, self.user_ns)


109 2020-06-19 00:00:00	

  exec(code_obj, self.user_global_ns, self.user_ns)


110 2020-06-20 00:00:00	

  exec(code_obj, self.user_global_ns, self.user_ns)


111 2020-06-21 00:00:00	

  exec(code_obj, self.user_global_ns, self.user_ns)


112 2020-06-22 00:00:00	

  exec(code_obj, self.user_global_ns, self.user_ns)


113 2020-06-23 00:00:00	

  exec(code_obj, self.user_global_ns, self.user_ns)


114 2020-06-24 00:00:00	

  exec(code_obj, self.user_global_ns, self.user_ns)


115 2020-06-25 00:00:00	

  exec(code_obj, self.user_global_ns, self.user_ns)


116 2020-06-26 00:00:00	

  exec(code_obj, self.user_global_ns, self.user_ns)


117 2020-06-27 00:00:00	

  exec(code_obj, self.user_global_ns, self.user_ns)


118 2020-06-28 00:00:00	

  exec(code_obj, self.user_global_ns, self.user_ns)


119 2020-06-29 00:00:00	

  exec(code_obj, self.user_global_ns, self.user_ns)


120 2020-06-30 00:00:00	

  exec(code_obj, self.user_global_ns, self.user_ns)


121 2020-07-01 00:00:00	

  exec(code_obj, self.user_global_ns, self.user_ns)


122 2020-07-02 00:00:00	

  exec(code_obj, self.user_global_ns, self.user_ns)


123 2020-07-03 00:00:00	

  exec(code_obj, self.user_global_ns, self.user_ns)


124 2020-07-04 00:00:00	

  exec(code_obj, self.user_global_ns, self.user_ns)


125 2020-07-05 00:00:00	

  exec(code_obj, self.user_global_ns, self.user_ns)


126 2020-07-06 00:00:00	

  exec(code_obj, self.user_global_ns, self.user_ns)


127 2020-07-07 00:00:00	

  exec(code_obj, self.user_global_ns, self.user_ns)


128 2020-07-08 00:00:00	

  exec(code_obj, self.user_global_ns, self.user_ns)


129 2020-07-09 00:00:00	

  exec(code_obj, self.user_global_ns, self.user_ns)


130 2020-07-10 00:00:00	

  exec(code_obj, self.user_global_ns, self.user_ns)


131 2020-07-11 00:00:00	

  exec(code_obj, self.user_global_ns, self.user_ns)


132 2020-07-12 00:00:00	

  exec(code_obj, self.user_global_ns, self.user_ns)


133 2020-07-13 00:00:00	

  exec(code_obj, self.user_global_ns, self.user_ns)


134 2020-07-14 00:00:00	

  exec(code_obj, self.user_global_ns, self.user_ns)


135 2020-07-15 00:00:00	

  exec(code_obj, self.user_global_ns, self.user_ns)


136 2020-07-16 00:00:00	

  exec(code_obj, self.user_global_ns, self.user_ns)


137 2020-07-17 00:00:00	

  exec(code_obj, self.user_global_ns, self.user_ns)


138 2020-07-18 00:00:00	

  exec(code_obj, self.user_global_ns, self.user_ns)


139 2020-07-19 00:00:00	

  exec(code_obj, self.user_global_ns, self.user_ns)


140 2020-07-20 00:00:00	

  exec(code_obj, self.user_global_ns, self.user_ns)


141 2020-07-21 00:00:00	

  exec(code_obj, self.user_global_ns, self.user_ns)


142 2020-07-22 00:00:00	

  exec(code_obj, self.user_global_ns, self.user_ns)


143 2020-07-23 00:00:00	

  exec(code_obj, self.user_global_ns, self.user_ns)


144 2020-07-24 00:00:00	

  exec(code_obj, self.user_global_ns, self.user_ns)


145 2020-07-25 00:00:00	

  exec(code_obj, self.user_global_ns, self.user_ns)


146 2020-07-26 00:00:00	

  exec(code_obj, self.user_global_ns, self.user_ns)


147 2020-07-27 00:00:00	

  exec(code_obj, self.user_global_ns, self.user_ns)


148 2020-07-28 00:00:00	

  exec(code_obj, self.user_global_ns, self.user_ns)


149 2020-07-29 00:00:00	

  exec(code_obj, self.user_global_ns, self.user_ns)


150 2020-07-30 00:00:00	

  exec(code_obj, self.user_global_ns, self.user_ns)


151 2020-07-31 00:00:00	

  exec(code_obj, self.user_global_ns, self.user_ns)


152 2020-08-01 00:00:00	

  exec(code_obj, self.user_global_ns, self.user_ns)


153 2020-08-02 00:00:00	

  exec(code_obj, self.user_global_ns, self.user_ns)


154 2020-08-03 00:00:00	

  exec(code_obj, self.user_global_ns, self.user_ns)


155 2020-08-04 00:00:00	

  exec(code_obj, self.user_global_ns, self.user_ns)


156 2020-08-05 00:00:00	

  exec(code_obj, self.user_global_ns, self.user_ns)


157 2020-08-06 00:00:00	

  exec(code_obj, self.user_global_ns, self.user_ns)


158 2020-08-07 00:00:00	

  exec(code_obj, self.user_global_ns, self.user_ns)


159 2020-08-08 00:00:00	

  exec(code_obj, self.user_global_ns, self.user_ns)


160 2020-08-09 00:00:00	

  exec(code_obj, self.user_global_ns, self.user_ns)


161 2020-08-10 00:00:00	

  exec(code_obj, self.user_global_ns, self.user_ns)


162 2020-08-11 00:00:00	

  exec(code_obj, self.user_global_ns, self.user_ns)


163 2020-08-12 00:00:00	

  exec(code_obj, self.user_global_ns, self.user_ns)


164 2020-08-13 00:00:00	

  exec(code_obj, self.user_global_ns, self.user_ns)


165 2020-08-14 00:00:00	

  exec(code_obj, self.user_global_ns, self.user_ns)


166 2020-08-15 00:00:00	

  exec(code_obj, self.user_global_ns, self.user_ns)


167 2020-08-16 00:00:00	

  exec(code_obj, self.user_global_ns, self.user_ns)


168 2020-08-17 00:00:00	

  exec(code_obj, self.user_global_ns, self.user_ns)


169 2020-08-18 00:00:00	

  exec(code_obj, self.user_global_ns, self.user_ns)


170 2020-08-19 00:00:00	

  exec(code_obj, self.user_global_ns, self.user_ns)


171 2020-08-20 00:00:00	

  exec(code_obj, self.user_global_ns, self.user_ns)


172 2020-08-21 00:00:00	

  exec(code_obj, self.user_global_ns, self.user_ns)


173 2020-08-22 00:00:00	

  exec(code_obj, self.user_global_ns, self.user_ns)


174 2020-08-23 00:00:00	

  exec(code_obj, self.user_global_ns, self.user_ns)


175 2020-08-24 00:00:00	

  exec(code_obj, self.user_global_ns, self.user_ns)


176 2020-08-25 00:00:00	

  exec(code_obj, self.user_global_ns, self.user_ns)


177 2020-08-26 00:00:00	

  exec(code_obj, self.user_global_ns, self.user_ns)


178 2020-08-27 00:00:00	

  exec(code_obj, self.user_global_ns, self.user_ns)


179 2020-08-28 00:00:00	

  exec(code_obj, self.user_global_ns, self.user_ns)


180 2020-08-29 00:00:00	

  exec(code_obj, self.user_global_ns, self.user_ns)


181 2020-08-30 00:00:00	

  exec(code_obj, self.user_global_ns, self.user_ns)


182 2020-08-31 00:00:00	

  exec(code_obj, self.user_global_ns, self.user_ns)


183 2020-09-01 00:00:00	

  exec(code_obj, self.user_global_ns, self.user_ns)


184 2020-09-02 00:00:00	

  exec(code_obj, self.user_global_ns, self.user_ns)


185 2020-09-03 00:00:00	

  exec(code_obj, self.user_global_ns, self.user_ns)


186 2020-09-04 00:00:00	

  exec(code_obj, self.user_global_ns, self.user_ns)


187 2020-09-05 00:00:00	

  exec(code_obj, self.user_global_ns, self.user_ns)


188 2020-09-06 00:00:00	

  exec(code_obj, self.user_global_ns, self.user_ns)


189 2020-09-07 00:00:00	

  exec(code_obj, self.user_global_ns, self.user_ns)


190 2020-09-08 00:00:00	

  exec(code_obj, self.user_global_ns, self.user_ns)


191 2020-09-09 00:00:00	

  exec(code_obj, self.user_global_ns, self.user_ns)


192 2020-09-10 00:00:00	

  exec(code_obj, self.user_global_ns, self.user_ns)


193 2020-09-11 00:00:00	

  exec(code_obj, self.user_global_ns, self.user_ns)


194 2020-09-12 00:00:00	

  exec(code_obj, self.user_global_ns, self.user_ns)


195 2020-09-13 00:00:00	

  exec(code_obj, self.user_global_ns, self.user_ns)


196 2020-09-14 00:00:00	

  exec(code_obj, self.user_global_ns, self.user_ns)


197 2020-09-15 00:00:00	

  exec(code_obj, self.user_global_ns, self.user_ns)


198 2020-09-16 00:00:00	

  exec(code_obj, self.user_global_ns, self.user_ns)


199 2020-09-17 00:00:00	

  exec(code_obj, self.user_global_ns, self.user_ns)


200 2020-09-18 00:00:00	

  exec(code_obj, self.user_global_ns, self.user_ns)


201 2020-09-19 00:00:00	

  exec(code_obj, self.user_global_ns, self.user_ns)


202 2020-09-20 00:00:00	

  exec(code_obj, self.user_global_ns, self.user_ns)


203 2020-09-21 00:00:00	

  exec(code_obj, self.user_global_ns, self.user_ns)


204 2020-09-22 00:00:00	

  exec(code_obj, self.user_global_ns, self.user_ns)


205 2020-09-23 00:00:00	

  exec(code_obj, self.user_global_ns, self.user_ns)


206 2020-09-24 00:00:00	

  exec(code_obj, self.user_global_ns, self.user_ns)


207 2020-09-25 00:00:00	

  exec(code_obj, self.user_global_ns, self.user_ns)


208 2020-09-26 00:00:00	

  exec(code_obj, self.user_global_ns, self.user_ns)


209 2020-09-27 00:00:00	

  exec(code_obj, self.user_global_ns, self.user_ns)


210 2020-09-28 00:00:00	

  exec(code_obj, self.user_global_ns, self.user_ns)


211 2020-09-29 00:00:00	

  exec(code_obj, self.user_global_ns, self.user_ns)


212 2020-09-30 00:00:00	

  exec(code_obj, self.user_global_ns, self.user_ns)


213 2020-10-01 00:00:00	

  exec(code_obj, self.user_global_ns, self.user_ns)


214 2020-10-02 00:00:00	

  exec(code_obj, self.user_global_ns, self.user_ns)


215 2020-10-03 00:00:00	

  exec(code_obj, self.user_global_ns, self.user_ns)


216 2020-10-04 00:00:00	

  exec(code_obj, self.user_global_ns, self.user_ns)


217 2020-10-05 00:00:00	

  exec(code_obj, self.user_global_ns, self.user_ns)


218 2020-10-06 00:00:00	

  exec(code_obj, self.user_global_ns, self.user_ns)


219 2020-10-07 00:00:00	

  exec(code_obj, self.user_global_ns, self.user_ns)


220 2020-10-08 00:00:00	

  exec(code_obj, self.user_global_ns, self.user_ns)


221 2020-10-09 00:00:00	

  exec(code_obj, self.user_global_ns, self.user_ns)


222 2020-10-10 00:00:00	

  exec(code_obj, self.user_global_ns, self.user_ns)


223 2020-10-11 00:00:00	

  exec(code_obj, self.user_global_ns, self.user_ns)


224 2020-10-12 00:00:00	

  exec(code_obj, self.user_global_ns, self.user_ns)


225 2020-10-13 00:00:00	

  exec(code_obj, self.user_global_ns, self.user_ns)


226 2020-10-14 00:00:00	

  exec(code_obj, self.user_global_ns, self.user_ns)


227 2020-10-15 00:00:00	

  exec(code_obj, self.user_global_ns, self.user_ns)


228 2020-10-16 00:00:00	

  exec(code_obj, self.user_global_ns, self.user_ns)


229 2020-10-17 00:00:00	

  exec(code_obj, self.user_global_ns, self.user_ns)


230 2020-10-18 00:00:00	

  exec(code_obj, self.user_global_ns, self.user_ns)


231 2020-10-19 00:00:00	

  exec(code_obj, self.user_global_ns, self.user_ns)


232 2020-10-20 00:00:00	

  exec(code_obj, self.user_global_ns, self.user_ns)


233 2020-10-21 00:00:00	

  exec(code_obj, self.user_global_ns, self.user_ns)


234 2020-10-22 00:00:00	

  exec(code_obj, self.user_global_ns, self.user_ns)


235 2020-10-23 00:00:00	

  exec(code_obj, self.user_global_ns, self.user_ns)


236 2020-10-24 00:00:00	

  exec(code_obj, self.user_global_ns, self.user_ns)


237 2020-10-25 00:00:00	

  exec(code_obj, self.user_global_ns, self.user_ns)


238 2020-10-26 00:00:00	

  exec(code_obj, self.user_global_ns, self.user_ns)


239 2020-10-27 00:00:00	

  exec(code_obj, self.user_global_ns, self.user_ns)


240 2020-10-28 00:00:00	

  exec(code_obj, self.user_global_ns, self.user_ns)


241 2020-10-29 00:00:00	

  exec(code_obj, self.user_global_ns, self.user_ns)


242 2020-10-30 00:00:00	

  exec(code_obj, self.user_global_ns, self.user_ns)


In [None]:
bus_freq.columns = ['ts','quarter_hour','GEOID10','num_schd_trp_15min', 'day_index', 'day']
bus_freq.to_csv(project_data_dir+'data_processed/census_tract/los_df_'+start_date.strftime("%y%m%d")+'_'+end_date.strftime("%y%m%d")+'.csv', index=False)