## Exploratory Data Analysis

In [1]:
import os
import sys

import pandas as pd
import geopandas as gpd
import numpy as np

pd.set_option("display.max_columns", None)  # show every column when a frame is rendered

In [2]:
# Enable IPython's autoreload extension so that edits to imported modules are
# picked up live, without restarting the kernel.
%load_ext autoreload
%autoreload 2    

In [3]:
# Project root: update this path every time you move the project root folder.
# NOTE(review): this is a machine-specific absolute path; anyone else running the
# notebook has to edit it before continuing.
os.environ['project_root'] = '/Users/geekduck/Dropbox/gt_survey'

# Put the project root on the import path. The membership check keeps re-runs of
# this cell from stacking duplicate entries on sys.path.
if os.environ['project_root'] not in sys.path:
    sys.path.append(os.environ['project_root'])

In [4]:
from carpoolsim.basic_settings import *

## Load data
- trip inputs
- taz data

In [10]:
# Read the cleaned trip table; the first CSV column is used as the index.
trips_csv_path = os.path.join(os.environ['data_inputs'], "cleaned", "trips.csv")
gt_survey = pd.read_csv(trips_csv_path, index_col=0)

print(gt_survey.shape)
gt_survey.sample(2)

(1545, 6)


Unnamed: 0,trip_id,orig_lon,orig_lat,dest_lon,dest_lat,new_min
116,157,-84.3757,33.7718,-84.397971,33.775766,540.0
1266,2018,-84.592,33.8972,-84.397971,33.775766,540.0


In [13]:
# Read the TAZ layer from the cleaned inputs folder.
tazs_shp_path = os.path.join(os.environ['data_inputs'], "cleaned", "tazs.shp")
tazs = gpd.read_file(tazs_shp_path)

print(tazs.shape)
tazs.sample(2)

(5873, 2)


Unnamed: 0,taz_id,geometry
5819,4319,"POLYGON ((-84.88473 33.33892, -84.88590 33.338..."
3154,1678,"POLYGON ((-84.32960 33.74125, -84.32818 33.739..."


In [21]:
# assign TAZ given long/lat
def get_taz_given_lon_lat(df_row, tazs=None):
    """Return the id of the first TAZ whose geometry contains the row's point.

    Parameters
    ----------
    df_row : mapping-like (e.g. one DataFrame row)
        Must provide a "geometry" entry holding the point to locate.
        NOTE(review): the trip table as loaded from trips.csv shows
        orig_lon/orig_lat columns but no "geometry" column, so the point
        geometry presumably has to be built before this is applied -- confirm.
    tazs : DataFrame-like, optional
        TAZ table with a "geometry" column and an id column named "TAZS" or
        "taz_id". When omitted, the notebook-level ``tazs`` frame is used.

    Returns
    -------
    The id of the first matching TAZ (in table order), or -1 when no TAZ
    geometry contains the point.

    Raises
    ------
    NameError
        If ``tazs`` is omitted and no notebook-level ``tazs`` is defined.
    KeyError
        If the TAZ table has neither a "TAZS" nor a "taz_id" column.
    """
    if tazs is None:
        # The former default (`gdf`) is not defined anywhere in this notebook, which
        # made the `def` itself fail on a fresh kernel. Resolve the TAZ layer at call
        # time instead; `tazs` is shadowed by the parameter, hence globals().
        try:
            tazs = globals()["tazs"]
        except KeyError:
            raise NameError(
                "no TAZ table given and no notebook-level 'tazs' is defined"
            ) from None

    # Keep honouring the original "TAZS" column when it exists; otherwise use the
    # "taz_id" column that the TAZ layer loaded in this notebook exposes.
    taz_col = "TAZS" if "TAZS" in tazs.columns else "taz_id"

    point = df_row["geometry"]
    # Walk the zones in table order and stop at the first hit: same answer as
    # testing every zone and taking the first match, without the wasted checks.
    for taz_id, taz_geom in zip(tazs[taz_col], tazs["geometry"]):
        if taz_geom.contains(point):
            return taz_id
    return -1

# Look up the origin TAZ for every trip, one row at a time.
orig_taz_by_trip = gt_survey.apply(get_taz_given_lon_lat, axis=1)
gt_survey["orig_taz"] = orig_taz_by_trip

In [22]:
# Add Georgia Tech as the final destination of every trip.
gt_destination = {
    "dest_taz": 484,
    "dest_lon": -84.397971,
    "dest_lat": 33.775766,
}
for dest_col, dest_value in gt_destination.items():
    gt_survey[dest_col] = dest_value

In [23]:
# Keep only the trips whose origin was matched to a TAZ (orig_taz != -1).
filt = (gt_survey["orig_taz"] != -1)
# .copy() detaches the filtered rows from the frame they were sliced from, so
# adding columns to gt_survey afterwards does not raise pandas'
# SettingWithCopyWarning.
gt_survey = gt_survey.loc[filt, :].copy()
print(gt_survey.shape)

(1519, 151)


In [26]:
# Load the region -> TAZ lookup and invert it into a TAZ -> region mapping.
import pickle

# NOTE(review): the other loaders in this notebook read os.environ['data_inputs'];
# confirm that PROJ_LIB is really the intended base directory for this file.
pickle_pth = os.path.join(os.environ['PROJ_LIB'], "data_inputs", 'taz_zones.pickle')

# NOTE(review): unpickling executes whatever the file contains -- only load a
# taz_zones.pickle that comes from a trusted source.
with open(pickle_pth, 'rb') as handle:
    taz_zone = pickle.load(handle)

# Each key of taz_zone is a region whose value lists its TAZs; flip that so a TAZ
# (as an int) points to its region. A TAZ listed more than once keeps the last
# region seen.
taz2region = {
    int(taz): region
    for region in taz_zone
    for taz in taz_zone[region]
}
def map_taz2zone(taz):
    """Look up the region recorded in ``taz2region`` for ``taz``.

    ``taz`` is converted with ``int()`` before the lookup; a TAZ missing from
    the mapping raises ``KeyError``.
    """
    taz_key = int(taz)
    return taz2region[taz_key]

# Translate the origin and destination TAZ of each trip into its region label.
for region_col, taz_col in (("O_region", "orig_taz"), ("D_region", "dest_taz")):
    gt_survey[region_col] = gt_survey[taz_col].apply(map_taz2zone)

In [27]:
# Preview the enriched table, then list the distinct origin and destination regions.
# NOTE(review): head() renders raw survey responses (response ids, ZIP codes,
# free-text answers) into the saved output -- consider clearing it before sharing.
display(gt_survey.head())
for region_col in ("O_region", "D_region"):
    print(gt_survey[region_col].unique())

Unnamed: 0,StartDate,EndDate,Status,Progress,Duration..in.seconds.,Finished,RecordedDate,ResponseId,DistributionChannel,UserLanguage,Q66,Q73,Q73_2_TEXT,Q73_5_TEXT,Q2,Q68,Q67,Q3,Q3_6_TEXT,Q5,Q45,ZIPCODE,Q56_1,Q56_2,Q56_3,Q56_4,Q56_5,Q10,Q11,Q12_1,Q12_2,Q12_3,Q12_4,Q12_5,Q12_6,Q12_7,Q12_8,Q12_9,Q12_10,Q12_11,Q12_12,Q12_13,Q12_18,Q12_14,Q12_15,Q12_16,Q12_17,Q12_17_TEXT,Q13_1,Q13_2,Q13_3,Q13_1_TEXT,Q13_2_TEXT,Q13_3_TEXT,Q14_1,Q14_2,Q14_3,Q14_1_TEXT,Q14_2_TEXT,Q14_3_TEXT,Q15,Q16,Q16_3_TEXT,Q69,Q69_6_TEXT,Q17,Q18,Q19,Q55,Q20_1,Q20_2,Q20_3,Q20_4,Q20_5,Q20_6,Q20_7,Q20_8,Q20_9,Q20_10,Q20_11,Q20_13,Q20_14,Q20_16,Q20_17,Q20_18,Q20_19,Q20_20,Q20_21,Q20_22,Q20_22_TEXT,Q25,Q25_14_TEXT,Q64,Q26,Q26_16_TEXT,Q27,Q28,Q29,Q30_1,Q30_2,Q30_3,Q30_4,Q30_5,Q30_6,Q30_7,Q30_8,Q30_12,Q30_10,Q30_11,Q32_1,Q32_2,Q32_3,Q32_4,Q32_5,Q34_1,Q34_2,Q34_3,Q34_4,Q34_5,Q34_6,Q34_7,Q34_8,Q34_9,Q34_10,Q34_11,Q34_12,Q34_13,Q34_14,Q34_15,Q34_16,Q47,Q59,Q51,Q49,Q49_5_TEXT,Q48,Q50,Q61,Q62,Q65,Q35,Q38,depart_time,newmin,ori_lon,ori_lat,geometry,orig_taz,dest_taz,dest_lon,dest_lat,O_region,D_region
4,10/31/2022 14:28,10/31/2022 14:36,IP Address,100,454,True,10/31/2022 14:36,R_3I6RkopoEqc7Hhp,email,EN,Yes,Mostly use the TransLoc app,,,I live off campus and work or attend class at ...,,,Staff member,,20-39 minutes,4 to 10 miles,30344,Telecommute (work from home),Telecommute (work from home),"Drive alone (car, truck, motorcycle, moped)","Drive alone (car, truck, motorcycle, moped)","Drive alone (car, truck, motorcycle, moped)",7:30am,5:00pm,,Don't like to depend on others for carpooling,,Enjoy the ride/prefer to drive,,,,,Need to leave quickly in an emergency,,,,,,,Safety concerns,,,,,,,,,,,,,,,,,,,,,No,,,,,,,,,,,,,,,Need to save money,,,,,,,Nothing would alter my current driving habits,,,MARTA bus,,,,,,,,,,,,,,,,,,,,,,,,"Yes, but I have NOT used this program","No, I am not aware of this program","No, I am not aware of this program","No, I am not aware of this program","No, I am not aware of this program","No, I am not aware of this program","No, I am not aware of this program","No, I am not aware of this program","Yes, but I have NOT used this program","Yes, but I have NOT used this program","Yes, but I have NOT used this program","No, I am not aware of this program","No, I am not aware of this program","No, I am not aware of this program","No, I am not aware of this program","No, I am not aware of this program",$50 to $100,No,,,,,,,,,,Yes,2023-01-01 07:30:00,450.0,-84.448,33.6919,POINT (-84.448 33.6919),1087,484,-84.397971,33.775766,3_4,3_1
5,10/31/2022 14:31,10/31/2022 14:36,IP Address,100,263,True,10/31/2022 14:36,R_Olq3a5qleDd1xBv,email,EN,No,,,,I live off campus and work or attend class at ...,,,Faculty member,,40-59 minutes,4 to 10 miles,30310,MARTA rail,MARTA rail,MARTA rail,MARTA rail,Did not work/flexible schedue,7:30am,6:00pm,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,West End,Midtown station,,Walk,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,"Yes, but I have NOT used this program","No, I am not aware of this program","No, I am not aware of this program","Yes, and I have used this program","No, I am not aware of this program","No, I am not aware of this program","Yes, and I have used this program","Yes, and I have used this program","No, I am not aware of this program","Yes, but I have NOT used this program","Yes, but I have NOT used this program","Yes, and I have used this program","No, I am not aware of this program","No, I am not aware of this program","Yes, and I have used this program","No, I am not aware of this program",,No,,,,,,,,,,Yes,2023-01-01 07:30:00,450.0,-84.4232,33.7278,POINT (-84.4232 33.7278),945,484,-84.397971,33.775766,3_4,3_1
6,10/31/2022 14:32,10/31/2022 14:38,IP Address,100,320,True,10/31/2022 14:38,R_10xcqTHiRpQOw3x,email,EN,Yes,Mostly use the TransLoc app,,,I live off campus and work or attend class at ...,,,Staff member,,20-39 minutes,11 to 20 miles,30344,MARTA rail,MARTA rail,MARTA rail,MARTA rail,MARTA rail,8:00am,4:00pm,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,East Point,Midtown station,,Drive,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,"Yes, but I have NOT used this program","Yes, but I have NOT used this program","No, I am not aware of this program","No, I am not aware of this program","No, I am not aware of this program","No, I am not aware of this program","Yes, and I have used this program","Yes, and I have used this program","No, I am not aware of this program","Yes, but I have NOT used this program","Yes, but I have NOT used this program","Yes, but I have NOT used this program","No, I am not aware of this program","No, I am not aware of this program","No, I am not aware of this program","No, I am not aware of this program",,No,,,,,,,,,,Yes,2023-01-01 08:00:00,480.0,-84.448,33.6919,POINT (-84.448 33.6919),1087,484,-84.397971,33.775766,3_4,3_1
7,10/31/2022 14:31,10/31/2022 14:38,IP Address,100,447,True,10/31/2022 14:38,R_2upJNnJssGu0QmB,email,EN,No,,,,I live off campus and work or attend class at ...,,,Staff member,,20-39 minutes,4 to 10 miles,30345,Telecommute (work from home),Telecommute (work from home),"Drive alone (car, truck, motorcycle, moped)",Telecommute (work from home),Telecommute (work from home),8:30am,5:00pm,,,,,Irregular work schedule,,,,Need to leave quickly in an emergency,,,Transit takes too long,,,,,Anything else takes too much time,"Other, please specify",My husband also works at Georgia Tech. About a...,,,,,,,,,,,,,,,,,,No,,,,,,Availability of emergency ride home,"Expanded regional transit options (MARTA, ligh...",,,,,,"Increased financial incentives (e.g., transit ...",,,,,,,,,,,,Carpool,,,,,,,,,,,,,,,,,,,,,,,,"No, I am not aware of this program","No, I am not aware of this program","No, I am not aware of this program","No, I am not aware of this program","No, I am not aware of this program","No, I am not aware of this program","No, I am not aware of this program","No, I am not aware of this program","No, I am not aware of this program","Yes, and I have used this program","No, I am not aware of this program","Yes, but I have NOT used this program","No, I am not aware of this program","No, I am not aware of this program","No, I am not aware of this program","No, I am not aware of this program",less than $50,No,,,,,,,,,,Yes,2023-01-01 08:30:00,510.0,-84.287,33.8513,POINT (-84.287 33.8513),1559,484,-84.397971,33.775766,3_3,3_1
12,10/31/2022 14:35,10/31/2022 14:40,IP Address,100,287,True,10/31/2022 14:40,R_1ov90QGumvyToDU,email,EN,Yes,I do not access campus transit arrival/departu...,,,I live off campus and work or attend class at ...,,,Staff member,,40-59 minutes,21 to 30 miles,30038,"Drive alone (car, truck, motorcycle, moped)","Drive alone (car, truck, motorcycle, moped)","Drive alone (car, truck, motorcycle, moped)","Drive alone (car, truck, motorcycle, moped)","Drive alone (car, truck, motorcycle, moped)",11:00am,7:00pm,,Don't like to depend on others for carpooling,,Enjoy the ride/prefer to drive,Irregular work schedule,,,,Need to leave quickly in an emergency,Transit options are not available where I live,,,,,,,,,,,,,,,,,,,,,,,,,,,No,,,,Ability to telecommute,,,"Expanded regional transit options (MARTA, ligh...",,,Flexible work schedule,,,,,,,,,,,,Nothing would alter my current driving habits,,,"None, I would not alter my current mode",,,,,,,,,,,,,,,,,,,,,,,,"Yes, but I have NOT used this program","No, I am not aware of this program","No, I am not aware of this program","No, I am not aware of this program","No, I am not aware of this program","No, I am not aware of this program","Yes, but I have NOT used this program","Yes, but I have NOT used this program","Yes, but I have NOT used this program","Yes, but I have NOT used this program","Yes, but I have NOT used this program","Yes, but I have NOT used this program","No, I am not aware of this program","Yes, but I have NOT used this program","Yes, but I have NOT used this program","Yes, but I have NOT used this program",more than $200,Yes,,Walking,,,,No,,,,No,2023-01-01 11:00:00,660.0,-84.161,33.6823,POINT (-84.161 33.6823),1865,484,-84.397971,33.775766,5_1,3_1


['3_4' '3_3' '5_1' '3_2' '3_1' '8_1' '2_2' '1_1' '6_1' '4_1' '2_1' '1_2'
 '4_2' '6_2' '8_2' '5_2']
['3_1']


In [29]:
# store results to data_outputs
# NOTE(review): this is a relative path, so the file lands under whatever the
# kernel's current working directory is.
file_path_name = os.path.join(
    "data_outputs", "step1_gt_survey", "gt_survey.csv"
)
# to_csv does not create missing folders; make sure the target directory exists.
os.makedirs(os.path.dirname(file_path_name), exist_ok=True)
gt_survey.to_csv(file_path_name)