## Prepare traffic demands

In [1]:
import os
import sys

import pandas as pd
import geopandas as gpd
import numpy as np

# Show every column when a DataFrame is rendered (no truncation with "...").
pd.set_option("display.max_columns", None)

In [2]:
# Enable IPython's autoreload extension in mode 2: imported modules are
# reloaded before each cell executes, so edits to project .py files are
# picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2    

In [3]:
# Root folder of the project. This is a machine-specific absolute path and
# must be updated every time the project root folder is moved.
os.environ['project_root'] = '/Users/geekduck/Dropbox/gt_survey'

# Make the project importable. The membership check keeps the cell idempotent:
# re-running it does not pile duplicate entries onto sys.path.
if os.environ['project_root'] not in sys.path:
    sys.path.append(os.environ['project_root'])

In [4]:
# Pull in the project-wide settings.
# NOTE(review): the cells below read os.environ['data_inputs'], which this
# notebook never sets itself -- presumably this import defines it as a side
# effect; confirm in carpoolsim/basic_settings.py.
from carpoolsim.basic_settings import *

## Load data
- trip inputs
- taz data

In [5]:
# Read the cleaned trip records from <data_inputs>/cleaned/trips.shp.
trips_shp = os.path.join(os.environ['data_inputs'], "cleaned", "trips.shp")
gt_survey = gpd.read_file(trips_shp)

# Quick sanity check: table size plus two randomly drawn rows.
print(gt_survey.shape)
gt_survey.sample(2)

(1545, 7)


Unnamed: 0,trip_id,orig_lon,orig_lat,dest_lon,dest_lat,new_min,geometry
181,258,-84.3883,33.7984,-84.397971,33.775766,570.0,POINT (-84.38830 33.79840)
418,613,-84.147,34.026,-84.397971,33.775766,480.0,POINT (-84.14700 34.02600)


In [6]:
# Read the TAZ polygons from <data_inputs>/cleaned/tazs.shp.
tazs_shp = os.path.join(os.environ['data_inputs'], "cleaned", "tazs.shp")
tazs = gpd.read_file(tazs_shp)

# Quick sanity check: table size plus two randomly drawn rows.
print(tazs.shape)
tazs.sample(2)

(5873, 3)


Unnamed: 0,taz_id,group_id,geometry
5197,3607,Clayton,"POLYGON ((-84.29469 33.43579, -84.29468 33.435..."
1023,2880,Gwinnett,"POLYGON ((-84.05892 34.14503, -84.05828 34.144..."


In [7]:
from shapely import Point

# assign TAZ given long/lat
def get_taz_given_lon_lat(
    df_row, lon_col, lat_col, tazs=tazs
):
    """Find the TAZ polygon that contains the lon/lat point of one row.

    Parameters
    ----------
    df_row
        A single record indexable by column name (e.g. the row handed over
        by ``DataFrame.apply(..., axis=1)``).
    lon_col, lat_col
        Names of the entries in ``df_row`` holding longitude and latitude.
    tazs
        GeoDataFrame with "geometry", "taz_id" and "group_id" columns.
        The default is bound to the module-level ``tazs`` at definition
        time, so re-run this cell after reloading ``tazs``.

    Returns
    -------
    pandas.Series
        Indexed by ``["taz_id", "group_id"]``. Holds the values of the first
        zone whose polygon contains the point, or ``-1`` and ``""`` when no
        zone contains it. The same type is returned in both cases so that
        ``DataFrame.apply`` always expands the result into two columns,
        regardless of whether the first row happens to match.

    Notes
    -----
    Assumes the coordinates are expressed in the same CRS as
    ``tazs["geometry"]`` -- TODO confirm; no reprojection is done here.
    """
    point = Point(df_row[lon_col], df_row[lat_col])
    # Vectorised containment test: one boolean per zone, aligned on tazs.index.
    inside = tazs["geometry"].contains(point)

    if not inside.any():
        # Sentinel for "outside every zone"; filtered out later in the notebook.
        return pd.Series([-1, ""], index=["taz_id", "group_id"])
    return tazs.loc[inside, ["taz_id", "group_id"]].iloc[0]

# Tag every trip with the TAZ id / region of its origin point. The column
# names are forwarded positionally to get_taz_given_lon_lat through `args`.
gt_survey[["orig_taz", "O_region"]] = gt_survey.apply(
    get_taz_given_lon_lat, axis=1, args=("orig_lon", "orig_lat")
)

# Same lookup for the destination point.
gt_survey[["dest_taz", "D_region"]] = gt_survey.apply(
    get_taz_given_lon_lat, axis=1, args=("dest_lon", "dest_lat")
)

In [8]:
# Spot-check three random trips together with their newly assigned TAZ columns.
gt_survey.sample(n=3)

Unnamed: 0,trip_id,orig_lon,orig_lat,dest_lon,dest_lat,new_min,geometry,orig_taz,O_region,dest_taz,D_region
506,749,-84.302,34.1124,-84.397971,33.775766,720.0,POINT (-84.30200 34.11240),49,Fulton,484,Fulton
1085,1715,-84.3757,33.7718,-84.397971,33.775766,660.0,POINT (-84.37570 33.77180),569,Fulton,484,Fulton
635,963,-84.8194,33.3696,-84.397971,33.775766,420.0,POINT (-84.81940 33.36960),4344,Coweta,484,Fulton


In [9]:
print(gt_survey.shape)
# -1 is the sentinel written by get_taz_given_lon_lat when a point lies in no
# TAZ. Both trip ends come from that lookup, so a trip is kept only when its
# origin AND its destination were matched to a zone.
filt = (gt_survey["orig_taz"] != -1) & (gt_survey["dest_taz"] != -1)
gt_survey = gt_survey.loc[filt, :]
print(gt_survey.shape)

(1545, 11)
(1519, 11)


In [10]:
# Distinct origin regions, then distinct destination regions, of the kept trips.
print(gt_survey["O_region"].unique())
print(gt_survey["D_region"].unique())

['Fulton' 'DeKalb' 'Carroll' 'Cobb' 'Fayette' 'Gwinnett' 'Douglas'
 'Paulding' 'Clayton' 'Cherokee' 'Forsyth' 'Rockdale' 'Hall' 'Henry'
 'Coweta' 'Newton' 'Barrow' 'Walton']
['Fulton']


In [13]:
# Persist the TAZ-annotated trips as a shapefile.
# NOTE(review): the target is <data_inputs>/cleaned/trips.shp -- the same file
# this notebook loads at the top -- so the input is overwritten and a re-run
# will read the already-annotated table. Confirm whether an output folder was
# intended instead.
annotated_trips_shp = os.path.join(os.environ['data_inputs'], "cleaned", "trips.shp")
gt_survey.to_file(annotated_trips_shp, crs="EPSG:4326")