## Prepare traffic demands

In [1]:
import os
import sys

import pandas as pd
import geopandas as gpd
import numpy as np

# Lift the column cap so wide frames are rendered in full.
pd.set_option("display.max_columns", None)

In [2]:
# Enable IPython's autoreload extension in mode 2, so edits to imported
# modules are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2    

In [3]:
# Absolute path of the project root folder. Update this value every time the
# project root folder is moved.
os.environ['project_root'] = '/Users/geekduck/Dropbox/gt_survey'

# Put the project root on the import path. The membership check keeps
# re-running this cell from piling duplicate entries onto sys.path.
if os.environ['project_root'] not in sys.path:
    sys.path.append(os.environ['project_root'])

In [4]:
from carpoolsim.basic_settings import *

## Load data
- trip inputs
- taz data

In [5]:
# Read the cleaned trips shapefile, then report its size and preview two random rows.
gt_survey = gpd.read_file(os.path.join(os.environ['data_inputs'], "cleaned", "trips.shp"))
print(gt_survey.shape)
gt_survey.sample(2)

(1539, 7)


Unnamed: 0,trip_id,orig_lon,orig_lat,dest_lon,dest_lat,new_min,geometry
696,1066,-84.3935,33.7683,-84.397971,33.775766,600.0,POINT (-84.39350 33.76830)
726,1112,-84.4633,34.0025,-84.397971,33.775766,630.0,POINT (-84.46330 34.00250)


In [6]:
# Read the cleaned TAZ polygon shapefile, then report its size and preview two random rows.
tazs = gpd.read_file(os.path.join(os.environ['data_inputs'], "cleaned", "tazs.shp"))
print(tazs.shape)
tazs.sample(2)

(5873, 3)


Unnamed: 0,taz_id,group_id,geometry
3931,3932,Douglas,"POLYGON ((-84.75040 33.78773, -84.74966 33.786..."
2368,2369,Cobb,"POLYGON ((-84.45793 33.82465, -84.45789 33.824..."


In [7]:
from shapely import Point

# assign TAZ given long/lat
def get_taz_given_lon_lat(
    df_row, lon_col, lat_col, tazs=tazs
):
    """Find the TAZ whose polygon contains a row's longitude/latitude point.

    Parameters
    ----------
    df_row : mapping-like row (e.g. a pandas Series)
        Must be indexable by ``lon_col`` and ``lat_col``.
    lon_col, lat_col : str
        Labels of the longitude and latitude entries in ``df_row``.
    tazs : GeoDataFrame
        Zones with ``geometry``, ``taz_id`` and ``group_id`` columns. The
        default is the module-level ``tazs`` object that exists when this
        function is defined.
        NOTE(review): the coordinates are assumed to be in the same CRS as
        ``tazs`` -- confirm.

    Returns
    -------
    pandas.Series
        Indexed by ``taz_id`` and ``group_id``. Holds the values of the first
        zone containing the point, or ``-1`` and ``""`` when no zone does.
        The miss case is returned as a Series too (not a bare tuple), so
        ``DataFrame.apply(..., axis=1)`` always expands the result into two
        columns, whichever kind of row happens to come first.
    """
    point = Point(df_row[lon_col], df_row[lat_col])
    # One vectorised containment test over all zones, instead of rebuilding a
    # Python list of polygons and looping over it for every row.
    inside = tazs["geometry"].contains(point)

    if not inside.any():
        return pd.Series([-1, ""], index=["taz_id", "group_id"])
    return tazs.loc[inside, ["taz_id", "group_id"]].iloc[0]

# Origin end: TAZ id and group of the zone containing (orig_lon, orig_lat).
gt_survey[["orig_taz", "O_region"]] = gt_survey.apply(
    lambda trip: get_taz_given_lon_lat(trip, lon_col="orig_lon", lat_col="orig_lat"),
    axis=1,
)

# Destination end: same lookup on (dest_lon, dest_lat).
gt_survey[["dest_taz", "D_region"]] = gt_survey.apply(
    lambda trip: get_taz_given_lon_lat(trip, lon_col="dest_lon", lat_col="dest_lat"),
    axis=1,
)

In [8]:
# Spot-check three random trips, including the TAZ / region columns added above.
gt_survey.sample(3)

Unnamed: 0,trip_id,orig_lon,orig_lat,dest_lon,dest_lat,new_min,geometry,orig_taz,O_region,dest_taz,D_region
1190,1896,-84.3169,33.7498,-84.397971,33.775766,450.0,POINT (-84.31690 33.74980),1646,DeKalb,484,Fulton
573,861,-84.3339,33.7217,-84.397971,33.775766,450.0,POINT (-84.33390 33.72170),1689,DeKalb,484,Fulton
453,681,-84.336,33.7691,-84.397971,33.775766,420.0,POINT (-84.33600 33.76910),1657,DeKalb,484,Fulton


In [9]:
# Keep only trips whose origin fell inside some TAZ (-1 marks "no zone found"),
# printing the frame's shape before and after the cut.
print(gt_survey.shape)
filt = gt_survey["orig_taz"].ne(-1)
gt_survey = gt_survey[filt]
print(gt_survey.shape)

(1539, 11)
(1519, 11)


In [10]:
# List the distinct origin-side and destination-side region labels.
print(gt_survey["O_region"].unique())
print(gt_survey["D_region"].unique())

['Fulton' 'DeKalb' 'Carroll' 'Cobb' 'Fayette' 'Gwinnett' 'Douglas'
 'Paulding' 'Clayton' 'Cherokee' 'Forsyth' 'Rockdale' 'Hall' 'Henry'
 'Coweta' 'Newton' 'Barrow' 'Walton']
['Fulton']


In [11]:
# Write the TAZ-tagged trips to <data_inputs>/cleaned/trips.shp.
# NOTE(review): this is the same file the notebook reads the trips from, and it
# lives under data_inputs, not data_outputs -- so the original input file is
# overwritten. Confirm this is intended.
gt_survey.to_file(
    os.path.join(
        os.environ['data_inputs'], 
        "cleaned", 
        "trips.shp"
    ),
    crs="EPSG:4326"
)