# Convert to a standardized version of dataframe

A converter to convert data frame to a standard format for carpoolsim:
1. Traffic network links
2. Traffic network nodes
3. Traffic TAZs (polygons)
4. Traffic demands 

In [1]:
import time
import copy
import os
import sys

import numpy as np
import pandas as pd
import geopandas as gpd

%load_ext autoreload
%autoreload 2
pd.options.display.max_columns = None  # display all columns

In [2]:
from carpoolsim.basic_settings import *

In [3]:
# need to set this environmental path everytime you moves the project root folder
os.environ['project_root'] = '/Users/geekduck/Dropbox/gt_survey'

In [30]:
# load traffic network data
tazs_raw = gpd.read_file(os.environ["taz"])
df_nodes_raw = gpd.read_file(os.environ['network_nodes'])

In [35]:
df_links_raw = gpd.read_file(os.environ['network_links'])

In [40]:
from carpoolsim.dataclass.traffic_network import (
    TrafficNetworkLink,
    TrafficNetworkNode,
    TrafficAnalysisZone,
)

In [17]:
# clean node object
node_name_map = {
    "N": "nid",
    "lat": "lat",
    "lon": "lon",
    "X": "x",
    "Y": "y",
    "geometry": "geometry"
}
df_nodes_raw = df_nodes_raw.rename(columns=node_name_map)

df_nodes_lst = []
for index, row in df_nodes_raw.iterrows():
    df_nodes_lst.append(
        TrafficNetworkNode(
            row["nid"], row["lon"], row["lat"],
            row["x"], row["y"], row["geometry"])
    )

In [49]:
# clean link object
link_name_map = {
    "A": "a",
    "B": "b",
    "NAME": "name",
    "DISTANCE": "distance",
    "FACTYPE": "factype",
    "geometry": "geometry",
    "SPEED_LIMI": "speed_limit"
}
df_links_raw = df_links_raw.rename(columns=link_name_map)
df_links_raw["a"] = df_links_raw["a"].astype(str)
df_links_raw["b"] = df_links_raw["b"].astype(str)
df_links_raw["a_b"] = df_links_raw["a"] + "_" + df_links_raw["b"]

df_links_lst = []
for index, row in df_links_raw.iterrows():
    df_links_lst.append(
        TrafficNetworkLink(
            row["a"], row["b"], row["a_b"], row["name"],
            row["distance"], row["factype"], row["speed_limit"],
            row["geometry"]
        )
    )

In [50]:
# clean taz object
taz_name_map = {
    "OBJECTID": "taz_id",
    "COUNTY": "group_id",
    "geometry": "geometry",
}
tazs_raw = tazs_raw.rename(columns=taz_name_map)


tazs_lst = []
for index, row in tazs_raw.iterrows():
    tazs_lst.append(
        TrafficAnalysisZone(
            row["taz_id"],
            row["group_id"],
            row["geometry"],
        )
    )

In [51]:
# convert data sets base to data frames
df_nodes = gpd.GeoDataFrame(
    df_nodes_lst,
    crs="EPSG:4326",
)
df_links = gpd.GeoDataFrame(
    df_links_lst,
    crs=CRS,
)
tazs = gpd.GeoDataFrame(
    tazs_lst,
    crs="EPSG:4326",
)

In [52]:
df_links = df_links.to_crs(
    crs="EPSG:4326"
)

In [54]:
# create folder if not exist
os.makedirs(
    os.path.join(os.environ['data_inputs'], "cleaned"),
    exist_ok=True)
# store cleaned results to shapefiles
tazs.to_file(
    os.path.join(os.environ['data_inputs'], "cleaned", "tazs.shp")
)
df_nodes.to_file(
    os.path.join(os.environ['data_inputs'], "cleaned", "nodes.shp")
)
df_links.to_file(
    os.path.join(os.environ['data_inputs'], "cleaned", "links.shp")
)

  df_links.to_file(
  ogr_write(


### Prepare pnr stations

In [55]:
from carpoolsim.dataclass.parking_lots import (
    ParkAndRideStation
)
from carpoolsim.dataclass.travel_demands import (
    TripDemand
)

In [65]:
pnr_lots = gpd.read_file(os.environ['parking_lots'])

In [66]:
pnr_lots.head(2)

Unnamed: 0,N,X,Y,STATION,STAFLAG,PNR,PNR_MAJOR,GEOMETRYSO,lat,lon,geometry
0,16221,2303925.0,1521079.0,,0,1,1,1,34.18142,-84.142391,POINT (-84.14239 34.18142)
1,19240,2195552.0,1538907.0,,0,1,1,1,34.229958,-84.50093,POINT (-84.50093 34.22996)


In [70]:
pnr_name_map = {
    "N": "station_id",
    "STATION": "name",
    "SPACES": "capacity",
    "geometry": "geometry"
}
pnr_lots = pnr_lots.rename(columns=pnr_name_map)

pnr_lots['lon'] = pnr_lots.geometry.x
pnr_lots['lat'] = pnr_lots.geometry.y
pnr_lots['capacity'] = 200

In [71]:
pnr_lots.head(2)

Unnamed: 0,station_id,X,Y,name,STAFLAG,PNR,PNR_MAJOR,GEOMETRYSO,lat,lon,geometry,capacity
0,16221,2303925.0,1521079.0,,0,1,1,1,34.18142,-84.142391,POINT (-84.14239 34.18142),200
1,19240,2195552.0,1538907.0,,0,1,1,1,34.229958,-84.50093,POINT (-84.50093 34.22996),200


In [73]:
pnr_lst = []
for index, row in pnr_lots.iterrows():
    pnr_lst.append(
        ParkAndRideStation(
            row["station_id"], row["name"],
            row["lon"], row["lat"], row["capacity"],
            row["geometry"]
        )
    )

In [80]:
pnrs = gpd.GeoDataFrame(pnr_lst)

In [82]:
pnrs = pnrs.set_crs(
    crs="EPSG:4326"
)

In [84]:
pnrs.to_file(
    os.path.join(os.environ['data_inputs'], "cleaned", "pnrs.shp"),
)

## Prepare traffic demands
For trip, need to add:
- spatial information
- temporal information

In [86]:
gt_survey = pd.read_csv(os.environ['trip_demands'], index_col=0)

In [87]:
gt_survey["trip_id"] = gt_survey.index

In [88]:
# a special input
gt_survey.sample(2)

Unnamed: 0,StartDate,EndDate,Status,Progress,Duration..in.seconds.,Finished,RecordedDate,ResponseId,DistributionChannel,UserLanguage,Q66,Q73,Q73_2_TEXT,Q73_5_TEXT,Q2,Q68,Q67,Q3,Q3_6_TEXT,Q5,Q45,ZIPCODE,Q56_1,Q56_2,Q56_3,Q56_4,Q56_5,Q10,Q11,Q12_1,Q12_2,Q12_3,Q12_4,Q12_5,Q12_6,Q12_7,Q12_8,Q12_9,Q12_10,Q12_11,Q12_12,Q12_13,Q12_18,Q12_14,Q12_15,Q12_16,Q12_17,Q12_17_TEXT,Q13_1,Q13_2,Q13_3,Q13_1_TEXT,Q13_2_TEXT,Q13_3_TEXT,Q14_1,Q14_2,Q14_3,Q14_1_TEXT,Q14_2_TEXT,Q14_3_TEXT,Q15,Q16,Q16_3_TEXT,Q69,Q69_6_TEXT,Q17,Q18,Q19,Q55,Q20_1,Q20_2,Q20_3,Q20_4,Q20_5,Q20_6,Q20_7,Q20_8,Q20_9,Q20_10,Q20_11,Q20_13,Q20_14,Q20_16,Q20_17,Q20_18,Q20_19,Q20_20,Q20_21,Q20_22,Q20_22_TEXT,Q25,Q25_14_TEXT,Q64,Q26,Q26_16_TEXT,Q27,Q28,Q29,Q30_1,Q30_2,Q30_3,Q30_4,Q30_5,Q30_6,Q30_7,Q30_8,Q30_12,Q30_10,Q30_11,Q32_1,Q32_2,Q32_3,Q32_4,Q32_5,Q34_1,Q34_2,Q34_3,Q34_4,Q34_5,Q34_6,Q34_7,Q34_8,Q34_9,Q34_10,Q34_11,Q34_12,Q34_13,Q34_14,Q34_15,Q34_16,Q47,Q59,Q51,Q49,Q49_5_TEXT,Q48,Q50,Q61,Q62,Q65,Q35,Q38,trip_id
2380,11/13/2022 21:54,11/14/2022 0:02,IP Address,96,7659,False,11/20/2022 21:54,R_12QIx5B7Q6zTQyx,email,EN,Yes,Mostly use the TransLoc app,,,I live off campus and work or attend class at ...,,,Undergraduate student,,20-39 minutes,fewer than 4 miles,3038.0,"Campus transit (Stinger, Stingerette, etc.)","Campus transit (Stinger, Stingerette, etc.)","Campus transit (Stinger, Stingerette, etc.)","Campus transit (Stinger, Stingerette, etc.)","Campus transit (Stinger, Stingerette, etc.)",12:30pm,11:30pm,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,"Yes, but I have NOT used this program","No, I am not aware of this program","No, I am not aware of this program","No, I am not aware of this program","No, I am not aware of this program","No, I am not aware of this program","No, I am not aware of this program","No, I am not aware of this program","No, I am not aware of this program","No, I am not aware of this program","No, I am not aware of this program","No, I am not aware of this program","No, I am not aware of this program","No, I am not aware of this program","No, I am not aware of this program","No, I am not aware of this program",,Yes,I rent a scooter from a shared mobility provider,Campus Transit,,Clough Undergraduate Learning Commons,,No,,,Sometimes bus drivers skip stops and TransLoc ...,Yes,2380
1752,11/9/2022 16:09,11/9/2022 17:40,IP Address,100,5420,True,11/9/2022 17:40,R_1dzUGxyguTKvv5a,email,EN,Yes,Mostly use the TransLoc app,,,I live off campus and work or attend class at ...,,,Undergraduate student,,40-59 minutes,more than 30 miles,30087.0,"Drive alone (car, truck, motorcycle, moped)","Drive alone (car, truck, motorcycle, moped)","Drive alone (car, truck, motorcycle, moped)","Drive alone (car, truck, motorcycle, moped)","Drive alone (car, truck, motorcycle, moped)",10:00am,10:00am,,Don't like to depend on others for carpooling,,,,,,Need my car at work for personal business,Need to leave quickly in an emergency,,,,,,,,,,,,,,,,,,,,,,,,,,,,No,,,,,,,,,,,,Increased cost and/or inconvenience of driving...,,,Need to save money,,"Safe, convenient bike paths and routes",,,,,,,,"None, I would not alter my current mode",,,,,,,,,,,,,,,,,,,,,,,,"No, I am not aware of this program","No, I am not aware of this program","No, I am not aware of this program","No, I am not aware of this program","No, I am not aware of this program","No, I am not aware of this program","No, I am not aware of this program","No, I am not aware of this program","No, I am not aware of this program","No, I am not aware of this program","No, I am not aware of this program","No, I am not aware of this program","No, I am not aware of this program","No, I am not aware of this program","No, I am not aware of this program","No, I am not aware of this program",more than $200,No,,,,,,,,,a parking pass would be a blessing,Yes,1752


In [89]:
# filter out nan values for zip code
gt_survey = gt_survey[gt_survey['ZIPCODE'].notna()]
gt_survey["ZIPCODE"] = gt_survey["ZIPCODE"].astype(int)

In [90]:
gt_survey.Q2.unique()

array(['I live off campus and work or attend class at the main Georgia Tech campus in Midtown Atlanta',
       'I live on campus in campus housing (residence halls)',
       'I live on campus in Greek housing'], dtype=object)

In [91]:
# Q10: home to work time
# Q11: work to home time
gt_survey.Q10.unique()

array(['7:30am', '8:00am', '8:30am', '11:00am', '9:00am', '10:00am',
       '7:00am', '9:30am', '8:00pm', '9:00pm', '9:30pm', '7:00pm',
       '11:30am', '6:00am', nan, '2:00pm', '6:30am', '12:00pm', '10:30am',
       '12:30pm', '12:00am - 5:30am', '3:30pm', '6:30pm', '1:00pm',
       '1:30pm', '4:00pm', '7:30pm', '5:00pm', '3:00pm', '10:30pm',
       '8:30pm', '2:30pm'], dtype=object)

In [92]:
filt = (gt_survey.Q10.notna())
gt_survey = gt_survey[gt_survey.Q10.notna()]
print(gt_survey.shape)

(1545, 143)


In [93]:
# If earlier than 5:30 AM, change it to 5:30 AM
gt_survey['Q10'] = gt_survey['Q10'].str.replace(
    "12:00am - 5:30am",
    "5:30am"
)

In [94]:
# 2023-01-01 is the pseudo date
depart_time = "2023-01-01 " + gt_survey.Q10

gt_survey['depart_time'] = pd.to_datetime(
    depart_time,
    format=r"%Y-%m-%d %I:%M%p"
)

In [95]:
def extract_newmins(df_row):
    tm = (df_row["depart_time"] - pd.to_datetime("2023-01-01")
         ).total_seconds() / 60
    return tm

gt_survey['newmin'] = gt_survey['depart_time'] - pd.to_datetime("2023-01-01")
gt_survey['newmin'] = gt_survey.apply(extract_newmins, axis=1)
gt_survey.sample(2)

Unnamed: 0,StartDate,EndDate,Status,Progress,Duration..in.seconds.,Finished,RecordedDate,ResponseId,DistributionChannel,UserLanguage,Q66,Q73,Q73_2_TEXT,Q73_5_TEXT,Q2,Q68,Q67,Q3,Q3_6_TEXT,Q5,Q45,ZIPCODE,Q56_1,Q56_2,Q56_3,Q56_4,Q56_5,Q10,Q11,Q12_1,Q12_2,Q12_3,Q12_4,Q12_5,Q12_6,Q12_7,Q12_8,Q12_9,Q12_10,Q12_11,Q12_12,Q12_13,Q12_18,Q12_14,Q12_15,Q12_16,Q12_17,Q12_17_TEXT,Q13_1,Q13_2,Q13_3,Q13_1_TEXT,Q13_2_TEXT,Q13_3_TEXT,Q14_1,Q14_2,Q14_3,Q14_1_TEXT,Q14_2_TEXT,Q14_3_TEXT,Q15,Q16,Q16_3_TEXT,Q69,Q69_6_TEXT,Q17,Q18,Q19,Q55,Q20_1,Q20_2,Q20_3,Q20_4,Q20_5,Q20_6,Q20_7,Q20_8,Q20_9,Q20_10,Q20_11,Q20_13,Q20_14,Q20_16,Q20_17,Q20_18,Q20_19,Q20_20,Q20_21,Q20_22,Q20_22_TEXT,Q25,Q25_14_TEXT,Q64,Q26,Q26_16_TEXT,Q27,Q28,Q29,Q30_1,Q30_2,Q30_3,Q30_4,Q30_5,Q30_6,Q30_7,Q30_8,Q30_12,Q30_10,Q30_11,Q32_1,Q32_2,Q32_3,Q32_4,Q32_5,Q34_1,Q34_2,Q34_3,Q34_4,Q34_5,Q34_6,Q34_7,Q34_8,Q34_9,Q34_10,Q34_11,Q34_12,Q34_13,Q34_14,Q34_15,Q34_16,Q47,Q59,Q51,Q49,Q49_5_TEXT,Q48,Q50,Q61,Q62,Q65,Q35,Q38,trip_id,depart_time,newmin
2136,11/14/2022 15:53,11/14/2022 15:57,IP Address,100,229,True,11/14/2022 15:57,R_3k7Gy90JYBr35PI,email,EN,No,,,,I live off campus and work or attend class at ...,,,Faculty member,,less than 10 minutes,fewer than 4 miles,30309,"Drive alone (car, truck, motorcycle, moped)","Drive alone (car, truck, motorcycle, moped)","Drive alone (car, truck, motorcycle, moped)","Drive alone (car, truck, motorcycle, moped)","Drive alone (car, truck, motorcycle, moped)",9:00am,6:00pm,Don't have anyone to ride/carpool with,Don't like to depend on others for carpooling,,,,,,,,,,,,,,Safety concerns,,,,,,,,,,,,,,,,,,,,,No,,,,,,,,,,,,,,Improved bicycle access to campus,,,"Safe, convenient bike paths and routes",,"Secure, convenient bicycle parking",,,,"Other, please specify",eliminate scooters from atlanta,Walk,,,,,,,,,,,,,,,,,,,,,,,,"No, I am not aware of this program","No, I am not aware of this program","No, I am not aware of this program","No, I am not aware of this program","No, I am not aware of this program","No, I am not aware of this program","Yes, and I have used this program","Yes, and I have used this program","Yes, and I have used this program","Yes, but I have NOT used this program","Yes, but I have NOT used this program","Yes, but I have NOT used this program","No, I am not aware of this program","No, I am not aware of this program","No, I am not aware of this program","No, I am not aware of this program",$50 to $100,No,,,,,,,,,,No,2136,2023-01-01 09:00:00,540.0
992,11/3/2022 12:21,11/3/2022 12:30,IP Address,100,515,True,11/3/2022 12:30,R_2qwqO9B2qbXxtcE,email,EN,No,,,,I live off campus and work or attend class at ...,,,Faculty member,,20-39 minutes,4 to 10 miles,30345,"Drive alone (car, truck, motorcycle, moped)","Drive alone (car, truck, motorcycle, moped)","Drive alone (car, truck, motorcycle, moped)","Drive alone (car, truck, motorcycle, moped)","Drive alone (car, truck, motorcycle, moped)",9:00am,5:00pm,,Don't like to depend on others for carpooling,"Drop off children (childcare, school, after sc...",,Irregular work schedule,Live close to campus,,,Need to leave quickly in an emergency,,,,,,,,,,,,,,,,,,,,,,,,,,,,No,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,"No, I am not aware of this program","No, I am not aware of this program","No, I am not aware of this program","No, I am not aware of this program","No, I am not aware of this program","No, I am not aware of this program","No, I am not aware of this program","No, I am not aware of this program","No, I am not aware of this program","No, I am not aware of this program","No, I am not aware of this program","No, I am not aware of this program","No, I am not aware of this program","No, I am not aware of this program","No, I am not aware of this program","No, I am not aware of this program",more than $200,Yes,I rent a scooter from a shared mobility provider,Walking,,CRB,,No,,,,Yes,992,2023-01-01 09:00:00,540.0


In [97]:
# package for querying zip code
import pgeocode

locator = pgeocode.Nominatim("us")
location = locator.query_postal_code(30310)
print((location.latitude, location.longitude))

(33.7278, -84.4232)


In [98]:
# extract longitude/latitude for all positions!
def extract_lon_lat(df_row):
    location = locator.query_postal_code(df_row["ZIPCODE"])
    return pd.Series({
        "longitude": location.longitude,
        "latitude": location.latitude
    })

gt_survey[["ori_lon", "ori_lat"]] = gt_survey.apply(extract_lon_lat, axis=1)
display(gt_survey.sample(2))

Unnamed: 0,StartDate,EndDate,Status,Progress,Duration..in.seconds.,Finished,RecordedDate,ResponseId,DistributionChannel,UserLanguage,Q66,Q73,Q73_2_TEXT,Q73_5_TEXT,Q2,Q68,Q67,Q3,Q3_6_TEXT,Q5,Q45,ZIPCODE,Q56_1,Q56_2,Q56_3,Q56_4,Q56_5,Q10,Q11,Q12_1,Q12_2,Q12_3,Q12_4,Q12_5,Q12_6,Q12_7,Q12_8,Q12_9,Q12_10,Q12_11,Q12_12,Q12_13,Q12_18,Q12_14,Q12_15,Q12_16,Q12_17,Q12_17_TEXT,Q13_1,Q13_2,Q13_3,Q13_1_TEXT,Q13_2_TEXT,Q13_3_TEXT,Q14_1,Q14_2,Q14_3,Q14_1_TEXT,Q14_2_TEXT,Q14_3_TEXT,Q15,Q16,Q16_3_TEXT,Q69,Q69_6_TEXT,Q17,Q18,Q19,Q55,Q20_1,Q20_2,Q20_3,Q20_4,Q20_5,Q20_6,Q20_7,Q20_8,Q20_9,Q20_10,Q20_11,Q20_13,Q20_14,Q20_16,Q20_17,Q20_18,Q20_19,Q20_20,Q20_21,Q20_22,Q20_22_TEXT,Q25,Q25_14_TEXT,Q64,Q26,Q26_16_TEXT,Q27,Q28,Q29,Q30_1,Q30_2,Q30_3,Q30_4,Q30_5,Q30_6,Q30_7,Q30_8,Q30_12,Q30_10,Q30_11,Q32_1,Q32_2,Q32_3,Q32_4,Q32_5,Q34_1,Q34_2,Q34_3,Q34_4,Q34_5,Q34_6,Q34_7,Q34_8,Q34_9,Q34_10,Q34_11,Q34_12,Q34_13,Q34_14,Q34_15,Q34_16,Q47,Q59,Q51,Q49,Q49_5_TEXT,Q48,Q50,Q61,Q62,Q65,Q35,Q38,trip_id,depart_time,newmin,ori_lon,ori_lat
783,11/2/2022 15:13,11/2/2022 15:27,IP Address,100,846,True,11/2/2022 15:27,R_1jNG4iKL8q7hQtk,email,EN,No,,,,I live off campus and work or attend class at ...,,,Staff member,,20-39 minutes,4 to 10 miles,30316,"Drive alone (car, truck, motorcycle, moped)","Drive alone (car, truck, motorcycle, moped)","Drive alone (car, truck, motorcycle, moped)","Drive alone (car, truck, motorcycle, moped)","Drive alone (car, truck, motorcycle, moped)",9:00am,5:00pm,Don't have anyone to ride/carpool with,,,,Irregular work schedule,,,,,Transit options are not available where I live,,Transit takes too long,,,,,Anything else takes too much time,,,,,,,,,,,,,,,,,,,,No,,,,,,,"Expanded regional transit options (MARTA, ligh...",,,,,,"Increased financial incentives (e.g., transit ...",,Need to save money,,"Safe, convenient bike paths and routes",,,,Vanpool program,,,,Bicycle,,,,,,,,,,,,,,,,,,,,,,,,"Yes, and I have used this program","No, I am not aware of this program","No, I am not aware of this program","No, I am not aware of this program","Yes, but I have NOT used this program","Yes, but I have NOT used this program","No, I am not aware of this program","No, I am not aware of this program","No, I am not aware of this program","No, I am not aware of this program","No, I am not aware of this program","Yes, but I have NOT used this program","No, I am not aware of this program","No, I am not aware of this program","Yes, and I have used this program","No, I am not aware of this program",less than $50,No,,,,,,,,,,Yes,783,2023-01-01 09:00:00,540.0,-84.3339,33.7217
2107,11/14/2022 15:03,11/14/2022 15:07,IP Address,100,245,True,11/14/2022 15:07,R_2SiQ74pFPH2RDMQ,email,EN,No,,,,I live off campus and work or attend class at ...,,,Staff member,,40-59 minutes,4 to 10 miles,30319,Telecommute (work from home),Telecommute (work from home),Telecommute (work from home),MARTA rail,Telecommute (work from home),8:30am,5:00pm,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,Chamblee,North Avenue station,,Drive,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,"No, I am not aware of this program","No, I am not aware of this program","No, I am not aware of this program","No, I am not aware of this program","No, I am not aware of this program","No, I am not aware of this program","Yes, and I have used this program","Yes, and I have used this program","No, I am not aware of this program","No, I am not aware of this program","No, I am not aware of this program","Yes, and I have used this program","No, I am not aware of this program","No, I am not aware of this program","No, I am not aware of this program","No, I am not aware of this program",,No,,,,,,,,,MARTA rail is great!,Yes,2107,2023-01-01 08:30:00,510.0,-84.3351,33.8687


In [109]:
gt_survey["ZIPCODE"].unique()

array([30344, 30310, 30345, 30038, 30312, 30319, 30318, 30034, 30180,
       30363, 30308, 30033, 30004, 30309, 30064, 30269, 30313, 30127,
       30047, 30329, 30549, 30068, 30339, 30316, 30075, 30350, 30326,
       30024, 30032, 90294, 30135, 30305, 30157, 30144, 30043, 30306,
       30327, 30097, 30153, 30030, 30519, 30126, 30022, 30008, 30035,
       30096, 30066, 30238, 30072, 30349, 30340, 30189, 30342, 30062,
       30152, 30040, 30116, 30044, 30046, 30364, 30341, 30067, 30078,
       30333, 30328, 30094, 30360, 30338,     0, 30324, 30019, 30542,
       30315, 30002, 30274, 30332, 30092, 30102, 30080, 30039, 30005,
       30101, 30307, 30314, 30041, 30143, 30228, 30012, 30518, 30263,
       30281, 30214, 30071, 30650, 30548, 30331, 30273, 30138, 30294,
       30114, 30213, 30317, 30141, 30122, 30076, 30303, 30016, 30060,
       30253, 30093, 33012, 30115, 30161, 30297, 30311, 30633, 30252,
       30013, 30215, 30082, 30084, 30188, 30028, 30337,  3014, 30087,
       31064, 30260,

In [99]:
# add Geogia Tech as final destination
gt_survey["dest_lon"] = -84.397971
gt_survey["dest_lat"] = 33.775766

In [100]:
# clean results
trip_name_map = {
    "ori_lon": "orig_lon",
    "ori_lat": "orig_lat",
    "dest_lon": "dest_lon",
    "dest_lat": "dest_lat",
    "newmin": "new_min"
}

trips = gt_survey.rename(columns=trip_name_map)

In [101]:
trips = gpd.GeoDataFrame(
    trips,
    geometry=gpd.points_from_xy(
        trips.orig_lon,
        trips.orig_lat
    ),
    crs="EPSG:4326"
)

In [108]:
trip_lst = []
for index, row in trips.iterrows():
    try: 
        trip_lst.append(
            TripDemand(
                row["trip_id"],
                row["orig_lon"], row["orig_lat"],
                row["dest_lon"], row["dest_lat"],
                row["new_min"], row["geometry"],
            )
        )
    except Exception as e:
        # many records do not have lon lat provided
        print(e)
        print("\n\n")

2 validation errors for TripDemand
1
  Input should be less than or equal to 180 [type=less_than_equal, input_value=nan, input_type=float]
    For further information visit https://errors.pydantic.dev/2.11/v/less_than_equal
2
  Input should be less than or equal to 90 [type=less_than_equal, input_value=nan, input_type=float]
    For further information visit https://errors.pydantic.dev/2.11/v/less_than_equal



2 validation errors for TripDemand
1
  Input should be less than or equal to 180 [type=less_than_equal, input_value=nan, input_type=float]
    For further information visit https://errors.pydantic.dev/2.11/v/less_than_equal
2
  Input should be less than or equal to 90 [type=less_than_equal, input_value=nan, input_type=float]
    For further information visit https://errors.pydantic.dev/2.11/v/less_than_equal



2 validation errors for TripDemand
1
  Input should be less than or equal to 180 [type=less_than_equal, input_value=nan, input_type=float]
    For further information vis

In [112]:
trips = gpd.GeoDataFrame(trip_lst, crs="EPSG:4326")
print(trips.shape)

(1539, 7)


In [113]:
trips.to_file(
    os.path.join(
        os.environ['data_inputs'], 
        "cleaned", 
        "trips.shp"
    )
)