# Convert data frames to a standardized format

This notebook converts raw data frames into the standard format used by carpoolsim:
1. Traffic network links
2. Traffic network nodes
3. Traffic TAZs (polygons)
4. Traffic demands 

In [1]:
import time
import copy
import os
import sys

import numpy as np
import pandas as pd
import geopandas as gpd

%load_ext autoreload
%autoreload 2
pd.options.display.max_columns = None  # display all columns

In [2]:
# Register the project root as an environment variable and make it importable.
# NOTE: this path must be updated every time the project root folder is moved.
os.environ.update(project_root='/Users/geekduck/Dropbox/gt_survey')
sys.path.append(os.environ['project_root'])

In [3]:
from carpoolsim.basic_settings import *

In [4]:
# Load the raw traffic network layers (TAZ polygons, nodes, links).
# Each file path is read from an environment variable; these are presumably
# populated by the carpoolsim settings import above -- confirm.
tazs_raw, df_nodes_raw, df_links_raw = (
    gpd.read_file(os.environ[env_key])
    for env_key in ("taz", "network_nodes", "network_links")
)

In [19]:
from carpoolsim.dataclass.traffic_network import (
    TrafficNetworkLink,
    TrafficNetworkNode,
    TrafficAnalysisZone,
)

In [31]:
# Standardize the node layer: rename the raw columns to the common schema,
# then wrap every row in a TrafficNetworkNode record.
node_name_map = dict(
    N="nid",
    lat="lat",
    lon="lon",
    X="x",
    Y="y",
    geometry="geometry",
)
df_nodes_raw = df_nodes_raw.rename(columns=node_name_map)

# Positional argument order passed to TrafficNetworkNode.
_node_fields = ("nid", "lon", "lat", "x", "y", "geometry")
df_nodes_lst = [
    TrafficNetworkNode(*(node_row[field] for field in _node_fields))
    for _, node_row in df_nodes_raw.iterrows()
]

In [44]:
# clean link object
# Map the raw link attribute names onto the standardized lower-case schema.
link_name_map = {
    "A": "a",
    "B": "b",
    "DISTANCE": "distance",
    "FACTYPE": "factype",
    "geometry": "geometry",
    "SPEED_LIMI": "speed_limit"
}
df_links_raw = df_links_raw.rename(columns=link_name_map)
# Build the per-link identifier "<a>_<b>" element-wise.
# An f-string over the two Series would format the *whole* columns into one
# string and broadcast that same value to every row, so concatenate the
# string-cast columns instead.
# NOTE(review): if "a"/"b" are stored as floats the id will read like
# "1.0_2.0" -- cast to int first if integer ids are expected.
df_links_raw["a_b"] = (
    df_links_raw["a"].astype(str) + "_" + df_links_raw["b"].astype(str)
)


# Wrap every link row in a TrafficNetworkLink record (positional arguments:
# a, b, a_b, distance, factype, speed_limit, geometry).
df_links_lst = [
    TrafficNetworkLink(
        row["a"], row["b"], row["a_b"],
        row["distance"], row["factype"], row["speed_limit"],
        row["geometry"]
    )
    for _, row in df_links_raw.iterrows()
]

In [33]:
# Standardize the TAZ layer: rename the raw columns to the common schema,
# then wrap every polygon row in a TrafficAnalysisZone record.
taz_name_map = dict(OBJECTID="taz_id", geometry="geometry")
tazs_raw = tazs_raw.rename(columns=taz_name_map)

tazs_lst = [
    TrafficAnalysisZone(taz_row["taz_id"], taz_row["geometry"])
    for _, taz_row in tazs_raw.iterrows()
]

In [45]:
# Convert the lists of standardized records back into GeoDataFrames.
# A GeoDataFrame built from plain records has no coordinate reference system
# unless one is given, so pass the CRS of the matching raw layer; otherwise
# the projection metadata is lost in the frames (and in any files written
# from them). If a raw layer has no CRS, `crs=None` matches the default.
df_nodes = gpd.GeoDataFrame(df_nodes_lst, crs=df_nodes_raw.crs)
df_links = gpd.GeoDataFrame(df_links_lst, crs=df_links_raw.crs)
tazs = gpd.GeoDataFrame(tazs_lst, crs=tazs_raw.crs)

In [46]:
# Sanity check: report (rows, columns) of each cleaned layer, one per line.
print(tazs.shape, df_nodes.shape, df_links.shape, sep="\n")

(5873, 2)
(27524, 6)
(75289, 7)


In [48]:
# store cleaned results to shapefiles
tazs.to_file(
    os.path.join(os.environ['data_inputs'], "cleaned", "tazs.shp")
)
df_nodes.to_file(
    os.path.join(os.environ['data_inputs'], "cleaned", "nodes.shp")
)
df_links.to_file(
    os.path.join(os.environ['data_inputs'], "cleaned", "links.shp")
)

  df_links.to_file(
