In [37]:
import pandas as pd
import os
from pathlib import Path
import logging
import warnings
import itertools

from utils import *

# Silence every warning category for the rest of the kernel session.
# NOTE(review): this blanket filter also hides pandas deprecation and
# performance warnings raised by later cells - consider narrowing it.
warnings.simplefilter("ignore")

# INFO-level log lines carrying a timestamp plus the emitting file and line.
logging.basicConfig(
    format="%(asctime)s - %(levelname)s - [%(filename)s:%(lineno)d] - %(message)s",
    level=logging.INFO,
)



In [3]:
def read_network(network_path):
    """Load the CSV at ``network_path`` and index the resulting frame by its ``id`` column."""
    network_table = pd.read_csv(network_path, index_col="id", low_memory=False)
    return network_table


def read_volume(volume_path):
    """Load a per-link CSV and index it by link id.

    The file's ``link_id`` column is renamed to ``id`` before it becomes the
    index, so the result lines up with a frame indexed by ``id``.
    """
    raw_table = pd.read_csv(volume_path, low_memory=False)
    renamed_table = raw_table.rename(columns={"link_id": "id"})
    return renamed_table.set_index("id")

# final entry point

In [88]:
# Folder holding link_table.csv and the per-mode volumes_/times_/passengers_ CSVs.
# Set the MONTY_SCENARIO_PATH environment variable to point the notebook at
# another scenario or machine; the fallback is the original hard-coded location.
scenario_path = os.environ.get(
    "MONTY_SCENARIO_PATH",
    '/Users/tszchun.chow/Documents/GitHub/monty-data-visualization/pt_variations/baseline',
)

# Modes processed with volume + time files only.
PV_MODES = ["pv", "freight"]
# Modes processed with volume + time + passenger files.
PT_MODES = ["bus", "rail"]
# Link-table columns used as grouping keys for the summaries.
BOUNDARIES = ["regional_council"]

# Metric key -> human-readable label written to the "Metric" output column.
ASSIGNMENT_METRICS = {
    'vkt': 'Vehicle Kilometres Travelled',
    'vht': 'Vehicle Hours Travelled',
    'pkt': 'Passengers Kilometres Travelled',
    'pht': 'Passengers Hours Travelled',
    'length': 'Road Kilometres',
    'lane_km': 'Lane Kilometres',
}

# Hourly metrics summarised for PT modes, for PV modes, and the
# non-hourly network metrics (all are keys of ASSIGNMENT_METRICS).
PT_ASSIGNMENT_METRICS = ['vkt', 'vht', 'pkt', 'pht']
PV_ASSIGNMENT_METRICS = ['vkt', 'vht']
NETWORK_METRICS = ['length', 'lane_km']

In [93]:
def assignment_hourly_groupby(df, group, metric, area, period_key, period, mode):
    """Total an hourly metric over the hours of one period, per area.

    Parameters
    ----------
    df : pd.DataFrame
        Table containing the ``area`` column and one ``"{hour}_{metric}"``
        column for every hour listed in ``period``.
    group : str
        Label written to the ``Group`` column.
    metric : str
        Suffix of the hourly columns; also a key of ``ASSIGNMENT_METRICS``
        whose label is written to the ``Metric`` column.
    area : str
        Name of the column to group by.
    period_key : str
        Label written to the ``period`` column.
    period : iterable
        Hours whose columns are added together.
    mode : str
        Label written to the ``mode`` column.

    Returns
    -------
    pd.DataFrame
        One row per distinct ``area`` value with columns
        ``area``, ``Value``, ``period``, ``mode``, ``Group``, ``Metric``.

    Raises
    ------
    KeyError
        If ``area`` or one of the hourly columns is missing from ``df``, or
        ``metric`` is not a key of ``ASSIGNMENT_METRICS``.
    """
    hourly_columns = [f"{hour}_{metric}" for hour in period]

    # Use pandas' own grouped sum instead of ``.apply(sum)``. Passing the
    # Python builtin only worked because pandas swapped it for its own
    # implementation, which recent releases deprecate; the builtin itself
    # would iterate over a group's column labels and fail.
    per_area_hourly = df.groupby(area)[hourly_columns].sum()

    # Collapse the hours of the period into a single total per area.
    tmp_df = per_area_hourly.sum(axis=1).rename("Value").reset_index()
    tmp_df['period'] = period_key
    tmp_df['mode'] = mode
    tmp_df['Group'] = group
    tmp_df['Metric'] = ASSIGNMENT_METRICS[metric]

    return tmp_df

def assignment_distance_groupby(df, group, metric, area, mode, division_factor = 1000):
    """Total a non-hourly column per area and scale it.

    Parameters
    ----------
    df : pd.DataFrame
        Table containing the ``area`` and ``metric`` columns.
    group : str
        Label written to the ``Group`` column.
    metric : str
        Column to total; also a key of ``ASSIGNMENT_METRICS`` whose label is
        written to the ``Metric`` column.
    area : str
        Name of the column to group by.
    mode : str
        Label written to the ``mode`` column.
    division_factor : number, default 1000
        Each per-area total is divided by this value.

    Returns
    -------
    pd.DataFrame
        One row per distinct ``area`` value with columns
        ``area``, ``Value``, ``mode``, ``Group``, ``Metric``.

    Raises
    ------
    KeyError
        If ``area`` or ``metric`` is missing from ``df``, or ``metric`` is not
        a key of ``ASSIGNMENT_METRICS``.
    """
    # Use the pandas grouped sum rather than handing the Python builtin
    # ``sum`` to ``agg``: the builtin only behaved like a NaN-skipping sum
    # because pandas substituted its own implementation, which recent
    # releases deprecate.
    per_area_total = df.groupby(area)[metric].sum() / division_factor

    tmp_df = per_area_total.rename("Value").reset_index()
    tmp_df['mode'] = mode
    tmp_df['Group'] = group
    tmp_df['Metric'] = ASSIGNMENT_METRICS[metric]

    return tmp_df

def calculate_pt_metrics(network_df, volume_df, time_df, passenger_df, mode) -> pd.DataFrame:
    """Summarise vehicle and passenger travel for one public-transport mode.

    Parameters
    ----------
    network_df : pd.DataFrame
        Link table keyed by ``id``; must contain ``length`` and every column
        named in ``BOUNDARIES``.
    volume_df, time_df, passenger_df : pd.DataFrame
        Per-link tables keyed by ``id`` with hourly columns ``"0"`` .. ``"23"``.
    mode : str
        Label written to the ``mode`` column of the summary.

    Returns
    -------
    pd.DataFrame
        The concatenation of ``assignment_hourly_groupby`` results for every
        boundary in ``BOUNDARIES``, every period in ``PERIODS`` and every
        metric in ``PT_ASSIGNMENT_METRICS``.

    Notes
    -----
    ``PERIODS`` is not defined in the visible cells of this notebook;
    presumably it is provided by ``from utils import *`` - TODO confirm.
    """
    # Inner join: only links that appear in the volume table are kept.
    df = network_df.join(volume_df, on="id", how="inner")
    # The hourly column names collide with the volume columns, so the time and
    # passenger columns receive the "_time" / "_passenger" suffixes.
    df = df.join(time_df, on="id", rsuffix="_time")
    df = df.join(passenger_df, on="id", rsuffix="_passenger")

    # Loop-invariant scaling of link length
    # (presumably metres -> kilometres; TODO confirm the source units).
    length_scaled = df["length"] / 1000

    # Add additional columns
    for i in range(0, 24):
        # Presumably seconds -> hours; TODO confirm the source units.
        time_scaled = df[f"{i}_time"] / 3600
        # Vehicle travel information
        df[f"{i}_vkt"] = df[f"{i}"] * length_scaled
        df[f"{i}_vht"] = df[f"{i}"] * time_scaled
        # Passenger travel information
        df[f"{i}_pkt"] = df[f"{i}_passenger"] * length_scaled
        df[f"{i}_pht"] = df[f"{i}_passenger"] * time_scaled

    pt_assignment_metrics_dfs = [
        assignment_hourly_groupby(df, 'Assignment', metric, area, period_key, period, mode)
        for area in BOUNDARIES
        for period_key, period in PERIODS.items()
        for metric in PT_ASSIGNMENT_METRICS
    ]

    return pd.concat(pt_assignment_metrics_dfs)

def calculate_pv_metrics(network_df, volume_df, time_df, mode) -> pd.DataFrame:
    """Summarise network size and vehicle travel for one private-vehicle mode.

    Parameters
    ----------
    network_df : pd.DataFrame
        Link table keyed by ``id``; must contain ``length``, ``lanes`` and
        every column named in ``BOUNDARIES``.
    volume_df, time_df : pd.DataFrame
        Per-link tables keyed by ``id`` with hourly columns ``"0"`` .. ``"23"``.
    mode : str
        Label written to the ``mode`` column of the summary.

    Returns
    -------
    pd.DataFrame
        For every boundary in ``BOUNDARIES``: first the
        ``assignment_distance_groupby`` results for each metric in
        ``NETWORK_METRICS``, then the ``assignment_hourly_groupby`` results
        for every period in ``PERIODS`` and metric in
        ``PV_ASSIGNMENT_METRICS``, all concatenated.

    Notes
    -----
    ``PERIODS`` is not defined in the visible cells of this notebook;
    presumably it is provided by ``from utils import *`` - TODO confirm.
    The hourly volume/capacity ratio columns that used to be built here were
    never read or returned, so they are no longer computed and a ``capacity``
    column is not required.
    """
    # Inner join: only links that appear in the volume table are kept.
    df = network_df.join(volume_df, on="id", how="inner")
    # Hourly time columns collide with the volume columns and get "_time".
    df = df.join(time_df, on="id", rsuffix="_time")

    # Loop-invariant scaling of link length
    # (presumably metres -> kilometres; TODO confirm the source units).
    length_scaled = df["length"] / 1000

    # Add additional columns
    for i in range(0, 24):
        df[f"{i}_vkt"] = df[f"{i}"] * length_scaled
        df[f"{i}_vht"] = df[f"{i}"] * (df[f"{i}_time"] / 3600)

    # Still in the units of "length" here; assignment_distance_groupby applies
    # its division factor when the column is summarised.
    df["lane_km"] = df["length"] * df["lanes"]

    pv_assignment_metrics_dfs = []

    for area in BOUNDARIES:
        # Network size metrics (no period dimension)
        for metric in NETWORK_METRICS:
            pv_assignment_metrics_dfs.append(assignment_distance_groupby(df, 'Network', metric, area, mode))
        # Hourly assignment metrics, one summary per period
        for period_key, period in PERIODS.items():
            for metric in PV_ASSIGNMENT_METRICS:
                pv_assignment_metrics_dfs.append(assignment_hourly_groupby(df, 'Assignment', metric, area, period_key, period, mode))

    return pd.concat(pv_assignment_metrics_dfs)

In [95]:
# Per-mode summary frames, concatenated once every mode has been processed.
tmp_assignment_dfs = []
logging.info("Read network....")
network_df = read_network(os.path.join(scenario_path, "link_table.csv"))

# Calculate for PV
for mode in PV_MODES:
    logging.info(f"Read volume data for {mode}")

    volume_df = read_volume(os.path.join(scenario_path, f"volumes_{mode}.csv"))
    time_df = read_volume(os.path.join(scenario_path, f"times_{mode}.csv"))

    logging.info(f"Calculate pv metrics for {mode}")
    tmp_pv_metrics = calculate_pv_metrics(network_df, volume_df, time_df, mode)

    tmp_assignment_dfs.append(tmp_pv_metrics)

# Calculate for PT
for mode in PT_MODES:
    logging.info(f"Read passenger data for {mode}")
    # Fall back to the "train" file naming when no rail volume file exists.
    # NOTE(review): the fallback also changes the label written to the "mode"
    # column from "rail" to "train" - confirm that is intended.
    if mode == 'rail' and not os.path.exists(os.path.join(scenario_path, f"volumes_{mode}.csv")):
        mode = 'train'
    volume_df = read_volume(os.path.join(scenario_path, f"volumes_{mode}.csv"))
    time_df = read_volume(os.path.join(scenario_path, f"times_{mode}.csv"))
    passenger_df = read_volume(
        os.path.join(scenario_path, f"passengers_{mode}.csv")
    )

    logging.info(f"Calculate pt metrics for {mode}")
    tmp_pt_metrics = calculate_pt_metrics(
        network_df, volume_df, time_df, passenger_df, mode
    )

    tmp_assignment_dfs.append(tmp_pt_metrics)

assignment_summary = pd.concat(tmp_assignment_dfs, ignore_index=True)
# Network rows carry no period; give them a placeholder so the groupby below
# does not drop them (NaN keys are excluded from groups by default).
assignment_summary['period'] = assignment_summary['period'].fillna('-')
# Append totals across all regions. 'Group' is included as the last key so the
# total rows keep their Group label instead of NaN; it is fixed per Metric, so
# the number and order of total rows is unchanged.
assignment_summary = pd.concat([
    assignment_summary,
    assignment_summary
    .groupby(['Metric', 'mode', 'period', 'Group'])
    .agg({'Value': 'sum'})
    .reset_index(),
])
# The total rows have no region; label them 'All'.
assignment_summary['regional_council'] = assignment_summary['regional_council'].fillna('All')
assignment_summary

2023-12-13 11:05:35,486 - INFO - [1762638169.py:3] - Read network....
2023-12-13 11:05:36,300 - INFO - [1762638169.py:8] - Read volume data for pv
2023-12-13 11:05:37,721 - INFO - [1762638169.py:13] - Calculate pv metrics for pv
2023-12-13 11:05:39,592 - INFO - [1762638169.py:8] - Read volume data for freight
2023-12-13 11:05:39,619 - INFO - [1762638169.py:13] - Calculate pv metrics for freight
2023-12-13 11:05:39,755 - INFO - [1762638169.py:20] - Read passenger data for bus
2023-12-13 11:05:40,296 - INFO - [1762638169.py:30] - Calculate pt metrics for bus
2023-12-13 11:05:41,471 - INFO - [1762638169.py:20] - Read passenger data for rail
2023-12-13 11:05:41,501 - INFO - [1762638169.py:30] - Calculate pt metrics for rail


Unnamed: 0,regional_council,Value,mode,Metric,period
0,Auckland Region,10867.402588,pv,Road Kilometres,-
1,Bay of Plenty Region,6238.036666,pv,Road Kilometres,-
2,Canterbury Region,24182.891858,pv,Road Kilometres,-
3,Gisborne Region,2995.489650,pv,Road Kilometres,-
4,Hawke's Bay Region,6863.760215,pv,Road Kilometres,-
...,...,...,...,...,...
47,All,6171.808351,rail,Vehicle Kilometres Travelled,AM
48,All,40645.533678,rail,Vehicle Kilometres Travelled,Daily
49,All,13585.330348,rail,Vehicle Kilometres Travelled,IP
50,All,9260.385536,rail,Vehicle Kilometres Travelled,OP


In [None]:
# Read the frequency-variations parquet file; the resulting frame is the
# cell's displayed output and is not assigned to a name.
# NOTE(review): absolute, user-specific path - this cell only runs as-is on a
# machine with the same directory layout.
parquet = '/Users/tszchun.chow/Documents/GitHub/monty-data-visualization/pt_variations/frequency_variations.parquet'
pd.read_parquet(parquet)