In [1]:
import pandas as pd

In [2]:
# Level-1 labels (under the 'Traffic Data' group) that make up each aggregate.
_CARGO_VEHICLES = ['Heavy Truck', 'Medium Truck', 'Small Truck']
_PEOPLE_VEHICLES = ['Large Bus', 'Medium Bus', 'Micro Bus', 'Utility', 'Car',
                    'Motor Cycle', 'Auto Rickshaw']

# Column labels removed once the aggregates have been computed.
_DROP_LEVEL_1 = ['Heavy Truck', 'Medium Truck', 'Small Truck', 'Large Bus',
                 'Medium Bus', 'Micro Bus', 'Utility', 'Car', 'Auto Rickshaw',
                 'Motor Cycle', 'Bi-Cycle', 'Cycle Rickshaw', 'Cart', 'Motorized',
                 'Non Motorized', 'Total AADT', '(Km)', '(AADT)', 'Offset',
                 'Chainage']
_DROP_LEVEL_0 = ['Unnamed: 0_level_0', 'Traffic', 'Length']


def _sum_traffic_columns(df, vehicles):
    """Add the ('Traffic Data', <vehicle>) columns left to right with `+`."""
    total = df[('Traffic Data', vehicles[0])]
    for vehicle in vehicles[1:]:
        # Plain `+` keeps NaN propagation: a missing count makes the total missing.
        total = total + df[('Traffic Data', vehicle)]
    return total


def process_traffic_flows(road, data_dir="../data"):
    """Load and condense the traffic counts of one road.

    Reads ``<data_dir>/traffic_<road>.csv`` (two header rows, parsed as
    MultiIndex columns), aggregates the vehicle counts into a cargo and a
    people total, and returns one row per distinct average chainage with the
    totals averaged over all rows that share that chainage.

    Parameters
    ----------
    road : str
        Road identifier, e.g. ``'N1'``. It selects the input file, and the
        'Road' column is truncated to ``len(road)`` characters.
    data_dir : str or path-like, default "../data"
        Directory that contains the ``traffic_<road>.csv`` files.

    Returns
    -------
    pandas.DataFrame
        Flat-column frame with a fresh 0..n-1 index. 'Total Cargo' and
        'Total People' are the last two columns; the other retained columns
        include 'Road', 'Name', 'Average Chainage', 'Start_LRP' and 'End_LRP'.

    Raises
    ------
    KeyError
        If the file lacks one of the column labels that are read or dropped.
    """
    path = f"{data_dir}/traffic_{road}.csv"
    df = pd.read_csv(path, header=[0, 1])

    # 1. Two transport categories: cargo (trucks) and people (other motorized).
    df[('Aggregate Traffic Data', 'Total Cargo')] = _sum_traffic_columns(df, _CARGO_VEHICLES)
    df[('Aggregate Traffic Data', 'Total People')] = _sum_traffic_columns(df, _PEOPLE_VEHICLES)

    # 2. Midpoint between the start and end chainage of each row.
    df[('Average Traffic Data', 'Average Chainage')] = (
        df[('Start location', 'Chainage')] + df[('End location', 'Chainage')]
    ) / 2

    # 3. Remove the raw columns, first by level-1 label, then by level-0 label.
    #    The order is kept on purpose: some level-0 groups are already emptied
    #    by the level-1 pass.
    df = df.drop(columns=_DROP_LEVEL_1, level=1)
    df = df.drop(columns=_DROP_LEVEL_0, level=0)

    # 4. Copy the LRP columns under distinct names so they stay distinguishable
    #    after the top header level is removed. Copy-then-drop (rather than
    #    rename) keeps the new columns at the end of the frame.
    df[("Start location", "Start_LRP")] = df[("Start location", "LRP")]
    df[("End location", "End_LRP")] = df[("End location", "LRP")]
    df = df.drop(columns=[("Start location", "LRP"), ("End location", "LRP")])

    # 5. Flatten the header and give the two leading columns readable names.
    df = df.droplevel(0, axis=1)
    df = df.rename(columns={'Unnamed: 0_level_1': 'Road', 'Unnamed: 1_level_1': 'Name'})

    # 6. Mean cargo / people totals over every row sharing an average chainage.
    totals = ['Total Cargo', 'Total People']
    mean_totals = df.groupby('Average Chainage')[totals].mean()

    # 7. Keep the first row per chainage and reduce 'Road' to the road prefix.
    segments = df.drop(columns=totals).drop_duplicates(subset=['Average Chainage'])
    segments = segments.assign(Road=segments['Road'].str[0:len(road)])

    # 8. Attach the averaged totals and renumber the rows.
    return segments.join(mean_totals, on='Average Chainage').reset_index(drop=True)

In [3]:
# Load the intersection table and list every distinct road id it mentions.
intersections_network = pd.read_csv("../data/intersections.csv")

road_column = intersections_network["road"]
intersections_roads = list(road_column.unique())
intersections_roads

['N1', 'N2', 'N102', 'N104', 'N105', 'N8', 'N204', 'N207', 'N208']

In [4]:
# Build a {"traffic_<road>": processed frame} mapping, one entry per road.
# NOTE(review): df_names_list is created here but never filled in this cell.
flows_dict, df_names_list = {}, []
for road in intersections_roads:
    df_name, df = f"traffic_{road}", process_traffic_flows(road)
    flows_dict[df_name] = df

In [5]:
# Stack the per-road frames into one table with a fresh 0..n-1 index,
# then write it (index column included) to the data folder.
per_road_frames = list(flows_dict.values())
df_flows = pd.concat(per_road_frames, ignore_index=True)
df_flows.to_csv("../data/cleaned_traffic_flows.csv")