In [1]:
# Import libraries
from pathlib import Path
import zipfile
import io
import datetime
import pandas as pd
import geopandas as gpd
import folium
from folium.plugins import Geocoder

**AO Transport Recommendations**

- *Rideshare and taxi patrons* will use the designated drop-off and pick-up location outside the *Grand Slam Oval entrance on Olympic Boulevard*.

- *Taxi patrons* may also be dropped off and picked up at the *Flinders Street Station taxi rank*.

- Patrons travelling by *car* can park at *Yarra Park*.

In [2]:
# Read traffic geojson file
site_gdf = gpd.read_file("../data/Traffic_Lights.geojson")

# Create traffic map
site_map = folium.Map(
    location=[site_gdf.geometry.y.mean(), site_gdf.geometry.x.mean()],
    tiles="OpenStreetMap"
)

# Create traffic sites
for row in site_gdf.itertuples():
    folium.CircleMarker(
        location=[row.geometry.y, row.geometry.x],
        radius=4,
        tooltip=f"Site Number: {row.SITE_NO}, Site Name: {row.SITE_NAME}"
    ).add_to(site_map)

# Add location search bar 
Geocoder().add_to(site_map)

site_map

Based on the AO transport recommendations and traffic map, the following sites were selected to best represent the traffic generated by AO patrons.

In [3]:
transport_dict = {"Taxi": {"Olympic Boulevard": [3445, 4551], "Flinders Street Station": [4530, 4561, 4563, 4570]},
                  "Rideshare": {"Olympic Boulevard": [3445, 4551]},
                  "Car": {"Yarra Park": [1052, 1586, 3391, 4452]}}

# Collate all sites into one list
site_lst = []
for outer_key in transport_dict.keys():
    for lst in transport_dict[outer_key].values():
        site_lst += lst
site_lst = set(site_lst)

site_lst

{1052, 1586, 3391, 3445, 4452, 4530, 4551, 4561, 4563, 4570}

In [6]:
# Clean traffic volume data

# directory_path = Path("../data")

# for file_path in directory_path.glob("*.zip"):

#     year = int(str(file_path)[-8:-4])
#     if year in [2022, 2023, 2024, 2025]:
#         continue

dfs = []
with zipfile.ZipFile("../data/traffic_signal_volume_data_2021.zip") as z1:
    for inner_zip_name in z1.namelist():
        with z1.open(inner_zip_name) as inner_zip_file:
            with zipfile.ZipFile(io.BytesIO(inner_zip_file.read())) as z2:
                for i, file_name in enumerate(sorted(z2.namelist())):
                    with z2.open(file_name) as f:
                        df = pd.read_csv(f)

                    # Filter relevant traffic sites
                    df = df[df["NB_SCATS_SITE"].isin(site_lst)].reset_index(drop=True)

                    # Extract date from filename
                    date_str = file_name[-10:-4]
                    date_obj = datetime.date(
                        int(date_str[0:2]),
                        int(date_str[2:4]),
                        int(date_str[4:6])
                    )

                    weekday = date_obj.weekday()

                    df = df.assign(
                        Year=date_obj.year,
                        Month=date_obj.month,
                        Date=date_obj.day,
                        Day=weekday,
                        Weekday=weekday < 5
                    )

                    dfs.append(df.iloc[1:] if i > 0 else df)
                    print(f"Reading {file_name} complete.")

traffic_df = pd.concat(dfs, ignore_index=True)

# Drop irrelavant columns
traffic_df.drop(["QT_INTERVAL_COUNT", "QT_VOLUME_24HOUR", "NM_REGION", "CT_ALARM_24HOUR"], axis=1, inplace=True)

# Rename column names
traffic_df.rename(columns={"NB_SCATS_SITE": "Site Number",
                "NB_DETECTOR": "Detector Number",
                "CT_RECORDS": "Number of Recorded Periods"}, inplace=True)

traffic_df["Total Traffic Volume"] = traffic_df.iloc[:, 6:102].sum()

# Sum traffic volume by site
traffic_df = traffic_df.groupby(["Site Number", "Year", "Month", "Date", "Day", "Weekday"], as_index=False).sum().drop(["Detector Number"], axis=1)

# Sum traffic volume by hour
j = 1
for i in range(1, 97, 4):
    traffic_df[f"Traffic Hour {j}"] = traffic_df.iloc[:, i+5:i+9].sum(axis=1)
    j += 1
traffic_df.drop(traffic_df.columns[6:102], axis=1, inplace=True)

# Add site name column
def add_site_name(row):
    for outer_key in transport_dict.keys():
        inner_dict = transport_dict[outer_key]
        for site_name in inner_dict.keys():
            if row["Site Number"] in inner_dict[site_name]:
                return site_name

# Apply the function row-by-row
traffic_df["Site Name"] = traffic_df.apply(add_site_name, axis=1)

# Save cleaned dataset for future faster loading
traffic_df.to_csv(f"../cleaned_data/traffic_volume_2021.csv", index=False)

Reading VSDATA_20210101.csv complete.
Reading VSDATA_20210102.csv complete.
Reading VSDATA_20210103.csv complete.
Reading VSDATA_20210104.csv complete.
Reading VSDATA_20210105.csv complete.
Reading VSDATA_20210106.csv complete.
Reading VSDATA_20210107.csv complete.
Reading VSDATA_20210108.csv complete.
Reading VSDATA_20210109.csv complete.
Reading VSDATA_20210110.csv complete.
Reading VSDATA_20210111.csv complete.
Reading VSDATA_20210112.csv complete.
Reading VSDATA_20210113.csv complete.
Reading VSDATA_20210114.csv complete.
Reading VSDATA_20210115.csv complete.
Reading VSDATA_20210116.csv complete.
Reading VSDATA_20210117.csv complete.
Reading VSDATA_20210118.csv complete.
Reading VSDATA_20210119.csv complete.
Reading VSDATA_20210120.csv complete.
Reading VSDATA_20210121.csv complete.
Reading VSDATA_20210122.csv complete.
Reading VSDATA_20210123.csv complete.
Reading VSDATA_20210124.csv complete.
Reading VSDATA_20210125.csv complete.
Reading VSDATA_20210126.csv complete.
Reading VSDA

UnicodeDecodeError: 'utf-8' codec can't decode byte 0xde in position 12: invalid continuation byte

In [None]:
# Need to add new total traffic volume column due to the negative values present
# Need to detect negative values in recorded periods and change to 0
# Remove rows with 0 i.e. traffic sites may have less than the limit number of detectors so won't have all rows filled up

traffic_df = pd.concat(dfs, ignore_index=True)

# Drop irrelavant columns
traffic_df.drop(["QT_INTERVAL_COUNT", "QT_VOLUME_24HOUR", "NM_REGION", "CT_ALARM_24HOUR"], axis=1, inplace=True)

# Rename column names
traffic_df.rename(columns={"NB_SCATS_SITE": "Site Number",
                "NB_DETECTOR": "Detector Number",
                "CT_RECORDS": "Number of Recorded Periods"}, inplace=True)

traffic_df["Total Traffic Volume"] = traffic_df[traffic_df.columns[6:102]].sum(axis=1) 

traffic_df

ValueError: the 'dtype' parameter is not supported in the pandas implementation of sum()