In [None]:
import pandas as pd
import numpy as np

# Load the camera traffic counts export into a DataFrame.
df = pd.read_csv("../dataset/Camera_Traffic_Counts_20251108.csv", low_memory=False)

# Snake-case the headers: trim surrounding whitespace, lowercase, then collapse
# every run of non-word characters into a single underscore.
# NOTE: nothing strips underscores afterwards, so a header that ends in a
# non-word character (e.g. a closing parenthesis) keeps a trailing "_".
snake_case_names = df.columns.str.strip().str.lower()
snake_case_names = snake_case_names.str.replace(r"[^\w]+", "_", regex=True)
df.columns = snake_case_names

# Resolve which columns hold the device identifier and the vehicle count.
# Header spellings differ between exports, so match against known candidates
# and fail loudly (instead of leaving a name undefined) when nothing matches.
device_candidates = ("atd_device_id", "device_id", "camera_id")
# This export names its count column 'volume'; it is listed last so the
# previously recognised spellings keep their priority.
count_candidates = (
    "total_volume", "count", "vehicle_count", "vehicles", "traffic_count", "volume",
)

# First column (in file order) whose name is a known device-id spelling.
device_col = next((c for c in df.columns if c in device_candidates), None)
if device_col is None:
    raise KeyError(
        f"No device id column found; expected one of {list(device_candidates)}, "
        f"got columns {list(df.columns)}"
    )

# First candidate (in priority order) that is present in the frame.
count_col = next((c for c in count_candidates if c in df.columns), None)
if count_col is None:
    raise KeyError(
        f"No vehicle count column found; expected one of {list(count_candidates)}, "
        f"got columns {list(df.columns)}"
    )

# Derive a single `datetime` column: parse 'read_date' when the file has it,
# otherwise assemble the timestamp from the separate calendar component columns.
# Unparseable values become NaT in both paths (errors="coerce").
if "read_date" not in df.columns:
    date_parts = ["year", "month", "day", "hour", "minute"]
    for part in date_parts:
        if part not in df.columns:
            continue
        # Components may carry thousands separators (e.g. '2,020'): drop the
        # commas and surrounding whitespace before casting to nullable integers.
        as_text = df[part].astype("string")
        as_text = as_text.str.replace(",", "", regex=False).str.strip()
        df[part] = as_text.astype("Int64")
    df["datetime"] = pd.to_datetime(df[date_parts], errors="coerce")
else:
    df["datetime"] = pd.to_datetime(df["read_date"], errors="coerce")

# Convenience columns derived from the parsed timestamp.
timestamps = df["datetime"]
df["date"] = timestamps.dt.date
# Overwrites any existing 'hour' column with the hour taken from `datetime`.
df["hour"] = timestamps.dt.hour
# Start of the 15-minute slot each reading falls into (dataset is 15-min binned).
df["slot_15m"] = timestamps.dt.floor("15min")

# Bin duration in seconds, used to derive how many bins make up a full day.
# Defaults to 900 s (15 min) when the file has no usable duration column.
bin_sec = 900

# The header normalization replaces runs of non-word characters with "_" and
# does not strip the result, so the column shows up as 'bin_duration_seconds_'
# (trailing underscore) in this export. Accept both spellings; checking only
# the un-suffixed name silently skipped the column.
bin_col = next(
    (c for c in ("bin_duration_seconds", "bin_duration_seconds_") if c in df.columns),
    None,
)
if bin_col is not None:
    durations = pd.to_numeric(df[bin_col], errors="coerce").dropna()
    if not durations.empty:
        # Use the most common duration; keep the default if it is not a
        # positive whole number of seconds (avoids dividing by zero below).
        most_common = int(durations.mode().iat[0])
        if most_common > 0:
            bin_sec = most_common

# Number of bins a device should report per day at full coverage.
expected_bins_per_day = 86400 // bin_sec

df.head()


Unnamed: 0,record_id,atd_device_id,read_date,intersection_name,direction,movement,heavy_vehicle,volume,speed_average_miles_per_hour_,speed_stddev,...,month,day,year,hour,minute,day_of_week,bin_duration_seconds_,datetime,date,slot_15m
0,807feb986678a9d1502500e45b95cbfb,6547,2020 Nov 15 08:30:00 AM,RIVERSIDE DR / CROSSING PL,WESTBOUND,LEFT TURN,False,2.0,29.5,37.477,...,11.0,15.0,2020,8.0,30.0,0.0,900.0,2020-11-15 08:30:00,2020-11-15,2020-11-15 08:30:00
1,b68106cfd81e9616f0d212594a40fe4d,6547,2020 Nov 15 08:30:00 AM,RIVERSIDE DR / CROSSING PL,WESTBOUND,RIGHT TURN,False,1.0,9.0,0.0,...,11.0,15.0,2020,8.0,30.0,0.0,900.0,2020-11-15 08:30:00,2020-11-15,2020-11-15 08:30:00
2,264064fe0b8c1efa75746379aa8fa26d,6547,2020 Nov 15 08:30:00 AM,RIVERSIDE DR / CROSSING PL,WESTBOUND,THRU,False,98.0,31.265,9.358,...,11.0,15.0,2020,8.0,30.0,0.0,900.0,2020-11-15 08:30:00,2020-11-15,2020-11-15 08:30:00
3,73259de28a6c14f9b8a5fac147197530,6547,2020 Nov 15 08:30:00 AM,RIVERSIDE DR / CROSSING PL,WESTBOUND,THRU,True,2.0,26.5,13.435,...,11.0,15.0,2020,8.0,30.0,0.0,900.0,2020-11-15 08:30:00,2020-11-15,2020-11-15 08:30:00
4,b3e53e069b1b2dc39b0499e0d8f01e79,6547,2020 Nov 15 08:45:00 AM,RIVERSIDE DR / CROSSING PL,EASTBOUND,LEFT TURN,False,22.0,14.227,5.968,...,11.0,15.0,2020,8.0,45.0,0.0,900.0,2020-11-15 08:45:00,2020-11-15,2020-11-15 08:45:00


In [None]:
df.dtypes