In [None]:
import pandas as pd
from pandas import unique
import sys
from pathlib import Path
import pathlib

# Make the project-local packages imported below (utils, data_pipeline)
# resolvable. The project root is taken to be the parent of the current
# working directory, i.e. the notebook is assumed to run from a direct
# subdirectory of the project.
project_root = Path.cwd().parent
sys.path[:0] = [str(project_root)]

from utils.new_columns_fiorenzo import add_speed_direction_to_summary, pixel_meters_ratio, add_day_night_to_summary, classify_vehicle_types
from utils.plots_fiorenzo import visualize_classification, vehicle_count_by_category_bar_chart, average_speed_by_weekday_and_hour, speeding_vehicles_histogram, speed_category_bar_chart
from data_pipeline import process_all_sessions
from utils.loader import load_data_from_database

In [None]:
# Optionally reuse the summary CSV written by an earlier run instead of
# rebuilding it. `summary_loaded` is read by the following cells to decide
# whether the raw-data loading / processing steps must run.
final_summary = None
summary_loaded = False
processed_csv = Path('../data/processed_traffic_data.csv')
print("Do you want to load processed_traffic_data.csv?")
# strip() so that answers such as "y " or " Y" are accepted as well.
if input("Type 'y' to proceed: ").strip().lower() == 'y':
    if processed_csv.is_file():
        final_summary = pd.read_csv(processed_csv)
        summary_loaded = True
    else:
        # No cached file yet: leave summary_loaded False so the next cells
        # rebuild the summary instead of failing here with FileNotFoundError.
        print(f"{processed_csv} not found; the summary will be rebuilt from the raw data.")

In [None]:
# Raw detections are only needed when no processed summary was loaded.
if not summary_loaded:
    print("Do you want to load data from the database? This may take a while.")
    use_database = input("Type 'y' to proceed: ").lower() == 'y'
    if use_database:
        df = load_data_from_database()
    else:
        # Fallback source: the raw CSV export, with date_time parsed on read.
        df = pd.read_csv("../data/raw_traffic_data.csv", parse_dates=['date_time'])
    # Both sources are ordered the same way: by timestamp, then by frame id.
    df = df.sort_values(['date_time', 'frame_id'])

In [None]:
if not summary_loaded:
    # A row whose frame_id is lower than the previous row's marks the start of
    # a new session; the running count of such drops becomes the session id
    # (the first row's diff is NaN, which compares False, so ids start at 0).
    frame_id_dropped = df['frame_id'].diff().lt(0)
    df['session_id'] = frame_id_dropped.cumsum()

In [None]:
# Build the summary from the raw detections and cache it as CSV so later runs
# can load it directly.
if not summary_loaded:
    final_summary = process_all_sessions(df)

    # Quick sanity output: first rows and the distinct category values.
    preview_rows = final_summary.head()
    print(preview_rows)
    category_values = unique(final_summary['category'])
    print(category_values)

    final_summary.to_csv("../data/processed_traffic_data.csv", index=False)

In [None]:
## Filter viable vehicles
# Keep only the rows that carry a unified_id.
has_unified_id = final_summary['unified_id'].notna()
final_summary = final_summary.loc[has_unified_id]

In [None]:
## Make sure the plots directory exists
# Created together with any missing parents; a no-op when it is already there.
path = Path("../plots")
path.mkdir(exist_ok=True, parents=True)

In [None]:
## Filter time
# Convert both timestamp columns to datetimes before comparing them.
for time_column in ('t_start', 't_end'):
    final_summary[time_column] = pd.to_datetime(final_summary[time_column])

# Half-open window [start_date, end_date), applied to t_start only.
start_date = pd.to_datetime('2025-12-01')
end_date = pd.to_datetime('2026-01-01')

starts_on_or_after = final_summary['t_start'] >= start_date
starts_before_end = final_summary['t_start'] < end_date
final_summary = final_summary[starts_on_or_after & starts_before_end]

print(f"Filtered data from {start_date.date()} to {end_date.date()}")
print(f"Remaining records: {len(final_summary)}")
print(final_summary)

In [None]:
## Duration, Speed and Direction
# Elapsed time between t_start and t_end of each row, expressed in seconds.
elapsed = final_summary['t_end'] - final_summary['t_start']
final_summary['duration'] = elapsed.dt.total_seconds()

# Helper from utils.new_columns_fiorenzo; its output replaces final_summary.
# NOTE(review): presumably adds speed and direction columns - confirm in the helper.
final_summary = add_speed_direction_to_summary(final_summary)

print(final_summary.head())

In [None]:
## Create columns size_mean and h/w_mean ratio
mean_width = final_summary['w_mean']
mean_height = final_summary['h_mean']

# size_mean is the product of mean width and mean height.
final_summary['size_mean'] = mean_width * mean_height
# h_w_mean_ratio is mean height divided by mean width.
final_summary['h_w_mean_ratio'] = mean_height / mean_width
print(final_summary.head())

In [None]:
## Mean height, width and size in meters
# Lengths are converted by dividing once by pixel_meters_ratio
# (used here as pixels per meter).
final_summary['h_mean_meters'] = final_summary['h_mean'] / pixel_meters_ratio
final_summary['w_mean_meters'] = final_summary['w_mean'] / pixel_meters_ratio
# size_mean is an area (w_mean * h_mean, i.e. px^2), so it has to be divided by
# the ratio squared to obtain m^2. Dividing only once produced a value that
# did not equal h_mean_meters * w_mean_meters.
# NOTE(review): any threshold tuned against the old size_mean_meters values
# needs to be re-checked.
final_summary['size_mean_meters'] = final_summary['size_mean'] / pixel_meters_ratio ** 2
print(final_summary)

In [None]:
## Day/Night difference
# Replace final_summary with the output of add_day_night_to_summary (helper
# imported from utils.new_columns_fiorenzo).
# NOTE(review): presumably adds a day/night marker per row - confirm in the helper.
final_summary = add_day_night_to_summary(final_summary)
# Print the resulting frame for inspection.
print(final_summary)

In [None]:
## Apply vehicle classification
# Only night_width_threshold is overridden here; every other threshold is left
# to classify_vehicle_types (described by the original author as automatic,
# data-driven thresholds).
final_summary = classify_vehicle_types(final_summary, night_width_threshold=0, verbose=True)

# Optional: use custom thresholds if you want to fine-tune the classification.
# Example call (adjust values as needed):
# final_summary = classify_vehicle_types(
#     final_summary,
#     size_threshold=7500,      # Threshold for area (w_mean * h_mean)
#     width_threshold=70,        # Threshold for width
#     height_threshold=140,      # Threshold for height
#     verbose=True
# )

_RULE = "=" * 70


def _print_section_header(title):
    """Print a blank line, then `title` framed by two 70-character rules."""
    print("\n" + _RULE)
    print(title)
    print(_RULE)


# Show sample results
_print_section_header("SAMPLE CLASSIFIED VEHICLES")
print("\nFirst 10 vehicles:")
sample_columns = ['vehicle_id', 'w_mean', 'h_mean', 'size_mean', 'h_w_mean_ratio', 'Class']
print(final_summary[sample_columns].head(10).to_string(index=False))

# Cross-tabulate the category column against the assigned Class, with totals.
_print_section_header("CLASS DISTRIBUTION BY TRACK CATEGORY")
crosstab = pd.crosstab(
    index=final_summary['category'],
    columns=final_summary['Class'],
    margins=True,
    margins_name='Total',
)
print(crosstab)
print("\n" + _RULE)

In [None]:
## Visualize vehicle classification
# Same plot three times: all rows, then with day_only, then with night_only.
classification_views = (
    ("Total", {}),
    ("Day", {"day_only": True}),
    ("Night", {"night_only": True}),
)
for view_label, subset_flags in classification_views:
    print(view_label)
    visualize_classification(final_summary, show_plot=True, **subset_flags)

In [None]:
## Speeding vehicles histogram
# Plot helper imported from utils.plots_fiorenzo, called with show_plot=True.
# NOTE(review): no speed limit is passed in this call, so whatever limit the
# helper applies is decided inside the helper - adjust it there if needed.
speeding_vehicles_histogram(final_summary, show_plot=True)

In [None]:
## Distribution plot
# Plot helper imported from utils.plots_fiorenzo, called on the fully
# processed summary with show_plot=True.
vehicle_count_by_category_bar_chart(final_summary, show_plot=True)

In [None]:
## Average speed by weekday and hour
# Plot helper imported from utils.plots_fiorenzo, called on the fully
# processed summary with show_plot=True.
average_speed_by_weekday_and_hour(final_summary, show_plot=True)

In [None]:
## Speed category bar chart
# Plot helper imported from utils.plots_fiorenzo, called on the fully
# processed summary with show_plot=True.
speed_category_bar_chart(final_summary, show_plot=True)