In [78]:
%pip install pandas numpy scikit-learn joblib streamlit plotly

Defaulting to user installation because normal site-packages is not writeable
Note: you may need to restart the kernel to use updated packages.



[notice] A new release of pip is available: 25.0.1 -> 25.1.1
[notice] To update, run: C:\Users\sukru\AppData\Local\Microsoft\WindowsApps\PythonSoftwareFoundation.Python.3.13_qbz5n2kfra8p0\python.exe -m pip install --upgrade pip


In [79]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
import joblib
from datetime import datetime, timedelta
import os
from sklearn.ensemble import RandomForestClassifier
from sklearn.pipeline import Pipeline

In [80]:
DATA_PATH = 'D:\\junk\\Banglore_traffic_Dataset.csv'


def load_data(path=None):
    """Read the traffic dataset CSV into a DataFrame.

    Parameters
    ----------
    path : str or path-like, optional
        CSV file to read. When omitted, the module-level ``DATA_PATH`` is
        used, so existing ``load_data()`` calls behave as before. Passing a
        path lets the notebook run on machines where ``DATA_PATH`` does not
        exist.

    Returns
    -------
    pandas.DataFrame
        The file contents, with the ``Date`` column parsed as datetimes.
    """
    csv_path = DATA_PATH if path is None else path
    return pd.read_csv(csv_path, parse_dates=['Date'])

In [81]:
def preprocess_data(df):
    """Turn the raw traffic frame into an encoded feature matrix and target.

    Steps performed:

    * bucket ``Congestion_Level`` into ``Low`` / ``Medium`` / ``High`` and
      store the result on ``df`` as ``CongestionCategory``;
    * derive ``DayOfWeek`` and ``Month`` from ``Date`` and drop ``Date``;
    * label-encode every categorical feature column and the target, saving
      each fitted encoder under the ``encoders`` directory with joblib.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain ``Date`` (datetime), ``Congestion_Level`` and the four
        categorical feature columns listed in ``features`` below.
        NOTE: ``df`` gains a ``CongestionCategory`` column as a side effect.

    Returns
    -------
    (pandas.DataFrame, array-like)
        ``X`` with the encoded categorical columns plus ``DayOfWeek`` and
        ``Month``, and ``y_enc``, the label-encoded congestion category.
    """
    # Ensure base encoders directory exists
    os.makedirs("encoders", exist_ok=True)

    bins = [0, 30, 70, 100]
    labels = ['Low', 'Medium', 'High']
    # NOTE(review): pd.cut leaves values outside the 0-100 range without a
    # category; confirm the dataset never contains such rows.
    df['CongestionCategory'] = pd.cut(df['Congestion_Level'], bins=bins, labels=labels, include_lowest=True)

    features = ['Date', 'Area_Name', 'Road_Intersection_Name', 'Weather_Conditions', 'Roadwork_and_Construction_Activity']
    # Work on an explicit copy: assigning new columns to a bare ``df[features]``
    # selection triggers pandas' SettingWithCopyWarning and is ambiguous about
    # whether the caller's frame is touched.
    X = df[features].copy()
    y = df['CongestionCategory']

    X['DayOfWeek'] = X['Date'].dt.dayofweek
    X['Month'] = X['Date'].dt.month
    X = X.drop(columns=['Date'])

    cat_features = ['Area_Name', 'Road_Intersection_Name', 'Weather_Conditions', 'Roadwork_and_Construction_Activity']
    for col in cat_features:
        le = LabelEncoder()
        # Cast to str so mixed-type / missing entries encode consistently.
        X[col] = le.fit_transform(X[col].astype(str))

        # Sanitize the filename
        safe_col = col.replace("/", "_").replace("\\", "_").replace(" ", "_")
        path = f'encoders/{safe_col}_label_encoder.joblib'
        joblib.dump(le, path)

    # The target gets its own encoder so predictions can be mapped back to
    # the Low / Medium / High labels later.
    label_encoder = LabelEncoder()
    y_enc = label_encoder.fit_transform(y)
    joblib.dump(label_encoder, 'encoders/congestion_label_encoder.joblib')

    return X, y_enc

In [82]:
# Feature engineering
def prepare_features(df):
    """Add cyclical time encodings to ``df`` and return the model inputs.

    The ``hour`` column (period 24) and the ``day_of_week`` column (period 7)
    are each mapped to a sine/cosine pair. The four new columns are written
    onto ``df`` itself.

    Parameters
    ----------
    df : pandas.DataFrame
        Needs ``hour``, ``day_of_week``, ``temperature``, ``precipitation``,
        ``special_event``, ``road_work`` and ``vehicle_count`` columns.

    Returns
    -------
    pandas.DataFrame
        The nine feature columns selected from ``df``, in a fixed order.
    """
    # (source column, period, sine column, cosine column)
    cyclical_specs = (
        ('hour', 24, 'hour_sin', 'hour_cos'),
        ('day_of_week', 7, 'day_sin', 'day_cos'),
    )
    for source_col, period, sin_col, cos_col in cyclical_specs:
        angle = 2 * np.pi * df[source_col] / period
        df[sin_col] = np.sin(angle)
        df[cos_col] = np.cos(angle)

    selected = [
        'hour_sin', 'hour_cos', 'day_sin', 'day_cos',
        'temperature', 'precipitation', 'special_event',
        'road_work', 'vehicle_count',
    ]
    return df[selected]

In [83]:
def train_model(X, y):
    """Fit a scaler + random-forest pipeline and persist it to disk.

    Parameters
    ----------
    X : feature matrix accepted by scikit-learn estimators.
    y : target labels aligned with ``X``.

    Returns
    -------
    sklearn.pipeline.Pipeline
        The fitted pipeline, which is also written to ``rf_pipeline.joblib``
        in the current working directory.
    """
    # Keeping scaler and classifier in one Pipeline means a single artifact
    # carries both preprocessing and model.
    steps = [
        ('scaler', StandardScaler()),
        ('rf', RandomForestClassifier(n_estimators=100, random_state=42)),
    ]
    fitted = Pipeline(steps)
    fitted.fit(X, y)

    # Save the pipeline (model + scaler)
    joblib.dump(fitted, 'rf_pipeline.joblib')
    return fitted

In [84]:
def load_label_encoders():
    """Load the persisted label encoders from the ``encoders`` directory.

    Returns
    -------
    tuple
        ``(area, road, weather, roadwork, congestion)`` encoders. The road
        encoder is ``None`` when its file does not exist; for the other four
        a missing file surfaces as whatever ``joblib.load`` raises (``main``
        relies on that being ``FileNotFoundError``).
    """
    from pathlib import Path

    road_file = 'encoders/Road_Intersection_Name_label_encoder.joblib'

    area_encoder = joblib.load('encoders/Area_Name_label_encoder.joblib')

    # The road encoder is the only optional artifact.
    if Path(road_file).exists():
        road_encoder = joblib.load(road_file)
    else:
        road_encoder = None

    weather_encoder = joblib.load('encoders/Weather_Conditions_label_encoder.joblib')
    roadwork_encoder = joblib.load('encoders/Roadwork_and_Construction_Activity_label_encoder.joblib')
    congestion_encoder = joblib.load('encoders/congestion_label_encoder.joblib')

    return (
        area_encoder,
        road_encoder,
        weather_encoder,
        roadwork_encoder,
        congestion_encoder,
    )

In [85]:

def main():
    """Load the cached pipeline and encoders, training them first if needed.

    The saved ``rf_pipeline.joblib`` and the label encoders are loaded when
    present. If any required artifact is missing (``FileNotFoundError``), the
    dataset is read, preprocessed and used to train a fresh pipeline, after
    which the encoders written during preprocessing are loaded.

    The dataset is only read on the training path, so a machine that has the
    saved artifacts but not the CSV can still run this.

    Returns
    -------
    tuple
        ``(pipeline, encoders)`` where ``encoders`` is the 5-tuple
        ``(le_area, le_road, le_weather, le_roadwork, le_congestion)``
        returned by ``load_label_encoders``.
    """
    try:
        pipeline = joblib.load('rf_pipeline.joblib')
        le_area, le_road, le_weather, le_roadwork, le_congestion = load_label_encoders()
    except FileNotFoundError:
        # Train model if not exists; the CSV is needed only on this path.
        df = load_data()
        X, y = preprocess_data(df)
        pipeline = train_model(X, y)
        le_area, le_road, le_weather, le_roadwork, le_congestion = load_label_encoders()

    return pipeline, (le_area, le_road, le_weather, le_roadwork, le_congestion)

if __name__ == "__main__":
    main()