In [6]:
import pandas as pd
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_absolute_error, r2_score
import streamlit as st

# Loading data
# Both CSVs are read the same way: the "datetime" column is parsed into
# timestamps on load so it can serve as the join key below.
traffic_df, weather_df = (
    pd.read_csv(csv_path, parse_dates=["datetime"])
    for csv_path in ("traffic_data.csv", "weather_data.csv")
)

# Merging datasets on datetime
# Inner join (the pandas default): only timestamps present in both tables
# survive into `df`.
df = traffic_df.merge(weather_df, how="inner", on="datetime")

# Displaying raw data
st.title("Urban Traffic & Weather Impact Study (Offline Mode)")

# The merge keeps only rows whose datetime appears in both inputs, so it can
# legitimately come back empty. Everything below (correlation, train/test
# split, idxmin) is meaningless or raises on an empty frame, so report the
# problem clearly and halt the script here instead of failing later with an
# unrelated-looking traceback.
if df.empty:
    st.error(
        "The merged dataset is empty - check that the traffic and weather "
        "records share matching datetime values."
    )
    st.stop()

st.write("### Merged Dataset")
st.dataframe(df)

# Correlation analysis
# numeric_only=True restricts the correlation matrix to numeric columns, so
# non-numeric columns (such as the datetime key) are left out.
st.write("### Correlation Analysis")
corr = df.corr(numeric_only=True)
st.dataframe(corr)

# Preparing data for ML model
# Three columns of the merged frame are the predictors; the congestion level
# is the regression target.
feature_columns = ["temperature", "rain_mm", "air_quality_index"]
X = df[feature_columns]
y = df["congestion_level"]

# Train-test split
# Hold out 20% of the rows for evaluation; the fixed seed keeps the split
# reproducible between runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Training Random Forest model
# 100 trees, seeded for reproducibility. `fit` returns the estimator itself,
# so construction and training are chained.
model = RandomForestRegressor(n_estimators=100, random_state=42).fit(
    X_train, y_train
)

# Predictions on the held-out rows only
y_pred = model.predict(X_test)

# Model evaluation
# Both metrics compare the held-out targets against the predictions above.
mae = mean_absolute_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)

st.write("### Model Performance")
st.write(f"Mean Absolute Error: {mae:.2f}")
st.write(f"R² Score: {r2:.2f}")

# Best time to travel prediction
# Score every merged row with the trained model and report the timestamp
# whose predicted congestion is lowest (idxmin returns the first such row on
# ties).
# NOTE(review): X also contains the rows the model was trained on, so most of
# these predictions are in-sample - confirm that is acceptable for this view.
st.write("### Best Time to Travel Prediction")
df["predicted_congestion"] = model.predict(X)
best_time = df.loc[df["predicted_congestion"].idxmin()]

st.write(f"Best time to travel: {best_time['datetime']} "
         f"with predicted congestion level {best_time['predicted_congestion']:.1f}%")

# Visualization
# The chart draws observed and model-predicted congestion against time; no
# weather column is plotted, so the heading names what is actually shown
# (it previously read "Congestion vs Weather Factors").
st.write("### Actual vs Predicted Congestion")
st.line_chart(df.set_index("datetime")[["congestion_level", "predicted_congestion"]])




DeltaGenerator()