In [1]:
import pandas as pd
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_absolute_error, r2_score
from sklearn.preprocessing import OneHotEncoder
from scipy import sparse
import joblib

# Read the complete traffic CSV into memory and report how many rows it holds.
print("Loading dataset...")
csv_path = "futuristic_city_traffic.csv"
df = pd.read_csv(csv_path, encoding="utf-8")
print(f"Dataset loaded. Total rows: {len(df)}")


Loading dataset...
Dataset loaded. Total rows: 1219567


In [2]:
# Sample up to 60,000 rows randomly for faster training.
# DataFrame.sample raises ValueError when n exceeds the number of rows (sampling
# is without replacement here), so the request is capped at the dataset size.
# The fixed random_state keeps the sample reproducible across runs.
sample_size = min(60000, len(df))
df_sample = df.sample(n=sample_size, random_state=42).reset_index(drop=True)


In [4]:
# Define features: the first list is one-hot encoded, the second is passed
# through unchanged as numeric columns.
categorical_features = ["City", "Vehicle Type", "Weather", "Economic Condition", "Day Of Week"]
numerical_features = ["Hour Of Day", "Speed", "Is Peak Hour", "Random Event Occurred", "Energy Consumption"]

print("Encoding categorical features...")
# Sparse output is the OneHotEncoder default, so the flag is left implicit: the
# keyword is `sparse_output` from scikit-learn 1.2 on but was `sparse` before
# that, and naming either one fails on the other side of the rename.
# handle_unknown='ignore' keeps transform() from raising on categories that were
# not present when the encoder was fitted.
encoder = OneHotEncoder(handle_unknown='ignore')
X_cat = encoder.fit_transform(df_sample[categorical_features])

# Combine with numerical features (one-hot columns first, numeric columns last).
# Force float64 so that a flag column parsed as bool next to numeric columns
# cannot yield an object-dtype array, which would not stack into a numeric
# sparse matrix.
X_num = df_sample[numerical_features].to_numpy(dtype="float64")
X = sparse.hstack([X_cat, X_num], format="csr")
y = df_sample["Traffic Density"].values


Encoding categorical features...


In [5]:
print("Splitting data and training model...")
# Hold out 20% of the rows for evaluation; the seed fixes the partition.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Random forest settings: 100 trees, seeded, n_jobs=-1 to use every core.
forest_params = {"n_estimators": 100, "random_state": 42, "n_jobs": -1}
model = RandomForestRegressor(**forest_params)
model.fit(X_train, y_train)
print("Model training complete.")


Splitting data and training model...
Model training complete.


In [6]:
# Score the fitted model on the held-out split: mean absolute error and R².
y_pred = model.predict(X_test)
mae, r2 = (
    mean_absolute_error(y_test, y_pred),
    r2_score(y_test, y_pred),
)

print(f"MAE: {mae:.4f}")
print(f"R² Score: {r2:.4f}")


MAE: 0.0702
R² Score: 0.7727


In [7]:
# Persist the fitted model and the fitted encoder as separate joblib files.
for artifact, target_file in ((model, "traffic_model.pkl"), (encoder, "encoder.pkl")):
    joblib.dump(artifact, target_file)
print("Model and encoder saved successfully.")


Model and encoder saved successfully.
