# In [None]:
# Step 1: Upload File
# Uses the Colab-specific `files` helper, so this step presumably only runs
# inside a Google Colab runtime.
# NOTE(review): per the Colab docs, `files.upload()` returns a dict keyed by
# the uploaded file name — confirm. The result is kept in `uploaded`.
from google.colab import files
uploaded = files.upload()

# Step 2: Import Libraries
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report, confusion_matrix

# Step 3: Load CSV
# Prefer the CSV that was just uploaded in Step 1 over a fixed file name.
# A hard-coded path fails when the upload is named differently.
# NOTE(review): Colab appears to store a re-uploaded file under a new name
# (e.g. "name (1).csv") — confirm; if so, the fixed path would silently read
# the stale copy.
# The original file name is kept as the fallback when no CSV was uploaded.
DEFAULT_CSV = 'traffic_congestion.csv'
csv_path = next(
    (name for name in uploaded if name.lower().endswith('.csv')),
    DEFAULT_CSV,
)
data = pd.read_csv(csv_path)

# Step 4: Clean Column Names
# Strip surrounding whitespace so the header matching below is not defeated
# by padded column names.
data.columns = data.columns.str.strip()

# Step 5: Show Columns
print("Columns in dataset:")
print(data.columns)

# Step 6: Detect Target Column
# The target is the first column whose name contains both "congestion" and
# "level" (case-insensitive); None when no column matches.
target_column = next(
    (
        col
        for col in data.columns
        if "congestion" in col.lower() and "level" in col.lower()
    ),
    None,
)

if target_column is None:
    # ValueError is a subclass of Exception, so any handler written for the
    # previous generic Exception still catches this.
    raise ValueError("Target column for congestion not found. Please verify your file.")

print(f"\n🎯 Target Column: {target_column}")

# Step 7: Handle Missing Values
# Drop every row that has at least one missing value.
data.dropna(inplace=True)
if data.empty:
    # Fail here with a clear message instead of later inside the train/test
    # split, where an empty frame surfaces as an unrelated-looking error.
    raise ValueError("No rows left after dropping missing values. Please verify your file.")

# Step 8: Encode All Categorical Columns (except target)
# Each column gets its own encoder, kept in `feature_encoders`, so the
# integer codes can be mapped back to the original values later.
# Text columns are detected through the pandas type helpers rather than
# `dtype == 'object'`, which does not match pandas' dedicated string dtype.
feature_encoders = {}
for col in data.columns:
    if col == target_column:
        continue
    column = data[col]
    if pd.api.types.is_object_dtype(column) or pd.api.types.is_string_dtype(column):
        encoder = LabelEncoder()
        # Cast to str first: LabelEncoder sorts the unique values and raises
        # on object columns that mix types (e.g. str and bool).
        data[col] = encoder.fit_transform(column.astype(str))
        feature_encoders[col] = encoder

# Step 9: Encode Target (if needed)
# `label_encoder` is reserved for the target. When the target is encoded,
# `label_encoder.classes_` holds the original class labels; when the target
# is already numeric the encoder is left unfitted.
label_encoder = LabelEncoder()
target_values = data[target_column]
if pd.api.types.is_object_dtype(target_values) or pd.api.types.is_string_dtype(target_values):
    data[target_column] = label_encoder.fit_transform(target_values.astype(str))

# Step 10: Features and Target Split
# The target column becomes `y`; every remaining column is a feature in `X`.
y = data[target_column]
X = data.drop(columns=[target_column])

# Step 11: Train/Test Split
# Hold out 20% of the rows for evaluation; the fixed seed makes the split
# repeatable between runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Step 12: Train Model
# `fit` returns the estimator itself, so construction and training are chained.
model = RandomForestClassifier(n_estimators=100, random_state=42).fit(X_train, y_train)

# Step 13: Prediction
# Predict the class of every held-out row.
y_pred = model.predict(X_test)

# Step 14: Evaluation
# Per-class precision / recall / F1 on the held-out rows.
print("\n📈 Classification Report:")
print(classification_report(y_test, y_pred))

# Step 15: Confusion Matrix
# Pin the label order explicitly (sorted union of true and predicted classes,
# which is also what confusion_matrix uses by default) so the same list can
# label the heatmap axes. Without explicit tick labels the axes show
# positional indices 0..n-1, which is misleading when the class values are
# not 0-based.
class_labels = sorted(set(y_test.tolist()) | set(y_pred.tolist()))
conf_matrix = confusion_matrix(y_test, y_pred, labels=class_labels)

plt.figure(figsize=(6,5))
sns.heatmap(
    conf_matrix,
    annot=True,
    # Integer format: the default '.2g' renders counts >= 100 in scientific
    # notation (e.g. 1.2e+02).
    fmt='d',
    cmap='Blues',
    xticklabels=class_labels,
    yticklabels=class_labels,
)
plt.title("Confusion Matrix - Traffic Congestion")
plt.xlabel("Predicted")
plt.ylabel("Actual")
plt.show()

# Step 16: Feature Importance
# One horizontal bar per feature column, sized by the importance score the
# trained forest reports for it.
features = X.columns
importances = model.feature_importances_

plt.figure(figsize=(8,6))
importance_axes = sns.barplot(x=importances, y=features)
importance_axes.set_title("Feature Importance for Congestion Prediction")
importance_axes.set_xlabel("Importance")
importance_axes.set_ylabel("Features")
plt.show()
