In [2]:
import pandas as pd
import numpy as np

# Load data
data = pd.read_csv(r"C:\Users\ramesh\Downloads\traffic1.csv\traffic.csv")
data['DateTime'] = pd.to_datetime(data['DateTime'])
data = data.sort_values('DateTime')

# Time features
data['Hour'] = data['DateTime'].dt.hour
data['Day'] = data['DateTime'].dt.day
data['Month'] = data['DateTime'].dt.month
data['Weekday'] = data['DateTime'].dt.weekday
data['Is_Weekend'] = data['Weekday'].apply(lambda x: 1 if x >= 5 else 0)

# Lag features
data['Lag_1'] = data['Vehicles'].shift(1)
data['Lag_24'] = data['Vehicles'].shift(24)
data['Lag_168'] = data['Vehicles'].shift(168)  # weekly pattern

# Rolling features
data['Roll_Mean_3'] = data['Vehicles'].rolling(3).mean()
data['Roll_Mean_6'] = data['Vehicles'].rolling(6).mean()
data['Roll_Mean_24'] = data['Vehicles'].rolling(24).mean()

# Drop missing values
data.dropna(inplace=True)

print(data.head())


                 DateTime  Junction  Vehicles           ID  Hour  Day  Month  \
56    2015-11-03 08:00:00         1        15  20151103081     8    3     11   
29240 2015-11-03 08:00:00         3         6  20151103083     8    3     11   
14648 2015-11-03 08:00:00         2         6  20151103082     8    3     11   
14649 2015-11-03 09:00:00         2         9  20151103092     9    3     11   
57    2015-11-03 09:00:00         1        29  20151103091     9    3     11   

       Weekday  Is_Weekend  Lag_1  Lag_24  Lag_168  Roll_Mean_3  Roll_Mean_6  \
56           1           0    6.0    21.0     15.0     8.666667     9.000000   
29240        1           0   15.0     8.0      6.0     9.000000     8.166667   
14648        1           0    6.0    10.0      9.0     9.000000     8.666667   
14649        1           0    6.0     8.0      7.0     7.000000     7.833333   
57           1           0    9.0     4.0     13.0    14.666667    11.833333   

       Roll_Mean_24  
56         8.833

In [3]:
from xgboost import XGBRegressor
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import r2_score

# Features & target
X = data.drop(columns=['DateTime', 'Vehicles'])
y = data['Vehicles']

# Train-test split (time-based)
split = int(0.8 * len(X))
X_train, X_test = X[:split], X[split:]
y_train, y_test = y[:split], y[split:]


In [None]:
data

## __Train Model :__

In [4]:
best_xgb = XGBRegressor(
    n_estimators=300,
    max_depth=8,
    learning_rate=0.05,
    subsample=0.9,
    colsample_bytree=0.9,
    objective='reg:squarederror',
    random_state=42
)

best_xgb.fit(X_train, y_train)

y_pred = best_xgb.predict(X_test)

# Evaluate Metrics
from sklearn.metrics import mean_absolute_error, mean_squared_error

mae = mean_absolute_error(y_test, y_pred)
rmse = np.sqrt(mean_squared_error(y_test, y_pred))
r2 = r2_score(y_test, y_pred) * 100

print("MAE:", mae)
print("RMSE:", rmse)
print("Accuracy (R2):", r2)


MAE: 4.0079450607299805
RMSE: 6.031186750801775
Accuracy (R2): 95.08505463600159


In [None]:
import joblib

# Save trained 
joblib.dump(best_xgb, "model.pkl")

print("Model saved successfully")


Model saved successfully
