In [None]:
# Mount Google Drive at /content/drive so later cells can read files from it.
# When the drive is already mounted the call only reports that fact; pass
# force_remount=True to drive.mount to force a fresh mount.
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
# Step 0: Import libraries
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import Ridge, LinearRegression, Lasso, ElasticNet
from sklearn.ensemble import RandomForestRegressor
from xgboost import XGBRegressor
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score

In [None]:
# Step 1: Load the feature-engineered dataset
# The CSV is read from the mounted Google Drive, so the drive-mount cell must
# have run first.
# NOTE(review): the path is hard-coded to one Drive layout — consider lifting
# it into a single configurable constant near the imports.
df = pd.read_csv('/content/drive/MyDrive/AI/selected_features_dataset.csv')

In [None]:
# Step 2: Separate the prediction target from the feature matrix
# y is the single target column; X is every remaining column of df
# (df.drop returns a new frame, so df itself is left untouched).
target_column = "actual_time"
y = df[target_column]
X = df.drop(columns=[target_column])

In [None]:
# Step 3: Encode non-numeric features
# Every object/string column of X is replaced, in place, by the integer code
# of its category (ordinal encoding via pandas' categorical dtype).
# Caveats worth knowing:
#   * pandas assigns code -1 to missing values, so NaNs silently become -1.
#   * The codes impose an arbitrary numeric order on the categories, which the
#     linear models below will treat as a real magnitude.
#   * The encoding is computed on the full X, i.e. before the train/test split.
non_numeric_cols = X.select_dtypes(include=["object", "string"]).columns
for col in non_numeric_cols:
    # astype("category") builds the category set for this column; .cat.codes
    # yields one integer per row.
    X[col] = X[col].astype("category").cat.codes

In [None]:
# Step 4: Train-test split
# Hold out 20% of the rows as the test set. random_state=42 fixes the shuffle
# so the same rows land in each partition on every run.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

In [None]:
# Step 5: Scaling
# Standardize the features. The scaler's statistics are learned from the
# training rows only (fit_transform) and then applied unchanged to the test
# rows (transform), so nothing from the test set influences the scaling.
# Both results are NumPy arrays, not DataFrames.
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

In [None]:
# Step 6: Relative accuracy function
def relative_accuracy(y_true, y_pred, tolerance=0.2):
    """Return the percentage of predictions within a relative tolerance.

    A prediction counts as a hit when its absolute error is at most
    ``tolerance`` times the magnitude of the true value, i.e.
    ``|y_true - y_pred| <= tolerance * |y_true|``.

    The check is written without a division so that a true value of zero is
    handled sensibly: an exact prediction of zero is a hit, any other
    prediction for a zero target is a miss, and no divide-by-zero warning or
    NaN is produced.

    Parameters
    ----------
    y_true : array-like
        Ground-truth values (list, NumPy array, pandas Series, ...).
    y_pred : array-like
        Predicted values, compared position by position with ``y_true``.
    tolerance : float, default 0.2
        Allowed relative error (0.2 means within ±20% of the true value).

    Returns
    -------
    numpy.float64
        Share of hits as a percentage in the range 0-100.
    """
    # Convert to plain float arrays so that lists are accepted and pandas
    # inputs are compared by position rather than aligned by index label.
    actual = np.asarray(y_true, dtype=float)
    predicted = np.asarray(y_pred, dtype=float)

    abs_error = np.abs(actual - predicted)
    within_tolerance = abs_error <= tolerance * np.abs(actual)
    return np.mean(within_tolerance) * 100

In [None]:
# Step 7: Evaluation function
def evaluate_model(name, y_true, y_pred):
    """Print a short regression report for one model.

    The report consists of a header with ``name`` followed by MAE, MSE, RMSE
    and R² (each rounded to 4 decimals) and the relative accuracy at the
    default ±20% tolerance (rounded to 2 decimals). Nothing is returned.

    Parameters
    ----------
    name : str
        Label shown in the report header.
    y_true : array-like
        Ground-truth target values.
    y_pred : array-like
        Predictions to compare against ``y_true``.
    """
    print(f"\n===== {name} =====")

    mae = mean_absolute_error(y_true, y_pred)
    print("MAE:", round(mae, 4))

    # MSE is computed once and reused for the RMSE line.
    mse = mean_squared_error(y_true, y_pred)
    print("MSE:", round(mse, 4))
    print("RMSE:", round(np.sqrt(mse), 4))

    r2 = r2_score(y_true, y_pred)
    print("R² Score:", round(r2, 4))

    rel_acc = relative_accuracy(y_true, y_pred)
    print("Relative Accuracy (±20%):", round(rel_acc, 2), "%")

In [None]:
# Step 8: Ridge Regression + Hyperparameter Tuning
# 5-fold cross-validated grid search over the regularization strength alpha,
# choosing the value with the best mean R². The search is fitted on the scaled
# training data and the held-out test set is scored afterwards.
# NOTE(review): in the recorded output the chosen alpha is 0.01, the smallest
# value in the grid, and the metrics match plain LinearRegression — consider
# extending the grid below 0.01 or treating regularization as unnecessary.
# NOTE(review): X_train_scaled was standardized on the whole training set
# before the CV folds are formed; wrapping scaler + model in a Pipeline would
# keep each fold's validation rows out of the scaling statistics.
ridge = Ridge()
ridge_params = {'alpha': [0.01, 0.1, 1, 10, 100]}
ridge_cv = GridSearchCV(ridge, ridge_params, cv=5, scoring='r2')
ridge_cv.fit(X_train_scaled, y_train)
y_pred_ridge = ridge_cv.predict(X_test_scaled)
evaluate_model("Ridge Regression", y_test, y_pred_ridge)
print("Best Alpha (λ):", ridge_cv.best_params_['alpha'])


===== Ridge Regression =====
MAE: 0.8111
MSE: 2.882
RMSE: 1.6977
R² Score: 0.971
Relative Accuracy (±20%): 63.58 %
Best Alpha (λ): 0.01


In [None]:
# Step 9: Linear Regression (baseline)
# Ordinary least squares with default settings and no hyperparameters to tune;
# it is fitted on the scaled training data and scored on the scaled test data
# as the reference point for the regularized models.
lin_reg = LinearRegression()
lin_reg.fit(X_train_scaled, y_train)
y_pred_lin = lin_reg.predict(X_test_scaled)
evaluate_model("Linear Regression", y_test, y_pred_lin)


===== Linear Regression =====
MAE: 0.8112
MSE: 2.882
RMSE: 1.6977
R² Score: 0.971
Relative Accuracy (±20%): 63.57 %


In [None]:
# Step 10: Lasso Regression
# 5-fold cross-validated grid search over alpha (L1 penalty strength),
# selecting by mean R², then scoring the held-out test set.
# NOTE(review): unlike the Ridge cell, the selected alpha is not printed here;
# lasso_cv.best_params_ holds it if it is needed for the write-up.
lasso = Lasso()
lasso_cv = GridSearchCV(lasso, {'alpha': [0.01, 0.1, 1, 10]}, cv=5, scoring='r2')
lasso_cv.fit(X_train_scaled, y_train)
y_pred_lasso = lasso_cv.predict(X_test_scaled)
evaluate_model("Lasso Regression", y_test, y_pred_lasso)


===== Lasso Regression =====
MAE: 0.8054
MSE: 2.9061
RMSE: 1.7047
R² Score: 0.9708
Relative Accuracy (±20%): 64.69 %


In [None]:
# Step 11: ElasticNet Regression
# 5-fold cross-validated grid search over a 3 x 3 grid: alpha (overall penalty
# strength) and l1_ratio (mix between the L1 and L2 penalties), selecting by
# mean R², then scoring the held-out test set.
# NOTE(review): the selected parameters are not printed; see
# enet_cv.best_params_.
enet = ElasticNet()
enet_cv = GridSearchCV(
    enet,
    {'alpha': [0.01, 0.1, 1], 'l1_ratio': [0.2, 0.5, 0.8]},
    cv=5,
    scoring='r2'
)
enet_cv.fit(X_train_scaled, y_train)
y_pred_enet = enet_cv.predict(X_test_scaled)
evaluate_model("ElasticNet Regression", y_test, y_pred_enet)


===== ElasticNet Regression =====
MAE: 0.8122
MSE: 2.9226
RMSE: 1.7096
R² Score: 0.9706
Relative Accuracy (±20%): 63.89 %


In [None]:
# Step 12: Random Forest
# Random forest with library-default hyperparameters (no tuning);
# random_state=42 makes the fitted forest reproducible. The scaled matrices
# from Step 5 are reused here.
# NOTE(review): the recorded R² of 0.9991 is very high — worth checking that
# none of the selected features is derived from the target.
rf = RandomForestRegressor(random_state=42)
rf.fit(X_train_scaled, y_train)
y_pred_rf = rf.predict(X_test_scaled)
evaluate_model("Random Forest", y_test, y_pred_rf)


===== Random Forest =====
MAE: 0.1407
MSE: 0.0911
RMSE: 0.3019
R² Score: 0.9991
Relative Accuracy (±20%): 98.06 %


In [None]:
# Step 13: XGBoost
# Gradient-boosted trees with library-default hyperparameters (no tuning);
# random_state=42 fixes the seed. The scaled matrices from Step 5 are reused
# here.
xgb = XGBRegressor(random_state=42)
xgb.fit(X_train_scaled, y_train)
y_pred_xgb = xgb.predict(X_test_scaled)
evaluate_model("XGBoost", y_test, y_pred_xgb)


===== XGBoost =====
MAE: 0.1745
MSE: 0.1127
RMSE: 0.3357
R² Score: 0.9989
Relative Accuracy (±20%): 95.67 %
