In [None]:
# Attach the user's Google Drive to the Colab filesystem so that files stored
# there can be read through ordinary paths under /content/drive.
from google.colab import drive

drive.mount(mountpoint='/content/drive')

Mounted at /content/drive


In [None]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import r2_score, mean_squared_error

# Read the prepared feature table from the mounted Drive folder
df = pd.read_csv('/content/drive/MyDrive/AI/selected_features_dataset.csv')

# The "actual_time" column is the regression target; every other column is a predictor
y = df["actual_time"]
X = df.drop(columns=["actual_time"])

# One-hot encode the categorical predictors, dropping the first level of each
# encoded column to avoid the dummy variable trap
X_encoded = pd.get_dummies(X, drop_first=True)

# Hold out 20% of the rows for evaluation; the fixed seed keeps the split repeatable
X_train, X_test, y_train, y_test = train_test_split(
    X_encoded,
    y,
    test_size=0.2,
    random_state=42,
)

# Standardize the features: the scaling parameters are learned from the training
# rows only and then applied unchanged to both splits
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

In [None]:
from sklearn.linear_model import LinearRegression

# Ordinary least-squares baseline, trained on the standardized features
lr_model = LinearRegression().fit(X_train_scaled, y_train)
y_pred_lr = lr_model.predict(X_test_scaled)

# Score the predictions against the held-out targets
lr_r2 = r2_score(y_test, y_pred_lr)
lr_mse = mean_squared_error(y_test, y_pred_lr)

print("🔹 Linear Regression")
print("R² Score:", lr_r2)
print("MSE:", lr_mse)


🔹 Linear Regression
R² Score: 0.9791569061599675
MSE: 2.0727412108588994


In [None]:
from sklearn.linear_model import Ridge

# L2-regularized linear model; alpha sets the regularization strength
ridge_model = Ridge(alpha=1.0).fit(X_train_scaled, y_train)
y_pred_ridge = ridge_model.predict(X_test_scaled)

print("\n🔹 Ridge Regression (L2 Norm)")
# Report both metrics on the held-out split, in the same order as the other models
for metric_label, metric_fn in (("R² Score:", r2_score), ("MSE:", mean_squared_error)):
    print(metric_label, metric_fn(y_test, y_pred_ridge))



🔹 Ridge Regression (L2 Norm)
R² Score: 0.979144638375706
MSE: 2.0739611805140825


In [None]:
from sklearn.svm import LinearSVR

# Linear support-vector regression, trained on the standardized features.
# random_state is pinned because LinearSVR's solver shuffles the data with a
# pseudo-random generator; without a seed the reported scores can differ from
# run to run. 42 matches the seed already used for train_test_split and the
# gradient boosting model in this notebook.
# max_iter is set above the library default (1000) to give the solver more
# iterations to converge.
svr_model = LinearSVR(epsilon=0.1, max_iter=10000, random_state=42)
svr_model.fit(X_train_scaled, y_train)
y_pred_svr = svr_model.predict(X_test_scaled)

# Score the predictions against the held-out targets
print("\n🔹 Linear SVR")
print("R² Score:", r2_score(y_test, y_pred_svr))
print("MSE:", mean_squared_error(y_test, y_pred_svr))





🔹 Linear SVR
R² Score: 0.977161325675294
MSE: 2.2711916876410103


In [None]:
from sklearn.ensemble import GradientBoostingRegressor

# Hyperparameters for the boosted ensemble, gathered in one place for easy tuning
gbr_params = {
    "n_estimators": 100,
    "learning_rate": 0.1,
    "max_depth": 3,
    "random_state": 42,
}

# Gradient Boosting doesn't need scaled features, so it is fit on the unscaled split
gbr_model = GradientBoostingRegressor(**gbr_params).fit(X_train, y_train)
y_pred_gbr = gbr_model.predict(X_test)

# Score the predictions against the held-out targets
gbr_r2 = r2_score(y_test, y_pred_gbr)
gbr_mse = mean_squared_error(y_test, y_pred_gbr)

print("\n🔹 Gradient Boosted Regression")
print("R² Score:", gbr_r2)
print("MSE:", gbr_mse)



🔹 Gradient Boosted Regression
R² Score: 0.9974376872937933
MSE: 0.25480915559000367


In [None]:
# Collect every model's held-out predictions under its display name; the dict
# order fixes the row order of the comparison table.
predictions_by_model = {
    "Linear Regression": y_pred_lr,
    "Ridge Regression": y_pred_ridge,
    "Linear SVR": y_pred_svr,
    "Gradient Boosting": y_pred_gbr,
}

# Score each model with the same two metrics against the same test targets
results = pd.DataFrame({
    "Model": list(predictions_by_model),
    "R2 Score": [r2_score(y_test, preds) for preds in predictions_by_model.values()],
    "MSE": [mean_squared_error(y_test, preds) for preds in predictions_by_model.values()],
})

print("\n📊 Model Comparison:\n", results)



📊 Model Comparison:
                Model  R2 Score       MSE
0  Linear Regression  0.979157  2.072741
1   Ridge Regression  0.979145  2.073961
2         Linear SVR  0.977161  2.271192
3  Gradient Boosting  0.997438  0.254809
