In [5]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import StandardScaler

from sklearn.linear_model import LinearRegression
from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor
from sklearn.svm import SVR

from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score

from sklearn.model_selection import GridSearchCV

In [2]:
# Feature matrices for the train and test splits
X_train = pd.read_csv('dataset/X_train.csv')
X_test = pd.read_csv('dataset/X_test.csv')

# Matching target frames (one file per split)
y_train = pd.read_csv('dataset/y_train.csv')
y_test = pd.read_csv('dataset/y_test.csv')

In [46]:
# (rows, columns) of the training features and training target
tuple(frame.shape for frame in (X_train, y_train))

((486, 24), (486, 1))

In [47]:
# (rows, columns) of the test features and test target
tuple(frame.shape for frame in (X_test, y_test))

((163, 24), (163, 1))

## Training Models with Hyperparameter Tuning

### 1. Linear Regression

Ordinary least-squares Linear Regression has no regularization hyperparameters to tune, so no grid search is run for it.

### 2. Decision Tree Regressor

In [68]:
# Candidate values explored by the decision-tree grid search.
# None for max_depth / max_features leaves that setting unrestricted.
param_grid = dict(
    max_depth=[None, 10, 20, 30, 50],
    min_samples_split=[2, 5, 10, 20],
    min_samples_leaf=[1, 2, 5, 10],
    max_features=[None, 'sqrt', 'log2'],
)

In [69]:
# Base estimator; the fixed seed keeps the fitted tree reproducible
dt_model = DecisionTreeRegressor(random_state=42)

# Exhaustive search over param_grid with 5-fold cross-validation.
# Candidates are ranked by negative MSE (higher is better); n_jobs=-1
# runs the fits in parallel on all available cores.
grid_search_dt = GridSearchCV(
    dt_model,
    param_grid,
    scoring='neg_mean_squared_error',
    cv=5,
    n_jobs=-1,
)

In [70]:
# Fit the model.
# y_train is loaded from CSV as a single-column DataFrame; flatten it to a
# 1-D array so the tree is trained on the same target shape as the other
# models in this notebook, which are all fit on y_train.values.ravel().
grid_search_dt.fit(X_train, y_train.values.ravel())

# Best model: the estimator with the best cross-validated score.
# Leaving it as the last expression displays it as the cell output.
best_dt_model = grid_search_dt.best_estimator_
best_dt_model

In [71]:
# Predict on the held-out test features with the tuned tree
dt_predictions = best_dt_model.predict(X_test)

# Score the predictions against the true test targets: MSE, MAE and R²
dt_mse, dt_mae, dt_r2 = (
    metric(y_test, dt_predictions)
    for metric in (mean_squared_error, mean_absolute_error, r2_score)
)

print("Decision Tree Regression with hyperparameter - MSE:", dt_mse, "MAE:", dt_mae, "R²:", dt_r2)

Decision Tree Regression with hyperparameter - MSE: 2.352582314763241 MAE: 0.9533271935665618 R²: 0.7805198233050861


### 3. Random Forest Regressor

In [73]:
# Candidate values explored by the random-forest grid search.
# None for max_depth leaves the tree depth unrestricted.
param_grid = dict(
    n_estimators=[50, 100, 200],
    max_depth=[None, 10, 20],
    min_samples_split=[2, 5, 10],
    min_samples_leaf=[1, 2, 5],
    max_features=['sqrt', 'log2'],
)

In [74]:
# Base estimator; the fixed seed keeps the fitted forest reproducible
rf_model = RandomForestRegressor(random_state=42)

# Exhaustive search over param_grid with 5-fold cross-validation.
# Candidates are ranked by negative MSE (higher is better); n_jobs=-1
# runs the fits in parallel on all available cores.
grid_search_rf = GridSearchCV(
    rf_model,
    param_grid,
    scoring='neg_mean_squared_error',
    cv=5,
    n_jobs=-1,
)

In [75]:
# Run the cross-validated search (target flattened to a 1-D array) and keep
# the estimator with the best cross-validated score. fit() returns the
# search object itself, so the attribute can be read off the same expression.
best_rf_model = grid_search_rf.fit(X_train, y_train.values.ravel()).best_estimator_

# Show the selected estimator as the cell output
best_rf_model

In [76]:
# Predict on the held-out test features with the tuned forest
rf_predictions = best_rf_model.predict(X_test)

# Score the predictions against the true test targets: MSE, MAE and R²
rf_mse, rf_mae, rf_r2 = (
    metric(y_test, rf_predictions)
    for metric in (mean_squared_error, mean_absolute_error, r2_score)
)

print("Random Forest Regression with hyperparameter - MSE:", rf_mse, "MAE:", rf_mae, "R²:", rf_r2)

Random Forest Regression with hyperparameter - MSE: 2.5286058282208588 MAE: 0.9363803680981595 R²: 0.764098008181467


### 4. Gradient Boosting Regressor

In [78]:
# Candidate values explored by the gradient-boosting grid search
param_grid = dict(
    n_estimators=[50, 100, 200],
    learning_rate=[0.001, 0.01, 0.1, 0.2],
    max_depth=[3, 5, 7, 10],
    min_samples_split=[2, 5, 10],
    min_samples_leaf=[1, 2, 5],
)

In [79]:
# Base estimator; the fixed seed keeps the boosted ensemble reproducible
gb_model = GradientBoostingRegressor(random_state=42)

# Exhaustive search over param_grid with 5-fold cross-validation.
# Candidates are ranked by negative MSE (higher is better); n_jobs=-1
# runs the fits in parallel on all available cores.
grid_search_gb = GridSearchCV(
    gb_model,
    param_grid,
    scoring='neg_mean_squared_error',
    cv=5,
    n_jobs=-1,
)

In [80]:
# Run the cross-validated search (target flattened to a 1-D array) and keep
# the estimator with the best cross-validated score. fit() returns the
# search object itself, so the attribute can be read off the same expression.
best_gb_model = grid_search_gb.fit(X_train, y_train.values.ravel()).best_estimator_

# Show the selected estimator as the cell output
best_gb_model

In [81]:
# Predict on the held-out test features with the tuned booster
gb_predictions = best_gb_model.predict(X_test)

# Score the predictions against the true test targets: MSE, MAE and R²
gb_mse, gb_mae, gb_r2 = (
    metric(y_test, gb_predictions)
    for metric in (mean_squared_error, mean_absolute_error, r2_score)
)

print("Gradient Boosting Regression with hyperparameter - MSE:", gb_mse, "MAE:", gb_mae, "R²:", gb_r2)

Gradient Boosting Regression with hyperparameter - MSE: 1.8514011369216847 MAE: 0.8118648570374999 R²: 0.8272766712073027


### 5. Support Vector Regressor

In [82]:
# Define the parameter grid.
# `gamma` is the kernel coefficient for the 'poly' and 'rbf' kernels only; the
# 'linear' kernel ignores it. A single flat grid would therefore fit the same
# linear model once per gamma value (192 candidates, 48 of them duplicates).
# GridSearchCV accepts a list of grids, so the linear kernel gets its own grid
# without `gamma`, leaving 144 distinct candidates to cross-validate.
param_grid = [
    {
        'kernel': ['linear'],
        'C': [0.1, 1, 10, 100],
        'epsilon': [0.001, 0.01, 0.1, 0.2]
    },
    {
        'kernel': ['poly', 'rbf'],
        'C': [0.1, 1, 10, 100],
        'gamma': [0.001, 0.01, 0.1, 1],
        'epsilon': [0.001, 0.01, 0.1, 0.2]
    }
]

In [84]:
# Base estimator with library-default settings; the search overrides the tuned ones
svr_model = SVR()

# Exhaustive search over param_grid with 5-fold cross-validation.
# Candidates are ranked by negative MSE (higher is better); n_jobs=-1
# runs the fits in parallel on all available cores.
grid_search_svr = GridSearchCV(
    svr_model,
    param_grid,
    scoring='neg_mean_squared_error',
    cv=5,
    n_jobs=-1,
)

In [85]:
# Run the cross-validated search (target flattened to a 1-D array) and keep
# the estimator with the best cross-validated score. fit() returns the
# search object itself, so the attribute can be read off the same expression.
best_svr_model = grid_search_svr.fit(X_train, y_train.values.ravel()).best_estimator_

# Show the selected estimator as the cell output
best_svr_model

In [86]:
# Predict on the held-out test features with the tuned SVR
svr_predictions = best_svr_model.predict(X_test)

# Score the predictions against the true test targets: MSE, MAE and R²
svr_mse, svr_mae, svr_r2 = (
    metric(y_test, svr_predictions)
    for metric in (mean_squared_error, mean_absolute_error, r2_score)
)

print("Support Vector Regression with hyperparameter - MSE:", svr_mse, "MAE:", svr_mae, "R²:", svr_r2)

Support Vector Regression with hyperparameter - MSE: 1.7537573854670754 MAE: 0.8016648302700521 R²: 0.8363861793796316
