Import necessary modules

In [16]:
import numpy as np
import pandas as pd

from sklearn.model_selection import train_test_split

from sklearn.linear_model import LinearRegression
from sklearn.svm import SVR
from sklearn.ensemble import RandomForestRegressor
import xgboost as xgb

from sklearn.metrics import mean_squared_error

#### Load processed dataset

In [17]:
# Read the preprocessed training table from disk.
data = pd.read_csv('Processed_Train.csv')

# Descriptive features: drop the first and the last column, then keep only
# the numeric-dtype columns so every estimator below receives numeric input.
X = data.iloc[:, 1:-1].select_dtypes(include=np.number)

# Target label: the last column of the table, taken via `.values`.
y = data.iloc[:, -1].values

# Report the shapes of the feature matrix and the target.
for caption, obj in (
    ("Descriptive Features (X) Dimensions: ", X),
    ("Target Label (y) dimensions: ", y),
):
    print(caption, obj.shape)

Descriptive Features (X) Dimensions:  (3870, 42)
Target Label (y) dimensions:  (3870,)


#### Split the training data 75:25 into training and validation sets

In [18]:
# Hold out 25% of the rows for validation; the fixed random_state makes the
# split identical on every run.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.25,
    random_state=1234,
)

#### 1. Linear Regression

In [19]:
# Initialize and train the Linear Regression model
lin_reg = LinearRegression()
lin_reg.fit(X_train, y_train)

# make predictions on validation data
y_pred_lr = lin_reg.predict(X_test)

# compute & display RMSE
# RMSE is computed as sqrt(MSE) instead of passing `squared=False`: that
# keyword was deprecated in scikit-learn 1.4 and removed in 1.6, so the
# explicit square root runs on both old and new releases.
rmse = np.sqrt(mean_squared_error(y_test, y_pred_lr))
print("RMSE of Linear Regression: ", rmse)

RMSE of Linear Regression:  404.10701021694433


#### 2. Support Vector Regressor (SVR) 

In [20]:
# Initialize and train the Support Vector Regressor (SVR)
svr_reg = SVR(kernel="linear")  # choose a different kernel (linear, rbf, sigmoid) if needed
svr_reg.fit(X_train, y_train)

# make predictions on validation data
y_pred_svr = svr_reg.predict(X_test)

# compute & display RMSE
# RMSE is computed as sqrt(MSE) instead of passing `squared=False`: that
# keyword was deprecated in scikit-learn 1.4 and removed in 1.6, so the
# explicit square root runs on both old and new releases.
rmse = np.sqrt(mean_squared_error(y_test, y_pred_svr))
print("RMSE of SVR: ", rmse)

RMSE of SVR:  439.1328876347328


#### 3. Random Forest Regressor

In [21]:
# Initialize and train the Random Forest Regressor
# (random_state is fixed so the fitted forest is reproducible across runs)
rf_reg = RandomForestRegressor(random_state=1234)
rf_reg.fit(X_train, y_train)

# make predictions on validation data
y_pred_rf = rf_reg.predict(X_test)

# compute & display RMSE
# RMSE is computed as sqrt(MSE) instead of passing `squared=False`: that
# keyword was deprecated in scikit-learn 1.4 and removed in 1.6, so the
# explicit square root runs on both old and new releases.
rmse = np.sqrt(mean_squared_error(y_test, y_pred_rf))
print("RMSE of Random Forest Regressor: ", rmse)

RMSE of Random Forest Regressor:  443.37985056341984


#### 4. XGBoost Regressor

In [22]:
# Initialize and train the XGBoost Regressor
# (random_state is fixed so the fitted model is reproducible across runs)
xgb_reg = xgb.XGBRegressor(random_state=1234)
xgb_reg.fit(X_train, y_train)

# make predictions on validation data
y_pred_xgb = xgb_reg.predict(X_test)

# compute & display RMSE
# RMSE is computed as sqrt(MSE) instead of passing `squared=False`: that
# keyword was deprecated in scikit-learn 1.4 and removed in 1.6, so the
# explicit square root runs on both old and new releases.
rmse = np.sqrt(mean_squared_error(y_test, y_pred_xgb))
print("RMSE of XGBoost Regressor: ", rmse)

RMSE of XGBoost Regressor:  441.42350155200285


In [23]:
# Print one line per validation row: the true target followed by the
# Linear Regression, SVR, Random Forest and XGBoost predictions.
for row in zip(y_test, y_pred_lr, y_pred_svr, y_pred_rf, y_pred_xgb):
    print(*row, sep=" :: ")

200.0 :: 150.00374156332214 :: 150.60450697220392 :: 193.25 :: 205.72795
910.0 :: 975.3981395733015 :: 966.196802048611 :: 899.87 :: 819.2026
400.0 :: 470.9039474747942 :: 546.2610867780912 :: 384.7 :: 341.4524
800.0 :: 1108.0457246831327 :: 1234.1937740093726 :: 863.0 :: 754.11127
400.0 :: 580.9259680603695 :: 535.3741868272645 :: 420.3 :: 340.5859
480.0 :: 517.5135926391551 :: 437.5633266906533 :: 489.81 :: 530.57104
300.0 :: 420.1077533886091 :: 411.0960211337241 :: 340.83 :: 295.55673
1500.0 :: 1254.6623488910911 :: 1148.097155392361 :: 1315.76 :: 1430.9052
540.0 :: 521.8626719206044 :: 688.3366486583999 :: 667.33 :: 579.1475
250.0 :: 211.45841716105497 :: 412.337429738678 :: 268.8 :: 307.66595
200.0 :: 330.1560083303019 :: 189.1174363218032 :: 200.0 :: 209.23575
300.0 :: 367.1855532065375 :: 404.3523020597221 :: 354.15 :: 353.1705
450.0 :: 693.9369957368389 :: 286.71909884792865 :: 412.35 :: 393.36526
700.0 :: 664.0547905964779 :: 474.89646316661845 :: 670.65 :: 675.46204
100.0 ::