In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

from sklearn.datasets import make_regression
from sklearn.metrics import r2_score, mean_absolute_error, mean_squared_error
from sklearn.ensemble import BaggingRegressor, RandomForestRegressor
from sklearn.tree import DecisionTreeRegressor
from sklearn.svm import SVR
from sklearn.model_selection import train_test_split

In [2]:
# Synthetic regression problem: 10,000 samples with 10 features, 3 of which
# are informative. The generator is seeded so that the dataset -- and hence
# every metric reported further down -- is the same on each Restart & Run All
# (previously only the split and the models were seeded, not the data itself).
X, y = make_regression(n_samples=10000, n_features=10, n_informative=3, random_state=42)

In [3]:
# Hold out 20% of the rows as a test set; the split is seeded with 42.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

In [4]:
# NOTE: feature standardisation is disabled in this cell. The code is wrapped
# in a bare string literal, so none of it executes: StandardScaler is not
# imported and X_train_sc / X_test_sc are not created. The model cells visible
# in this notebook are fit on the unscaled X_train / X_test.
# Because the string is the cell's only expression, it is echoed as the cell output.
'''
from sklearn.preprocessing import StandardScaler

sc = StandardScaler()
X_train_sc = sc.fit_transform(X_train)
X_test_sc = sc.transform(X_test)
'''

'\nfrom sklearn.preprocessing import StandardScaler\n\nsc = StandardScaler()\nX_train_sc = sc.fit_transform(X_train)\nX_test_sc = sc.transform(X_test)\n'

In [5]:
# Baseline: one decision tree (no hyperparameters set other than the seed),
# fit on the training split and scored on the held-out test split.
model_dt = DecisionTreeRegressor(random_state=42).fit(X_train, y_train)
y_pred_dt = model_dt.predict(X_test)

# Report the hold-out metrics in a fixed order; RMSE is sqrt(MSE).
for label, score in (
    ("Decision Tree R2-Score: ", r2_score(y_test, y_pred_dt)),
    ("Decision Tree MAE: ", mean_absolute_error(y_test, y_pred_dt)),
    ("Decision Tree MSE: ", mean_squared_error(y_test, y_pred_dt)),
    ("Decision Tree RMSE: ", np.sqrt(mean_squared_error(y_test, y_pred_dt))),
):
    print(label, score)

Decision Tree R2-Score:  0.9848362532239926
Decision Tree MAE:  5.231724676699118
Decision Tree MSE:  52.226125800348406
Decision Tree RMSE:  7.226764545794224


### **Bagging**

In [7]:
# Bagging ensemble: 500 decision trees, each trained on a bootstrap sample
# (drawn with replacement) sized at 50% of the training rows. Seeded with 42.
bag = BaggingRegressor(
    estimator=DecisionTreeRegressor(),
    n_estimators=500,
    max_samples=0.5,
    bootstrap=True,
    random_state=42,
)

In [8]:
# Train the bagging ensemble on the training split.
bag.fit(X_train, y_train)

In [9]:
# Predict on the held-out test split; these predictions are scored in the next cell.
y_pred_bag = bag.predict(X_test)

In [10]:
# Hold-out metrics for the bagged trees, printed in a fixed order; RMSE is sqrt(MSE).
for label, score in (
    ("Bagged Tree R2-Score: ", r2_score(y_test, y_pred_bag)),
    ("Bagged Tree MAE: ", mean_absolute_error(y_test, y_pred_bag)),
    ("Bagged Tree MSE: ", mean_squared_error(y_test, y_pred_bag)),
    ("Bagged Tree RMSE: ", np.sqrt(mean_squared_error(y_test, y_pred_bag))),
):
    print(label, score)

Bagged Tree R2-Score:  0.994210657437074
Bagged Tree MAE:  2.5631059833001384
Bagged Tree MSE:  19.93932881226149
Bagged Tree RMSE:  4.465347557834831


### **Random Forest**

In [11]:
# Random forest with 500 trees, seeded with 42; fit on the training split and
# scored on the held-out test split.
model_rf = RandomForestRegressor(n_estimators=500, random_state=42).fit(X_train, y_train)
y_pred_rf = model_rf.predict(X_test)

# Hold-out metrics, printed in a fixed order; RMSE is sqrt(MSE).
for label, score in (
    ("RF R2-Score: ", r2_score(y_test, y_pred_rf)),
    ("RF MAE: ", mean_absolute_error(y_test, y_pred_rf)),
    ("RF MSE: ", mean_squared_error(y_test, y_pred_rf)),
    ("RF RMSE: ", np.sqrt(mean_squared_error(y_test, y_pred_rf))),
):
    print(label, score)

RF R2-Score:  0.9953195859080426
RF MAE:  2.3284243518712144
RF MSE:  16.12001959509466
RF RMSE:  4.014974420229183


### **Bagging using SVM**

In [12]:
# Bagging with a support-vector regressor as the base estimator: 500 SVR()
# models (no hyperparameters set), each trained on a bootstrap sample (drawn
# with replacement) sized at 25% of the training rows. Seeded with 42.
bag_svm = BaggingRegressor(
    estimator=SVR(),
    n_estimators=500,
    max_samples=0.25,
    bootstrap=True,
    random_state=42,
)

In [13]:
# Fit the bagged SVR ensemble on the training split, then predict the test split.
y_pred_bagsvm = bag_svm.fit(X_train, y_train).predict(X_test)

# Hold-out metrics, printed in a fixed order; RMSE is sqrt(MSE).
for label, score in (
    ("BR (SVR) R2-Score: ", r2_score(y_test, y_pred_bagsvm)),
    ("BR (SVR) MAE: ", mean_absolute_error(y_test, y_pred_bagsvm)),
    ("BR (SVR) MSE: ", mean_squared_error(y_test, y_pred_bagsvm)),
    ("BR (SVR) RMSE: ", np.sqrt(mean_squared_error(y_test, y_pred_bagsvm))),
):
    print(label, score)

BR (SVR) R2-Score:  0.7841479265537701
BR (SVR) MAE:  16.88638716649231
BR (SVR) MSE:  743.425599793419
BR (SVR) RMSE:  27.265832094279077


### **Pasting**

In [14]:
# Pasting: same ensemble wrapper as bagging, but with bootstrap=False, so each
# of the 500 trees is trained on a 25% subset of the training rows drawn
# without replacement. Seeded with 42.
pasting = BaggingRegressor(
    estimator=DecisionTreeRegressor(),
    n_estimators=500,
    max_samples=0.25,
    bootstrap=False,
    random_state=42,
)

In [15]:
# Fit the pasting ensemble on the training split, then predict the test split.
y_pred_pasting = pasting.fit(X_train, y_train).predict(X_test)

# Hold-out metrics, printed in a fixed order; RMSE is sqrt(MSE).
for label, score in (
    ("Pasting R2-Score: ", r2_score(y_test, y_pred_pasting)),
    ("Pasting MAE: ", mean_absolute_error(y_test, y_pred_pasting)),
    ("Pasting MSE: ", mean_squared_error(y_test, y_pred_pasting)),
    ("Pasting RMSE: ", np.sqrt(mean_squared_error(y_test, y_pred_pasting))),
):
    print(label, score)

Pasting R2-Score:  0.9921911808714508
Pasting MAE:  3.01451561851165
Pasting MSE:  26.894696685719886
Pasting RMSE:  5.1860097074455895
