### Custom or hybrid different ensemble learning algorithms

In [2]:
##Importing Libraries
import pandas as pd
import numpy as np
import seaborn as sns
import sklearn
import matplotlib.pyplot as plt 

In [2]:
## For ignoring warnings to view clean output
import warnings
from sklearn.exceptions import ConvergenceWarning
warnings.filterwarnings("ignore", category=FutureWarning)
warnings.filterwarnings("ignore", category=ConvergenceWarning)
warnings.filterwarnings("ignore", category=DeprecationWarning)

In [3]:
## Importing the dataset
df = pd.read_csv('cleaned.csv',header=0)

In [4]:
df.drop(['Year'],axis=1, inplace=True)

In [5]:
df.head()

Unnamed: 0,Location,Soil Type,Max_Temperature,Min_Teperature,Rainfall,Humidity,Yield Kg/ha
0,Chefe Donsa,Black Soil,31.0,13.0,811.2,68.0,980.0
1,Chefe Donsa,Black Soil,30.5,16.0,811.2,72.0,670.0
2,Chefe Donsa,Black Soil,29.5,14.0,811.2,68.0,730.0
3,Chefe Donsa,Black Soil,30.0,13.0,811.2,54.0,710.0
4,Chefe Donsa,Black Soil,31.0,12.0,811.2,70.0,780.0


In [6]:
### Separating Independent and Dependent feature
X = df.iloc[:,:-1]
y = df.iloc[:, 6]

In [7]:
# Shows the number of columns after creating dummy variables
pd.get_dummies(df,drop_first=True).shape

(8077, 12)

In [8]:
X = pd.get_dummies(X, drop_first=True)

In [9]:
X.head()

Unnamed: 0,Max_Temperature,Min_Teperature,Rainfall,Humidity,Location_Alemtena,Location_Chefe Donsa,Location_DZ-BS,Location_DZ-LS,Location_Denkaka,Location_Minjar,Soil Type_Light Soil
0,31.0,13.0,811.2,68.0,0,1,0,0,0,0,0
1,30.5,16.0,811.2,72.0,0,1,0,0,0,0,0
2,29.5,14.0,811.2,68.0,0,1,0,0,0,0,0
3,30.0,13.0,811.2,54.0,0,1,0,0,0,0,0
4,31.0,12.0,811.2,70.0,0,1,0,0,0,0,0


In [40]:
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=0)

In [41]:
# print the shapes of our training and test set
print(X_train.shape)
print(X_test.shape)
print(y_train.shape)
print(y_test.shape)

(5653, 11)
(2424, 11)
(5653,)
(2424,)


### Model Building with Random Forest and AdaBoost Regressor

In [78]:
from sklearn.ensemble import RandomForestRegressor
from sklearn.ensemble import AdaBoostRegressor
from sklearn.ensemble import VotingRegressor
from sklearn.pipeline import Pipeline

# get models
# get a voting ensemble of models
# define the base models
models = list()

AdaBoost = Pipeline([('m', AdaBoostRegressor())])
models.append(('AdaBoost', AdaBoost))

randomforest = Pipeline([('m', RandomForestRegressor())])
models.append(('randomforest', randomforest))

# define the voting ensemble
ensemble1 = VotingRegressor(estimators=models)

In [60]:
models

[('AdaBoost', Pipeline(steps=[('m', AdaBoostRegressor())])),
 ('randomforest', Pipeline(steps=[('m', RandomForestRegressor())]))]

In [61]:
ensemble1

VotingRegressor(estimators=[('AdaBoost',
                             Pipeline(steps=[('m', AdaBoostRegressor())])),
                            ('randomforest',
                             Pipeline(steps=[('m', RandomForestRegressor())]))])

In [79]:
ensemble1.fit(X_train, y_train)

VotingRegressor(estimators=[('AdaBoost',
                             Pipeline(steps=[('m', AdaBoostRegressor())])),
                            ('randomforest',
                             Pipeline(steps=[('m', RandomForestRegressor())]))])

In [80]:
pred=ensemble1.predict(X_test)

In [81]:
from sklearn.metrics import mean_squared_error as mse
from sklearn.metrics import mean_absolute_error as mae
from sklearn.metrics import r2_score

print('MSE =', mse(pred, y_test))
print('MAE =', mae(pred, y_test))
print('R2 Score =', r2_score(pred, y_test))

MSE = 444563.8055766399
MAE = 495.31213675242986
R2 Score = 0.40142494308036925


### Model Building with AdaBoost and GradientBoostingRegressor

In [82]:
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.ensemble import AdaBoostRegressor
from sklearn.ensemble import VotingRegressor
from sklearn.pipeline import Pipeline

models = list()

AdaBoost = Pipeline([('m', AdaBoostRegressor())])
models.append(('AdaBoost', AdaBoost))

GradientBoost = Pipeline([('m', GradientBoostingRegressor())])
models.append(('GradientBoost', GradientBoost))

# define the voting ensemble
ensemble2 = VotingRegressor(estimators=models)

In [83]:
ensemble2.fit(X_train, y_train)

VotingRegressor(estimators=[('AdaBoost',
                             Pipeline(steps=[('m', AdaBoostRegressor())])),
                            ('GradientBoost',
                             Pipeline(steps=[('m',
                                              GradientBoostingRegressor())]))])

In [84]:
pred=ensemble2.predict(X_test)

In [85]:
from sklearn.metrics import mean_squared_error as mse
from sklearn.metrics import mean_absolute_error as mae
from sklearn.metrics import r2_score

print('MSE =', mse(pred, y_test))
print('MAE =', mae(pred, y_test))
print('R2 Score =', r2_score(pred, y_test))

MSE = 444584.97739491425
MAE = 495.84134723693757
R2 Score = 0.3827304492221174


### Model Building with Random Forest and GradientBoostingRegressor

In [86]:
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.ensemble import RandomForestRegressor
from sklearn.ensemble import VotingRegressor
from sklearn.pipeline import Pipeline

# get models
# get a voting ensemble of models
# define the base models
models = list()

randomforest = Pipeline([('m', RandomForestRegressor())])
models.append(('randomforest', randomforest))

GradientBoost = Pipeline([('m', GradientBoostingRegressor())])
models.append(('GradientBoost', GradientBoost))

# define the voting ensemble
ensemble3 = VotingRegressor(estimators=models)

In [87]:
ensemble3.fit(X_train, y_train)

VotingRegressor(estimators=[('randomforest',
                             Pipeline(steps=[('m', RandomForestRegressor())])),
                            ('GradientBoost',
                             Pipeline(steps=[('m',
                                              GradientBoostingRegressor())]))])

In [88]:
pred=ensemble3.predict(X_test)

In [89]:
from sklearn.metrics import mean_squared_error as mse
from sklearn.metrics import mean_absolute_error as mae
from sklearn.metrics import r2_score

print('MSE =', mse(pred, y_test))
print('MAE =', mae(pred, y_test))
print('R2 Score =', r2_score(pred, y_test))

MSE = 410056.766035087
MAE = 451.8460524847102
R2 Score = 0.5106952000243205


### Model Building with Random Forest, AdaBoost and GradientBoostingRegressor

In [90]:
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.ensemble import RandomForestRegressor
from sklearn.ensemble import AdaBoostRegressor
from sklearn.ensemble import VotingRegressor
from sklearn.pipeline import Pipeline

# get models
# get a voting ensemble of models
# define the base models
models = list()

AdaBoost = Pipeline([('m', AdaBoostRegressor())])
models.append(('AdaBoost', AdaBoost))

randomforest = Pipeline([('m', RandomForestRegressor())])
models.append(('randomforest', randomforest))

GradientBoost = Pipeline([('m', GradientBoostingRegressor())])
models.append(('GradientBoost', GradientBoost))

# define the voting ensemble
ensemble = VotingRegressor(estimators=models)

In [91]:
ensemble.fit(X_train, y_train)

VotingRegressor(estimators=[('AdaBoost',
                             Pipeline(steps=[('m', AdaBoostRegressor())])),
                            ('randomforest',
                             Pipeline(steps=[('m', RandomForestRegressor())])),
                            ('GradientBoost',
                             Pipeline(steps=[('m',
                                              GradientBoostingRegressor())]))])

In [92]:
pred=ensemble.predict(X_test)

In [93]:
from sklearn.metrics import mean_squared_error as mse
from sklearn.metrics import mean_absolute_error as mae
from sklearn.metrics import r2_score

print('MSE =', mse(pred, y_test))
print('MAE =', mae(pred, y_test))
print('R2 Score =', r2_score(pred, y_test))

MSE = 416031.4095011485
MAE = 469.66776351219977
R2 Score = 0.465195428795225
