# When the individual models having less than 50% accuracy, a voting ensemble can degrade the performance by combining their predictions intelligently

## Import the libraries

In [1]:
from sklearn.linear_model import LinearRegression,SGDRegressor
import seaborn as sns
import numpy as np
from sklearn.preprocessing import PolynomialFeatures,StandardScaler
from sklearn.metrics import mean_absolute_error,mean_squared_error,r2_score
from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import VotingRegressor
from sklearn.model_selection import cross_val_score
import pandas as pd
import warnings
warnings.filterwarnings("ignore")

### we will be working with the tips dataset

In [2]:
df = sns.load_dataset("tips")

In [3]:
df

Unnamed: 0,total_bill,tip,sex,smoker,day,time,size
0,16.99,1.01,Female,No,Sun,Dinner,2
1,10.34,1.66,Male,No,Sun,Dinner,3
2,21.01,3.50,Male,No,Sun,Dinner,3
3,23.68,3.31,Male,No,Sun,Dinner,2
4,24.59,3.61,Female,No,Sun,Dinner,4
...,...,...,...,...,...,...,...
239,29.03,5.92,Male,No,Sat,Dinner,3
240,27.18,2.00,Female,Yes,Sat,Dinner,2
241,22.67,2.00,Male,Yes,Sat,Dinner,2
242,17.82,1.75,Male,No,Sat,Dinner,2


### creating the class for the Linear regression

In [4]:
lr = LinearRegression()
lr.fit(df.iloc[0:200,0:2],df.iloc[0:200,-1])
lr1 = cross_val_score(lr,df.iloc[0:200,0:2],df.iloc[0:200,-1],cv=5,scoring="r2",)
np.round(np.mean(lr1),2)

0.16

### creating the class for the SGD regression

In [5]:
sgd = SGDRegressor()
sgd.fit(df.iloc[0:200,0:2],df.iloc[0:200,-1])
sgd1 = cross_val_score(sgd,df.iloc[0:200,0:2],df.iloc[0:200,-1],cv=5,scoring="r2",)
np.round(np.mean(sgd1),2)

-87.69

### creating the class for the Decision Tree

In [6]:
dt=DecisionTreeRegressor()
dt.fit(df.iloc[0:200,0:2],df.iloc[0:200,-1])
dt1 = cross_val_score(dt,df.iloc[0:200,0:2],df.iloc[0:200,-1],cv=5,scoring="r2",)
np.round(np.mean(dt1),2)

-0.45

### Using the Voting ensemble 

In [7]:

estimator = [("lr",lr),("sgd",sgd),("dt",dt)]

vc=VotingRegressor(estimators=estimator)

vc1 = cross_val_score(vc,df.iloc[0:200,0:2],df.iloc[0:200,-1],cv=5,scoring="r2",)
np.round(np.mean(vc1),2)

-25.97

In [8]:
vc.fit(df.iloc[0:200,0:2],df.iloc[0:200,-1])

It is a good example that when a model has accuracy less than the 50% then the voting ensemble degrade the accuracy below the mean average

In [9]:
pd.DataFrame(df.iloc[0,[0,1,-1]]).T
#here is the first line of the dataset

Unnamed: 0,total_bill,tip,size
0,16.99,1.01,2


In [10]:
y_predlr = lr.predict(df.iloc[0,[0,1,]].to_numpy().reshape(1, 2))
y_predlr

array([2.20973235])

Here the model is predicting the output as 2.20973235 and the correct output is 2 in which we have an error of (2.209-2)

In [11]:
y_predsgd = sgd.predict(df.iloc[0,[0,1,]].to_numpy().reshape(1, 2))
y_predsgd

array([3.45045889])

Here the SGD Regressor model is predicting the output as 2.07327943 and the correct output is 2 in which we have an error of (2.07327943-2)

In [12]:
y_preddt = dt.predict(df.iloc[0,[0,1,]].to_numpy().reshape(1, 2))
y_preddt

array([2.])

Here the Decision Tree model is predicting the output as 2 and the correct output is 2 in which we have an error of (0)

In [13]:
y_predvc = vc.predict(df.iloc[0,[0,1,]].to_numpy().reshape(1, 2))
y_predvc

array([-0.61010013])

(2.07327943-2) (2.209-2) (0) This are the errors terms accroding to the different model selected indiviusally <br>
(2.72979903 - 2) This is the error of the voting ensemble technique for this accuracy of this model 

Due to this error which is more for when we calculate for all the rows in the data (more than 50%) therefore the voting ensemble is giving the accuracy more less than the model indivisually