In [99]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt

from sklearn.linear_model import LinearRegression,LogisticRegression
from sklearn.svm import SVC
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.ensemble import VotingClassifier,StackingClassifier,BaggingClassifier,AdaBoostClassifier
from sklearn.naive_bayes import GaussianNB

from sklearn.model_selection import train_test_split,KFold,cross_val_score
from sklearn.preprocessing import StandardScaler,MinMaxScaler,OneHotEncoder,LabelEncoder,OrdinalEncoder
from sklearn.metrics import mean_squared_error, root_mean_squared_error,r2_score,accuracy_score,confusion_matrix,f1_score,classification_report
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.impute import SimpleImputer

In [100]:
# Load the wine dataset from a path relative to the notebook's working directory.
data = pd.read_csv("datasets/wine.csv")

In [101]:
# Quick sanity check of the loaded frame: the first three rows, the
# (rows, columns) shape, and the number of missing values per column.
preview = data.head(3)
print("Dataset:\n", preview)
print("Shape:\n", data.shape)
missing_per_column = data.isnull().sum()
print(missing_per_column)

Dataset:
    type  Alcohol  Malic_acid   Ash  Alcalinity_of_ash  Magnesium  \
0     1    14.23        1.71  2.43               15.6        127   
1     1    13.20        1.78  2.14               11.2        100   
2     1    13.16        2.36  2.67               18.6        101   

   Total_phenols  Flavanoids  Nonflavanoid_phenols  Proanthocyanins  \
0           2.80        3.06                  0.28             2.29   
1           2.65        2.76                  0.26             1.28   
2           2.80        3.24                  0.30             2.81   

   Color_intensity   Hue  OD280%2FOD315_of_diluted_wines  Proline  
0             5.64  1.04                            3.92     1065  
1             4.38  1.05                            3.40     1050  
2             5.68  1.03                            3.17     1185  
Shape:
 (178, 14)
type                              0
Alcohol                           0
Malic_acid                        0
Ash                               

In [102]:
# Target is the 'type' column; every other column is used as a feature.
y = data['type']
x = data.drop(columns='type')

In [103]:
# Hold-out split with a fixed seed so the partition is repeatable.
# NOTE(review): test_size=0.85 leaves only 15% of the rows for training --
# confirm this is intentional and not a typo for 0.15.
xtrain, xtest, ytrain, ytest = train_test_split(
    x,
    y,
    test_size=0.85,
    random_state=42,
)

In [104]:
# Baseline: logistic regression fitted on the features as loaded (the scaling
# cell comes later in the notebook). max_iter is raised to 10000 -- presumably
# so the solver can converge on unscaled inputs.
model = LogisticRegression(max_iter=10000).fit(xtrain, ytrain)
ypred = model.predict(xtest)

# Fraction of test rows predicted correctly; shown as the cell output.
acc = accuracy_score(y_true=ytest, y_pred=ypred)
acc

0.881578947368421

In [105]:
# Standardise the features. The scaler is fitted on the training split only
# and then applied to both splits.
# NOTE: this rebinds xtrain/xtest, so every cell executed after this one
# works on the scaled arrays rather than the original frames.
# (MinMaxScaler() was kept around as an alternative scaler.)
sc = StandardScaler().fit(xtrain)
xtrain, xtest = sc.transform(xtrain), sc.transform(xtest)

In [106]:
# Same logistic-regression model, now with default settings and trained on
# the scaled splits produced by the scaling cell.
model = LogisticRegression().fit(xtrain, ytrain)
ypred = model.predict(xtest)

# Fraction of test rows predicted correctly; shown as the cell output.
acc = accuracy_score(y_true=ytest, y_pred=ypred)
acc

0.9407894736842105

In [107]:
# Soft-voting ensemble over four heterogeneous classifiers.
# SVC needs probability=True so it can contribute class probabilities to a
# soft vote. The SVC (whose probability calibration is randomised) and the
# decision tree are seeded so the printed accuracy is reproducible on a
# fresh kernel run.
voting = VotingClassifier(
    estimators=[
        ('lr', LogisticRegression()),
        ('svc', SVC(probability=True, random_state=42)),
        ('gnb', GaussianNB()),
        ('dt', DecisionTreeClassifier(random_state=42)),
    ],
    voting='soft',  # use 'hard' for a majority vote on predicted labels
)

voting.fit(xtrain, ytrain)
ypred = voting.predict(xtest)
print("Voting Accuracy:", accuracy_score(ytest, ypred))

Voting Accuracy: 0.9671052631578947


In [108]:
# Stacking ensemble: four base models whose outputs feed a logistic-regression
# meta-model (final_estimator).
# The first base model is a LogisticRegression, so it is labelled 'lr'
# (it was previously mislabelled 'knn'). The tree and the forest are seeded
# so the printed accuracy is reproducible on a fresh kernel run.
stacking = StackingClassifier(
    estimators=[
        ('lr', LogisticRegression()),
        ('dt', DecisionTreeClassifier(random_state=42)),
        ('rf', RandomForestClassifier(random_state=42)),
        ('svc', SVC()),
    ],
    final_estimator=LogisticRegression(),
)

# Fit on the training split and score on the held-out split.
stacking.fit(xtrain, ytrain)
y_pred = stacking.predict(xtest)
print("Stacking Accuracy:", accuracy_score(ytest, y_pred))

Stacking Accuracy: 0.9868421052631579


In [109]:
# Bagging ensemble: 500 copies of the base estimator, seeded for repeatability.
# NOTE(review): the base estimator is itself a RandomForestClassifier, so this
# nests one ensemble inside another and trains 500 forests -- confirm that is
# intended rather than a plain DecisionTreeClassifier base.
# Alternatives tried earlier: a DecisionTreeClassifier base with
# max_samples=0.25 (bootstrap on or off), optionally with max_features=0.5
# and bootstrap_features=True, random_state=42.
bagging = BaggingClassifier(
    estimator=RandomForestClassifier(),
    n_estimators=500,
    random_state=0,
)
bagging.fit(xtrain, ytrain)

# Score on the held-out split.
y_pred = bagging.predict(xtest)
print("Bagging Accuracy:", accuracy_score(ytest, y_pred))

Bagging Accuracy: 0.9605263157894737


In [110]:
# AdaBoost: the base learner is usually a weak model such as a shallow
# decision tree; this cell boosts a LogisticRegression instead.
boosting = AdaBoostClassifier(
    estimator=LogisticRegression(),
    n_estimators=500,
    learning_rate=1.0,
    random_state=0,
)

# Fit on the training split and score on the held-out split.
boosting.fit(xtrain, ytrain)
y_pred = boosting.predict(xtest)
print("AdaBoost Accuracy:", accuracy_score(ytest, y_pred))

AdaBoost Accuracy: 0.9276315789473685
