In [59]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

from sklearn.pipeline import Pipeline  
from sklearn.model_selection import GridSearchCV
from sklearn.preprocessing import OneHotEncoder
from sklearn.model_selection import StratifiedKFold


from sklearn.svm import SVC
from sklearn.naive_bayes import GaussianNB
from sklearn.neighbors import KNeighborsClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn.tree import DecisionTreeClassifier

from sklearn.ensemble import StackingClassifier

from sklearn.model_selection import train_test_split, KFold
from sklearn.metrics import accuracy_score, log_loss, roc_auc_score

import warnings
# Silence every warning for the rest of the session.
# NOTE(review): this blanket filter also hides any convergence or deprecation
# warnings the estimators below may emit - consider narrowing it to specific
# warning categories.
warnings.filterwarnings("ignore")


In [46]:
# Absolute, machine-specific location of the Glass Identification CSV.
glass_csv_path = r'C:\Users\DAI.STUDENTSDC\Desktop\Machine Learning\Data Sets\Cases\Glass Identification\Glass.csv'

# NOTE(review): index_col=0 turns the first CSV column into the row index. The
# frame displayed in the next cell shows that index is named RI, so RI is not
# among the columns that later become features - confirm this is intended.
df = pd.read_csv(glass_csv_path, index_col=0)

In [47]:
# Bare expression: the notebook renders the loaded frame as this cell's output.
df

Unnamed: 0_level_0,Na,Mg,Al,Si,K,Ca,Ba,Fe,Type
RI,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
1.52101,13.64,4.49,1.10,71.78,0.06,8.75,0.00,0.0,building_windows_float_processed
1.51761,13.89,3.60,1.36,72.73,0.48,7.83,0.00,0.0,building_windows_float_processed
1.51618,13.53,3.55,1.54,72.99,0.39,7.78,0.00,0.0,building_windows_float_processed
1.51766,13.21,3.69,1.29,72.61,0.57,8.22,0.00,0.0,building_windows_float_processed
1.51742,13.27,3.62,1.24,73.08,0.55,8.07,0.00,0.0,building_windows_float_processed
...,...,...,...,...,...,...,...,...,...
1.51623,14.14,0.00,2.88,72.61,0.08,9.18,1.06,0.0,headlamps
1.51685,14.92,0.00,1.99,73.06,0.00,8.40,1.59,0.0,headlamps
1.52065,14.36,0.00,2.02,73.42,0.00,8.44,1.64,0.0,headlamps
1.51651,14.38,0.00,1.94,73.61,0.00,8.48,1.57,0.0,headlamps


In [48]:
# Target: the glass-type label column.
y = df['Type']
# Features: every column except the label.
X = df.drop(columns=['Type'])

In [49]:
# Hold out 30% of the rows for evaluation; the fixed seed keeps the split
# reproducible.
# NOTE(review): the split is not stratified on y, so class proportions in the
# train and test parts are left to chance.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    random_state=24,
)

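Passthrough -> False (the default): the final estimator (logistic regression) is trained only on the base models' predictions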

In [50]:
# Level-0 (base) learners.
knn = KNeighborsClassifier(n_neighbors=5)
dtc = DecisionTreeClassifier(random_state=24)
nb = GaussianNB()
svm = SVC(probability=True, random_state=24)  # probability=True enables predict_proba

# Candidate final (level-1) estimators; seeded for reproducibility.
lr = LogisticRegression(random_state=24)
rf = RandomForestClassifier(random_state=24)

In [51]:
# Base learners as (name, estimator) pairs.
base_learners = [
    ('knn', knn),
    ('dtc', dtc),
    ('nb', nb),
    ('svm', svm),
]

# passthrough is left at its default here, and logistic regression is the
# final estimator that combines the base learners' outputs.
stack = StackingClassifier(estimators=base_learners, final_estimator=lr)
stack.fit(X_train, y_train)

In [52]:
# Score the fitted stack on the held-out split.
y_pred = stack.predict(X_test)
y_pred_proba = stack.predict_proba(X_test)

# The probability columns follow stack.classes_. Pass that label order to the
# probabilistic metrics explicitly instead of letting them infer the label set
# from whichever classes happen to appear in y_test.
class_labels = stack.classes_
accuracy = accuracy_score(y_test, y_pred)
roc_auc = roc_auc_score(y_test, y_pred_proba, multi_class="ovr", labels=class_labels)
logloss = log_loss(y_test, y_pred_proba, labels=class_labels)

# Printed order: accuracy, one-vs-rest ROC AUC, log loss.
print(accuracy, roc_auc, logloss)

0.6153846153846154 0.8972442245133719 0.9596137252169026


---
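Passthrough -> True: the final estimator also receives the original features. Note that the final estimator changes to a random forest here, so the scores below are not a passthrough-only comparison with the previous run.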

In [53]:
# Fresh, unfitted copies of every model for this experiment.

# Level-0 (base) learners.
knn = KNeighborsClassifier(n_neighbors=5)
dtc = DecisionTreeClassifier(random_state=24)
nb = GaussianNB()
svm = SVC(probability=True, random_state=24)  # probability=True enables predict_proba

# Candidate final (level-1) estimators; seeded for reproducibility.
lr = LogisticRegression(random_state=24)
rf = RandomForestClassifier(random_state=24)

In [54]:
# Base learners as (name, estimator) pairs.
base_learners = [
    ('knn', knn),
    ('dtc', dtc),
    ('nb', nb),
    ('svm', svm),
]

# passthrough=True with a random forest as the final estimator.
stack = StackingClassifier(
    estimators=base_learners,
    final_estimator=rf,
    passthrough=True,
)
stack.fit(X_train, y_train)

In [55]:
# Score the fitted stack on the held-out split.
y_pred = stack.predict(X_test)
y_pred_proba = stack.predict_proba(X_test)

# The probability columns follow stack.classes_. Pass that label order to the
# probabilistic metrics explicitly instead of letting them infer the label set
# from whichever classes happen to appear in y_test.
class_labels = stack.classes_
accuracy = accuracy_score(y_test, y_pred)
roc_auc = roc_auc_score(y_test, y_pred_proba, multi_class="ovr", labels=class_labels)
logloss = log_loss(y_test, y_pred_proba, labels=class_labels)

# Printed order: accuracy, one-vs-rest ROC AUC, log loss.
print(accuracy, roc_auc, logloss)

0.6307692307692307 0.9143665717218763 1.355075823571826


--- 
Using GridSearchCV

In [None]:
# Unfitted models for the grid search.

# Level-0 (base) learners; their hyper-parameters are tuned by the search.
knn = KNeighborsClassifier(n_neighbors=5)
dtc = DecisionTreeClassifier(random_state=24)
nb = GaussianNB()
svm = SVC(probability=True, random_state=24)  # probability=True enables predict_proba

# Candidate final (level-1) estimators; seeded for reproducibility.
lr = LogisticRegression(random_state=24)
rf = RandomForestClassifier(random_state=24)

# Shuffled, seeded 5-fold stratified splitter used by the search.
kfold = StratifiedKFold(n_splits=5, shuffle=True, random_state=24)

# Base learners as (name, estimator) pairs; the names are the prefixes used in
# the parameter grid (e.g. 'knn__n_neighbors').
base_learners = [
    ('knn', knn),
    ('dtc', dtc),
    ('nb', nb),
    ('svm', svm),
]

# No final_estimator is fixed here: the parameter grid supplies it.
stack = StackingClassifier(estimators=base_learners)
# Hyper-parameter ranges for the four base learners, shared by both grids.
base_learner_grid = {
    'knn__n_neighbors': [1, 2, 3, 4, 5],
    'dtc__max_depth': [1, 2, 3, 4, 5],
    'nb__var_smoothing': [0.01, 0.1, 0.2, 0.3, 0.4],
    'svm__C': [0.01, 0.1, 0.2, 0.3, 0.4],
}

# One grid per final estimator, because each has its own tunable parameters.
params = [
    {
        # Logistic regression as the final estimator
        **base_learner_grid,
        'final_estimator': [lr],
        'final_estimator__C': [0.01, 0.1, 1, 10],
        'passthrough': [True, False],
    },
    {
        # Random forest as the final estimator
        **base_learner_grid,
        'final_estimator': [rf],
        'final_estimator__n_estimators': [10, 20, 30, 40, 50],
        'final_estimator__max_depth': [1, 2, 3, 4, 5],
        'passthrough': [True, False],
    },
]


# Exhaustive search over both grids in params: 5,000 + 31,250 = 36,250
# candidates, each cross-validated on the 5 folds of kfold. Every one of those
# fits trains a full stacking ensemble, so a serial run is very slow.
# n_jobs=-1 spreads the independent fits across all CPU cores; it does not
# change which candidates are evaluated or how they are scored.
grid = GridSearchCV(
    estimator=stack,
    param_grid=params,
    cv=kfold,
    scoring='neg_log_loss',
    n_jobs=-1,
)


In [None]:
# Run the search on the training split only; X_test / y_test are not used here.
grid.fit(X_train, y_train)

In [None]:
# Best mean cross-validated score and the parameter combination that produced
# it. The search was scored with 'neg_log_loss', so values closer to 0 are better.
grid.best_score_, grid.best_params_