In [1]:
from sklearn.ensemble import StackingClassifier
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
 
from sklearn import tree
from sklearn.tree import DecisionTreeClassifier
from sklearn.model_selection import StratifiedKFold, cross_val_score, GridSearchCV
from sklearn.tree import plot_tree
from sklearn.metrics import roc_auc_score,accuracy_score,log_loss
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import GaussianNB
from sklearn.neighbors import KNeighborsClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC

from sklearn.ensemble import RandomForestClassifier


In [2]:
satellite = pd.read_csv('Satellite.csv', sep= ";")
satellite.head(2)

Unnamed: 0,x.1,x.2,x.3,x.4,x.5,x.6,x.7,x.8,x.9,x.10,...,x.28,x.29,x.30,x.31,x.32,x.33,x.34,x.35,x.36,classes
0,92,115,120,94,84,102,106,79,84,102,...,104,88,121,128,100,84,107,113,87,grey soil
1,84,102,106,79,84,102,102,83,80,102,...,100,84,107,113,87,84,99,104,79,grey soil


In [3]:
y= satellite['classes']
X= satellite.drop('classes', axis=1)
X_train,X_test,y_train,y_test = train_test_split(X,y,random_state=24,test_size=0.3,stratify=y)

In [4]:
knn = KNeighborsClassifier()
nb = GaussianNB()
dt = DecisionTreeClassifier(random_state=24)
svm = SVC(probability=True,random_state=24)
lr = LogisticRegression(random_state=24)
rf = RandomForestClassifier(random_state=24)

stack = StackingClassifier(estimators=[('KNN',knn),('NB',nb),('DT',dt),('SVM',svm)]  , final_estimator = lr)

stack.fit(X_train,y_train)
y_pred = stack.predict(X_test)
print(accuracy_score(y_test,y_pred))

y_pred_proba = stack.predict_proba(X_test)
print(log_loss(y_test,y_pred_proba))

0.905748316934231
0.2709870109450135


In [5]:
knn = KNeighborsClassifier()
nb = GaussianNB()
dt = DecisionTreeClassifier(random_state=24)
svm = SVC(probability=True,random_state=24)
rf = RandomForestClassifier(random_state=24)

stack = StackingClassifier(estimators=[('KNN',knn),('NB',nb),('DT',dt),('SVM',svm)]  , final_estimator = rf)

stack.fit(X_train,y_train)
y_pred = stack.predict(X_test)
print(accuracy_score(y_test,y_pred))

y_pred_proba = stack.predict_proba(X_test)
print(log_loss(y_test,y_pred_proba))

0.905748316934231
0.3357089071730576


In [6]:
kfold = StratifiedKFold(n_splits=5,shuffle=True,random_state=24)
rf = RandomForestClassifier(random_state=24)
stack = StackingClassifier(estimators=[('KNN',knn),('NB',nb),('DT',dt),('SVM',svm)]  , final_estimator = lr)

In [7]:
stack.get_params()

{'cv': None,
 'estimators': [('KNN', KNeighborsClassifier()),
  ('NB', GaussianNB()),
  ('DT', DecisionTreeClassifier(random_state=24)),
  ('SVM', SVC(probability=True, random_state=24))],
 'final_estimator__C': 1.0,
 'final_estimator__class_weight': None,
 'final_estimator__dual': False,
 'final_estimator__fit_intercept': True,
 'final_estimator__intercept_scaling': 1,
 'final_estimator__l1_ratio': None,
 'final_estimator__max_iter': 100,
 'final_estimator__multi_class': 'deprecated',
 'final_estimator__n_jobs': None,
 'final_estimator__penalty': 'l2',
 'final_estimator__random_state': 24,
 'final_estimator__solver': 'lbfgs',
 'final_estimator__tol': 0.0001,
 'final_estimator__verbose': 0,
 'final_estimator__warm_start': False,
 'final_estimator': LogisticRegression(random_state=24),
 'n_jobs': None,
 'passthrough': False,
 'stack_method': 'auto',
 'verbose': 0,
 'KNN': KNeighborsClassifier(),
 'NB': GaussianNB(),
 'DT': DecisionTreeClassifier(random_state=24),
 'SVM': SVC(probability

In [10]:
params= {
         'SVM__C':np.linspace(0.001,1,3),
         'DT__max_depth':[None,2,4],
        'passthrough':[True,False],
        'final_estimator__solver':[ 'liblinear']
        }

gcv = GridSearchCV(stack,param_grid=params,scoring='neg_log_loss',verbose=3)

In [11]:
gcv.fit(X,y)

Fitting 5 folds for each of 18 candidates, totalling 90 fits
[CV 1/5] END DT__max_depth=None, SVM__C=0.001, final_estimator__solver=liblinear, passthrough=True;, score=-0.294 total time= 1.3min
[CV 2/5] END DT__max_depth=None, SVM__C=0.001, final_estimator__solver=liblinear, passthrough=True;, score=-0.341 total time= 1.3min
[CV 3/5] END DT__max_depth=None, SVM__C=0.001, final_estimator__solver=liblinear, passthrough=True;, score=-0.318 total time= 1.2min
[CV 4/5] END DT__max_depth=None, SVM__C=0.001, final_estimator__solver=liblinear, passthrough=True;, score=-0.335 total time= 1.3min
[CV 5/5] END DT__max_depth=None, SVM__C=0.001, final_estimator__solver=liblinear, passthrough=True;, score=-0.305 total time= 1.3min
[CV 1/5] END DT__max_depth=None, SVM__C=0.001, final_estimator__solver=liblinear, passthrough=False;, score=-0.280 total time= 1.3min
[CV 2/5] END DT__max_depth=None, SVM__C=0.001, final_estimator__solver=liblinear, passthrough=False;, score=-0.351 total time= 1.2min
[CV 3/

In [12]:
print(gcv.best_params_)
print(gcv.best_score_)

{'DT__max_depth': None, 'SVM__C': 0.001, 'final_estimator__solver': 'liblinear', 'passthrough': True}
-0.3186655587466349


In [13]:
import pickle

In [14]:
best_stack = gcv.best_estimator_

In [15]:
pkfile =  open('stacking_satellite.pkl','wb')
pickle.dumb(best_stack,pkfile)


AttributeError: module 'pickle' has no attribute 'dumb'