In [1]:
import pandas as pd
import numpy as np
from sklearn.tree import DecisionTreeClassifier
from sklearn.svm import SVC
from sklearn.linear_model import LogisticRegression
from sklearn.naive_bayes import GaussianNB
from sklearn.neighbors import KNeighborsClassifier

In [2]:
from sklearn.model_selection import RepeatedStratifiedKFold
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import train_test_split

In [3]:
import matplotlib.pyplot as plt
%matplotlib inline

In [4]:
from sklearn.metrics import accuracy_score

## Importing data

In [5]:
# Location of the Iris CSV on the author's machine (absolute path; adjust when running elsewhere).
iris_csv_path = 'C:/Users/Lenovo/Documents/Documents/Data_Set/Iris.csv'
# Use the file's 'Id' column as the row index.
df = pd.read_csv(iris_csv_path, index_col=['Id'])
df.head()

Unnamed: 0_level_0,SepalLengthCm,SepalWidthCm,PetalLengthCm,PetalWidthCm,Species
Id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
1,5.1,3.5,1.4,0.2,Iris-setosa
2,4.9,3.0,1.4,0.2,Iris-setosa
3,4.7,3.2,1.3,0.2,Iris-setosa
4,4.6,3.1,1.5,0.2,Iris-setosa
5,5.0,3.6,1.4,0.2,Iris-setosa


In [6]:
# Encode the species names as integer class labels for the classifiers.
ord_rank = {'Iris-setosa': 0, 'Iris-versicolor': 1, 'Iris-virginica': 2}
species_codes = df['Species'].map(ord_rank)

# Series.map yields NaN for any label that is missing from the mapping.
# Fail loudly here instead of silently carrying NaN targets into model fitting.
unmapped_labels = df.loc[species_codes.isna(), 'Species'].unique()
if len(unmapped_labels) > 0:
    raise ValueError(f"Species labels missing from ord_rank: {list(unmapped_labels)}")

df['Species_ordinal'] = species_codes
df.head()

Unnamed: 0_level_0,SepalLengthCm,SepalWidthCm,PetalLengthCm,PetalWidthCm,Species,Species_ordinal
Id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
1,5.1,3.5,1.4,0.2,Iris-setosa,0
2,4.9,3.0,1.4,0.2,Iris-setosa,0
3,4.7,3.2,1.3,0.2,Iris-setosa,0
4,4.6,3.1,1.5,0.2,Iris-setosa,0
5,5.0,3.6,1.4,0.2,Iris-setosa,0


In [7]:
# Features: every column except the raw label and its integer encoding.
# Selecting by name (not by position) stays correct if columns are added or reordered.
X = df.drop(columns=['Species', 'Species_ordinal'])
# Target: the integer-encoded species.
y = df['Species_ordinal']

In [8]:
# Split the data into training and testing parts (30% of the rows go to the test set).
x_train_all, x_test, y_train_all, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    random_state=35,
)

In [9]:
## Split x_train_all again into x_train and x_val (30% of its rows go to x_val).
x_train, x_val, y_train, y_val = train_test_split(
    x_train_all,
    y_train_all,
    test_size=0.3,
    random_state=25,
)

## scikit-learn has no built-in blending ensemble,
## so we implement one from scratch

In [10]:
## get a dictionary of base models

def get_models(random_state=None):
    """Return a fresh dictionary of unfitted base (level-0) classifiers.

    Parameters
    ----------
    random_state : int or None, optional
        Forwarded to ``DecisionTreeClassifier``. The default ``None`` keeps the
        previous, unseeded behaviour; pass an integer to seed the tree.

    Returns
    -------
    dict
        Maps a short name to a newly constructed estimator. A new set of
        estimators is created on every call.
    """
    return {
        'lr': LogisticRegression(max_iter=1000),
        'svc': SVC(),
        'knn': KNeighborsClassifier(n_neighbors=35),
        'decision': DecisionTreeClassifier(random_state=random_state),
        'naive_gaussin': GaussianNB(),
    }

In [17]:
# fit blending
def fit_blending(models, x_train, x_val, y_train, y_val, blender=None):
    """Fit the base models and a meta-model ("blender") on a hold-out split.

    Each base model is fitted on ``(x_train, y_train)``. Its predictions for
    ``x_val`` become one column of the meta-feature matrix, and the blender is
    then fitted on that matrix against ``y_val``.

    Parameters
    ----------
    models : dict
        Maps a name to an estimator exposing ``fit`` and ``predict``.
        The estimators are fitted in place.
    x_train, y_train
        Features / targets used to fit the base models.
    x_val, y_val
        Hold-out features / targets used to build the meta-features and to
        fit the blender.
    blender : estimator, optional
        Meta-model exposing ``fit``. When omitted,
        ``LogisticRegression(max_iter=1000)`` is used.

    Returns
    -------
    tuple
        ``(models, blender)``: the same ``models`` dict that was passed in and
        the fitted blender.

    Raises
    ------
    ValueError
        If ``models`` is empty (there would be no meta-features to fit on).
    """
    if not models:
        raise ValueError("fit_blending needs at least one base model")

    if blender is None:
        blender = LogisticRegression(max_iter=1000)

    meta_columns = []
    for model in models.values():
        # Fit on the training split, then predict the hold-out split.
        model.fit(x_train, y_train)
        val_pred = np.asarray(model.predict(x_val))
        # Reshape to a single column so predictions can be stacked side by side.
        meta_columns.append(val_pred.reshape(len(val_pred), 1))

    # Meta-feature matrix: one row per validation sample, one column per base model.
    metaX = np.hstack(meta_columns)

    # The blender learns to map the base models' predictions to the true labels.
    blender.fit(metaX, y_val)
    return models, blender
        
    
    

In [18]:
# Fit the blend once and display the returned dictionary of base models.
fitted_base_models = fit_blending(get_models(), x_train, x_val, y_train, y_val)[0]
fitted_base_models

{'lr': LogisticRegression(max_iter=1000),
 'svc': SVC(),
 'knn': KNeighborsClassifier(n_neighbors=35),
 'decision': DecisionTreeClassifier(),
 'naive_gaussin': GaussianNB()}

In [19]:
def predict_blending_ensemble(models, blender, x_test):
    """Predict with a blending ensemble.

    Every base model in ``models`` predicts on ``x_test``. Those predictions
    are placed side by side (one column per model, in dict order) and handed
    to ``blender.predict``, whose result is returned.
    """
    # Lazily obtain each base model's predictions on the test data.
    raw_predictions = (base_model.predict(x_test) for base_model in models.values())

    # Turn every prediction vector into a single-column matrix.
    columns = [pred.reshape(len(pred), 1) for pred in raw_predictions]

    # Meta-feature matrix: one row per test sample, one column per base model.
    meta_features = np.hstack(columns)

    return blender.predict(meta_features)
    

In [21]:
# Build fresh base models and fit the whole blend.
models = get_models()
# NOTE: despite its name, `blender` holds the (base models, meta-model) tuple returned by fit_blending.
blender = fit_blending(models, x_train, x_val, y_train, y_val)
fitted_models, meta_model = blender

# Predict the held-out test set through the blend.
x_test_pred = predict_blending_ensemble(
    models=fitted_models,
    blender=meta_model,
    x_test=x_test,
)

# Test accuracy as a percentage, rounded to three decimals.
test_accuracy_pct = accuracy_score(y_test, x_test_pred) * 100
round(test_accuracy_pct, 3)

93.333