In [1]:
#Load Libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline

In [2]:
#Load Dataset
iris = pd.read_csv('./iris.csv')
iris.head()

Unnamed: 0,sepal_length,sepal_width,petal_length,petal_width,species
0,5.1,3.5,1.4,0.2,setosa
1,4.9,3.0,1.4,0.2,setosa
2,4.7,3.2,1.3,0.2,setosa
3,4.6,3.1,1.5,0.2,setosa
4,5.0,3.6,1.4,0.2,setosa


In [3]:
#Create x and y variables
x = iris.drop('species', axis=1).values
y = iris['species'].values

#Train dataset using 20% test set
from sklearn.model_selection import train_test_split
x_train,x_test,y_train,y_test=train_test_split(x,y,test_size=0.2,stratify=y,random_state=100)

#Fix the imbalanced Classes
from imblearn.over_sampling import SMOTE
smt=SMOTE(random_state=100)
x_train_smt,y_train_smt = smt.fit_resample(x_train,y_train)

#Scale the Data
from sklearn.preprocessing import StandardScaler
sc = StandardScaler()
x_train2 = sc.fit_transform(x_train_smt)
x_test2 = sc.fit_transform(x_test)

x_2 = sc.fit_transform(x)

Using TensorFlow backend.


In [4]:
#Class Balance - Test Data
print('Train Data - Class Split')
num_set = (y_train_smt == 'setosa').sum()
num_versi = (y_train_smt == 'versicolor').sum()
num_virg = (y_train_smt == 'virginica').sum()
print('Setosa -',  num_set)
print('Versicolor -',  num_versi)
print('Virginica -',  num_virg)

Train Data - Class Split
Setosa - 40
Versicolor - 40
Virginica - 40


In [5]:
#Import Models
from sklearn.svm import SVC
models2 = [SVC(kernel='linear')]

In [6]:
#Create Model Comparison
from sklearn.model_selection import KFold
from sklearn.model_selection import RepeatedKFold
from sklearn.model_selection import LeaveOneOut
from sklearn.model_selection import ShuffleSplit
from sklearn.model_selection import cross_validate

#Scoring Parameters
scoring = {'acc': 'accuracy'}

#Model Creation
MLA_columns = []
MLA_compare = pd.DataFrame(columns = MLA_columns)

row_index = 0
for alg in models2:
    
    #SVM Model
    predicted = alg.fit(x_train2, y_train_smt).predict(x_test2)
    
    #K-Fold CV
    kfcv = alg
    kf = KFold(n_splits=10, shuffle=True, random_state=100)
    KFcv = cross_validate(kfcv, x_2, y, scoring=scoring,
                         cv=kf)
    
    #Repeated K-Fold CV
    rkfcv = alg.fit(x_train2, y_train_smt)
    rkf = RepeatedKFold(n_splits=10, n_repeats=3, random_state=100)
    rKFcv = cross_validate(rkfcv, x_2, y, scoring=scoring,
                         cv=rkf)
    
    #LOOCV
    loomodel=alg.fit(x_train2, y_train_smt)
    loo = LeaveOneOut()
    loocv = cross_validate(loomodel, x_2, y, scoring=scoring,
                         cv=loo)
    
    #Shuffle-Split
    rsmodel=alg.fit(x_train2, y_train_smt)
    rs = ShuffleSplit(n_splits=10, test_size=.20, random_state=100)
    rscv = cross_validate(rsmodel, x_2, y, scoring=scoring,
                         cv=rs)
        
    MLA_name = alg.__class__.__name__
    MLA_compare.loc[row_index,'Algorithm Name'] = MLA_name
    MLA_compare.loc[row_index, 'Test Accuracy'] = round(alg.score(x_test2, y_test), 2)
    MLA_compare.loc[row_index, 'KFcv Model'] = round(KFcv['test_acc'].mean(),2)
    MLA_compare.loc[row_index, 'rKFcv Model'] = round(rKFcv['test_acc'].mean(),2)
    MLA_compare.loc[row_index, 'Loocv Model'] = round(loocv['test_acc'].mean(),2)
    MLA_compare.loc[row_index, 'ShuffleSplit Model'] = round(rscv['test_acc'].mean(),2)
    
    row_index+=1

MLA_compare

Unnamed: 0,Algorithm Name,Test Accuracy,KFcv Model,rKFcv Model,Loocv Model,ShuffleSplit Model
0,SVC,0.97,0.97,0.96,0.96,0.98


In [7]:
#Bootstrapping
from sklearn.utils import resample
models3 = SVC(kernel='linear')
x2 = x
y2 = y

bootx2 = resample(x2, replace=True, n_samples=len(x2), random_state=100)
booty2 = resample(y2, replace=True, n_samples=len(y2), random_state=100)

#Train dataset using 20% test set
from sklearn.model_selection import train_test_split
x_trainboot,x_testboot,y_trainboot,y_testboot=train_test_split(bootx2,booty2,test_size=0.2,random_state=100)

#Fix the imbalanced Classes
x_trainboot_smt,y_trainboot_smt = smt.fit_resample(x_trainboot,y_trainboot)

#Scale the Data
from sklearn.preprocessing import StandardScaler
sc = StandardScaler()
x_trainboot2 = sc.fit_transform(x_trainboot_smt)
x_testboot2 = sc.fit_transform(x_testboot)

predicted = models3.fit(x_trainboot2, y_trainboot_smt).predict(x_testboot2)
print('Bootstrapping Accuracy:',round(models3.score(x_testboot2, y_testboot), 2))

Bootstrapping Accuracy: 0.97
