In [2]:
import numpy as np

from scipy.io import loadmat

import pandas as pd

from numpy.linalg import eig

from scipy.stats import mode

from sklearn.metrics import balanced_accuracy_score

from sklearn.model_selection import StratifiedKFold

from sklearn.discriminant_analysis import LinearDiscriminantAnalysis

import warnings

warnings.filterwarnings("ignore")

from sklearn.utils import shuffle

In [3]:
data = loadmat( 'Databases/climate.mat' )

X = data['X']

y = data['Y']

print( X.shape )

print( y.shape )

(540, 18)
(540, 1)


# PCA

In [17]:
class PCA :
    
    
    
    
    # ---------------function to calculate eigen values and eigen vector for any matrix
    
    
    def eig_vector( self, X ) :
        
        
        
        # centralize
    
        mean = np.mean( X, 0 )
        
        X_stand = X - mean
        
        
    
        # calculate co-variance matrix
    
        X_cov = np.cov( np.transpose( X_stand ) )
        
        
    
        # find the eigenvalues and eigenvectors
    
        e, V = eig( X_cov )
        
        
    
        # sort eigen vector according to eigen values 
        
        idx = np.argsort( -e )

        e = e[idx]

        V = V[:,idx]
        
        m, n = V.shape
        
        return e, V 



    # ----------------projection of X--------------------
    
    
    def transformation( self, X, no_of_components ) :
        
        
        
        e, V = self.eig_vector( X )
        
        p = V[:, : no_of_components ]
        
        
    
        # project the original dataset
    
        mean = np.mean( X, 0 )
        
        X_stand = X - mean
    
        X_transform = np.dot( X_stand, p )
        
        return X_transform

# Selection Methods

## Conditional Number    ---      ( max( lamda ) / 10 ) < lamda

In [18]:
# function return number of components 

def conditional_number( X ) :
    
    pca = PCA()
    
    e, V = pca.eig_vector( X )
    
    e_max = e[0]
    
    condition = e_max / 10
    
    no_of_components = np.argmax( e < condition )
    
    if( no_of_components == 0 ) :
        
        return 1
    
    else :
        
        return no_of_components

## Kaiser rule --- ( lamda > 1 )

In [19]:
# function return number of components

def kaiser_rule( X ) :
    
    pca = PCA()
    
    e, V = pca.eig_vector( X )
    
    no_of_components = np.argmax( e < 1 )
    
    if( no_of_components == 0 ) :
        
        return 1
    
    else :
        
        return no_of_components

## Broken Stick rule

In [20]:
# function return number of components

def broken_stick( X ) :
    
    
    
    pca = PCA()
    
    e, V = pca.eig_vector( X )
    
    
    
    # Calculate the proportional variance
    
    propvar = e / sum( e )
    
    
    
    # calculate the expected length of the k-th longest segment
    
    p = np.size( e )
    
    g = np.zeros( ( p ) )
    
    k = 0
    
    while( k < p ) :
        
        i = k
        
        while( i < p ) :
            
            g[k] = g[k] + ( 1 / ( i + 1 ) )
            
            i = i + 1
            
        k = k + 1

    g = g / p     
    
    
    
    
    # In the Broken-Stick model, the individual percentages of variance of the components are compared with the values expected from the “broken stick” distribution. 
    # The two distributions are compared element-by-element, and first value d + 1 where the expected valueis larger than the observed value determines the dimension.

    no_of_components = np.argmax( propvar < g )
            
    if( no_of_components == 0 ) :
        
        return 1
    
    else :
        
        return no_of_components



# Models

## KNN

In [21]:
# K Nearest Neighbors Classification

class K_Nearest_Neighbors_Classifier() : 
    
    
    def __init__( self, K ) :
        
        self.K = K
        
    
    
    # Function to store training set
        
    def fit( self, X_train, Y_train ) :
        
        self.X_train = X_train
        
        self.Y_train = Y_train
        
        # no_of_training_examples, no_of_features
        
        self.m, self.n = X_train.shape
    
    
    
    # Function for prediction
        
    def predict( self, X_test ) :
        
        self.X_test = X_test
        
        # no_of_test_examples, no_of_features
        
        self.m_test, self.n = X_test.shape
        
        # initialize Y_predict
        
        Y_predict = np.zeros( self.m_test )
        
        for i in range( self.m_test ) :
            
            x = self.X_test[i]
            
            # find the K nearest neighbors from current test example
            
            neighbors = np.zeros( self.K )
            
            neighbors = self.find_neighbors( x )
            
            # most frequent class in K neighbors
            
            Y_predict[i] = mode( neighbors )[0][0]    
            
        return Y_predict
    
    
    
    # Function to find the K nearest neighbors to current test example
          
    def find_neighbors( self, x ) :
        
        # calculate all the euclidean distances between current test example x and training set X_train
        
        euclidean_distances = np.zeros( self.m )
        
        for i in range( self.m ) :
            
            d = self.euclidean( x, self.X_train[i] )
            
            euclidean_distances[i] = d
        
        # sort Y_train according to euclidean_distance_array and store into Y_train_sorted
        
        inds = euclidean_distances.argsort()
        
        Y_train_sorted = self.Y_train[inds]
        
        return Y_train_sorted[:self.K]
    
    
    
    # Function to calculate euclidean distance
            
    def euclidean( self, x, x_train ) :
        
        return np.sqrt( np.sum( np.square( x - x_train ) ) )

## Logistic Regression

In [22]:
# # Logistic Regression

class LogitRegression() :
    
    
    
    def __init__( self, learning_rate, iterations ) :        
        
        self.learning_rate = learning_rate        
        
        self.iterations = iterations
        
        
          
    # Function for model training   
    
    def fit( self, X, Y ) :        
        
        # no_of_training_examples, no_of_features        
        
        self.m, self.n = X.shape        
        
        # weight initialization        
        
        self.W = np.zeros( self.n )        
        
        self.b = 0        
        
        self.X = X        
        
        self.Y = Y
          
        # gradient descent learning
                  
        for i in range( self.iterations ) :            
            
            self.update_weights()            
        
        return self
      
    
    
    # Helper function to update weights in gradient descent
      
    def update_weights( self ) :           
        
        A = 1 / ( 1 + np.exp( - ( self.X.dot( self.W ) + self.b ) ) )
          
        # calculate gradients        
        
        tmp = ( A - self.Y.T )        
        
        tmp = np.reshape( tmp, self.m )        
        
        dW = np.dot( self.X.T, tmp ) / self.m         
        
        db = np.sum( tmp ) / self.m 
          
        # update weights    
        
        self.W = self.W - self.learning_rate * dW    
        
        self.b = self.b - self.learning_rate * db
          
        return self
      
    
    
    # Hypothetical function  h( x ) 
      
    def predict( self, X ) :    
        
        Z = 1 / ( 1 + np.exp( - ( X.dot( self.W ) + self.b ) ) )        
        
        Y = np.where( Z > 0.5, 1, 0 )        
        
        return Y

# Modelling and Balanced Accuracy calculation 10 times

In [35]:
# return the average of balanced accuracy after running 10 times with 10 fold stratified cross-validation

def fun( X, y, model ) :
    
    
    
    # outer loop to calculate the balanced accuracy 10 times
    
    average_balanced_accuracies = []
    
    iterations = 1
    
    for i in range( 0, 10 ) :
        
        
        # shuffle X, y before Splitting
        
        shuffle( X, y )
        
        
        
        skfold = StratifiedKFold( n_splits = 10, shuffle = True )
    
        fold_no = 1

        balanced_accuracies = []
        
        
        
        # inner loop for 10 fold stratified cross validation

        for train_index, test_index in skfold.split( X, y ) :
            
            X_train, X_test = X[train_index], X[test_index]
    
            y_train, y_test = y[train_index], y[test_index]
         
            model.fit( X_train, y_train )
            
            balanced_accuracy = balanced_accuracy_score( y_test, model.predict( X_test ) )

            print("balanced_accuracy_score on %d fold : % f" % ( fold_no, balanced_accuracy ) )
    
            balanced_accuracies.append( balanced_accuracy )

            fold_no += 1
                   
        print("average balanced accuracy during %d iteration : % f" % ( iterations, np.mean( balanced_accuracies ) ) )
              
        average_balanced_accuracies.append( np.mean( balanced_accuracies ) )
        
        iterations += 1
        
    return np.mean( average_balanced_accuracies )

# Generic Functions for selecting principal components and modelling by all three algorithms

## Modelling with Conditional Number

In [36]:
def conditional_modelling( X, y ) :
    
    
    # Selecting components
    
    no_of_components_conditonal_number = conditional_number( X )

    print( "Conditional number : ", no_of_components_conditonal_number )
    
    pca = PCA()

    X_conditional_number = pca.transformation( X, no_of_components_conditonal_number )
    
    
    
    
    # KNN
    
    knn = K_Nearest_Neighbors_Classifier( K = 5 )

    print( "Average balanced accuracy of KNN after 10 iterations : %f" % ( fun( X_conditional_number, y, knn ) ) )
    
    
    
    # LDA

    lda = LinearDiscriminantAnalysis()

    print( "Average balanced accuracy of LDA after 10 iterations : %f" % ( fun( X_conditional_number, y, lda ) ) )
    
    
    
    # Logistic Regression
    
    logit = LogitRegression( learning_rate = 0.01, iterations = 1000 )

    print( "Average balanced accuracy of Logistic Regression after 10 iterations : %f" % ( fun( X_conditional_number, y, logit ) ) )
    
    
    

## Modelling with Kaiser rule 

In [37]:
def kaiser_modelling( X, y ) :
    
    no_of_components_kaiser_rule = kaiser_rule( X )

    print( "Kaiser rule : ", no_of_components_kaiser_rule )
    
    pca = PCA()

    X_kaiser_rule = pca.transformation( X, no_of_components_kaiser_rule )
    
    
    # KNN
    
    knn = K_Nearest_Neighbors_Classifier( K = 5 )

    print( "Average balanced accuracy of KNN after 10 iterations : %f" % ( fun( X_kaiser_rule, y, knn ) ) )
    
    
    
    # LDA 
    
    lda = LinearDiscriminantAnalysis()

    print( "Average balanced accuracy of LDA after 10 iterations : %f" % ( fun( X_kaiser_rule, y, lda ) ) )
    
   


    # Logistic Regression
    
    logit = LogitRegression( learning_rate = 0.01, iterations = 1000 )

    print( "Average balanced accuracy of Logistic Regression after 10 iterations : %f" % ( fun( X_kaiser_rule, y, logit ) ) )
    
    
    

## Modelling with Broken Stick rule

In [38]:
def broken_modelling( X, y ) :
    
    no_of_components_broken_stick_rule = broken_stick( X )

    print( "Broken Stick Rule : ",  no_of_components_broken_stick_rule ) 
    
    pca = PCA()

    X_broken_stick_rule = pca.transformation( X, no_of_components_broken_stick_rule )
    
    
    # KNN
    
    knn = K_Nearest_Neighbors_Classifier( K = 5 )

    print( "Average balanced accuracy of KNN after 10 iterations : %f" % ( fun( X_broken_stick_rule, y, knn ) ) )
    
    
    
    # LDA
    
    lda = LinearDiscriminantAnalysis()

    print( "Average balanced accuracy of LDA after 10 iterations : %f" % ( fun( X_broken_stick_rule, y, lda ) ) )
    
    
    
    # Logistic Regression
    
    logit = LogitRegression( learning_rate = 0.01, iterations = 1000 )

    print( "Average balanced accuracy of Logistic Regression after 10 iterations : %f" % ( fun( X_broken_stick_rule, y, logit ) ) )  

# Breast Cancer Dataset

In [39]:
data = loadmat( 'Databases/breastCancer.mat' )

X = data['X']

y = data['Y']

print( X.shape )

print( y.shape )

(569, 30)
(569, 1)


In [41]:
conditional_modelling( X, y )

Conditional number :  1
balanced_accuracy_score on 1 fold :  0.857792
balanced_accuracy_score on 2 fold :  0.909091
balanced_accuracy_score on 3 fold :  0.811508
balanced_accuracy_score on 4 fold :  0.914683
balanced_accuracy_score on 5 fold :  0.958333
balanced_accuracy_score on 6 fold :  0.890873
balanced_accuracy_score on 7 fold :  0.900794
balanced_accuracy_score on 8 fold :  0.924603
balanced_accuracy_score on 9 fold :  0.853175
balanced_accuracy_score on 10 fold :  0.947619
average balanced accuracy during 1 iteration :  0.896847
balanced_accuracy_score on 1 fold :  0.894805
balanced_accuracy_score on 2 fold :  0.851948
balanced_accuracy_score on 3 fold :  0.900794
balanced_accuracy_score on 4 fold :  0.882937
balanced_accuracy_score on 5 fold :  0.914683
balanced_accuracy_score on 6 fold :  0.934524
balanced_accuracy_score on 7 fold :  0.890873
balanced_accuracy_score on 8 fold :  0.857143
balanced_accuracy_score on 9 fold :  0.880952
balanced_accuracy_score on 10 fold :  0.9523

balanced_accuracy_score on 2 fold :  0.909091
balanced_accuracy_score on 3 fold :  0.890873
balanced_accuracy_score on 4 fold :  0.867063
balanced_accuracy_score on 5 fold :  0.867063
balanced_accuracy_score on 6 fold :  0.876984
balanced_accuracy_score on 7 fold :  0.952381
balanced_accuracy_score on 8 fold :  0.890873
balanced_accuracy_score on 9 fold :  0.876984
balanced_accuracy_score on 10 fold :  0.838095
average balanced accuracy during 1 iteration :  0.806032
balanced_accuracy_score on 1 fold :  0.931818
balanced_accuracy_score on 2 fold :  0.849351
balanced_accuracy_score on 3 fold :  0.904762
balanced_accuracy_score on 4 fold :  0.867063
balanced_accuracy_score on 5 fold :  0.863095
balanced_accuracy_score on 6 fold :  0.900794
balanced_accuracy_score on 7 fold :  0.890873
balanced_accuracy_score on 8 fold :  0.815476
balanced_accuracy_score on 9 fold :  0.890873
balanced_accuracy_score on 10 fold :  0.952381
average balanced accuracy during 2 iteration :  0.886649
balanced_a

In [42]:
kaiser_modelling( X, y )

Kaiser rule :  7
balanced_accuracy_score on 1 fold :  0.962987
balanced_accuracy_score on 2 fold :  0.880519
balanced_accuracy_score on 3 fold :  0.914683
balanced_accuracy_score on 4 fold :  0.948413
balanced_accuracy_score on 5 fold :  0.972222
balanced_accuracy_score on 6 fold :  0.914683
balanced_accuracy_score on 7 fold :  0.853175
balanced_accuracy_score on 8 fold :  0.928571
balanced_accuracy_score on 9 fold :  0.948413
balanced_accuracy_score on 10 fold :  0.904762
average balanced accuracy during 1 iteration :  0.922843
balanced_accuracy_score on 1 fold :  0.894805
balanced_accuracy_score on 2 fold :  0.857792
balanced_accuracy_score on 3 fold :  0.928571
balanced_accuracy_score on 4 fold :  0.952381
balanced_accuracy_score on 5 fold :  0.896825
balanced_accuracy_score on 6 fold :  0.900794
balanced_accuracy_score on 7 fold :  0.900794
balanced_accuracy_score on 8 fold :  0.938492
balanced_accuracy_score on 9 fold :  0.962302
balanced_accuracy_score on 10 fold :  0.938095
aver

balanced_accuracy_score on 1 fold :  0.942857
balanced_accuracy_score on 2 fold :  0.931818
balanced_accuracy_score on 3 fold :  0.900794
balanced_accuracy_score on 4 fold :  0.948413
balanced_accuracy_score on 5 fold :  0.880952
balanced_accuracy_score on 6 fold :  0.972222
balanced_accuracy_score on 7 fold :  0.928571
balanced_accuracy_score on 8 fold :  0.976190
balanced_accuracy_score on 9 fold :  0.952381
balanced_accuracy_score on 10 fold :  0.828571
average balanced accuracy during 1 iteration :  0.926277
balanced_accuracy_score on 1 fold :  0.954545
balanced_accuracy_score on 2 fold :  0.917532
balanced_accuracy_score on 3 fold :  0.958333
balanced_accuracy_score on 4 fold :  0.914683
balanced_accuracy_score on 5 fold :  0.867063
balanced_accuracy_score on 6 fold :  0.890873
balanced_accuracy_score on 7 fold :  0.914683
balanced_accuracy_score on 8 fold :  0.900794
balanced_accuracy_score on 9 fold :  0.938492
balanced_accuracy_score on 10 fold :  0.961905
average balanced accu

In [43]:
broken_modelling( X, y )

Broken Stick Rule :  1
balanced_accuracy_score on 1 fold :  0.874675
balanced_accuracy_score on 2 fold :  0.812338
balanced_accuracy_score on 3 fold :  0.829365
balanced_accuracy_score on 4 fold :  0.825397
balanced_accuracy_score on 5 fold :  0.904762
balanced_accuracy_score on 6 fold :  0.914683
balanced_accuracy_score on 7 fold :  0.904762
balanced_accuracy_score on 8 fold :  0.920635
balanced_accuracy_score on 9 fold :  0.886905
balanced_accuracy_score on 10 fold :  0.976190
average balanced accuracy during 1 iteration :  0.884971
balanced_accuracy_score on 1 fold :  0.917532
balanced_accuracy_score on 2 fold :  0.909091
balanced_accuracy_score on 3 fold :  0.863095
balanced_accuracy_score on 4 fold :  0.910714
balanced_accuracy_score on 5 fold :  0.873016
balanced_accuracy_score on 6 fold :  0.924603
balanced_accuracy_score on 7 fold :  0.880952
balanced_accuracy_score on 8 fold :  0.890873
balanced_accuracy_score on 9 fold :  0.863095
balanced_accuracy_score on 10 fold :  0.84285

balanced_accuracy_score on 2 fold :  0.931818
balanced_accuracy_score on 3 fold :  0.924603
balanced_accuracy_score on 4 fold :  0.853175
balanced_accuracy_score on 5 fold :  0.890873
balanced_accuracy_score on 6 fold :  0.867063
balanced_accuracy_score on 7 fold :  0.924603
balanced_accuracy_score on 8 fold :  0.880952
balanced_accuracy_score on 9 fold :  0.795635
balanced_accuracy_score on 10 fold :  0.928571
average balanced accuracy during 1 iteration :  0.886353
balanced_accuracy_score on 1 fold :  0.954545
balanced_accuracy_score on 2 fold :  0.857792
balanced_accuracy_score on 3 fold :  0.805556
balanced_accuracy_score on 4 fold :  0.934524
balanced_accuracy_score on 5 fold :  0.904762
balanced_accuracy_score on 6 fold :  0.876984
balanced_accuracy_score on 7 fold :  0.853175
balanced_accuracy_score on 8 fold :  0.890873
balanced_accuracy_score on 9 fold :  0.928571
balanced_accuracy_score on 10 fold :  0.857143
average balanced accuracy during 2 iteration :  0.886392
balanced_a

# Banknote Dataset

In [44]:
data = loadmat( 'Databases/Banknote.mat' )

X = data['X']

y = data['Y']

print( X.shape )

print( y.shape )

(1372, 4)
(1372, 1)


In [45]:
conditional_modelling( X, y )

Conditional number :  2
balanced_accuracy_score on 1 fold :  0.848307
balanced_accuracy_score on 2 fold :  0.869491
balanced_accuracy_score on 3 fold :  0.853861
balanced_accuracy_score on 4 fold :  0.865293
balanced_accuracy_score on 5 fold :  0.894953
balanced_accuracy_score on 6 fold :  0.880177
balanced_accuracy_score on 7 fold :  0.827653
balanced_accuracy_score on 8 fold :  0.867019
balanced_accuracy_score on 9 fold :  0.827653
balanced_accuracy_score on 10 fold :  0.860440
average balanced accuracy during 1 iteration :  0.859485
balanced_accuracy_score on 1 fold :  0.856504
balanced_accuracy_score on 2 fold :  0.830211
balanced_accuracy_score on 3 fold :  0.868637
balanced_accuracy_score on 4 fold :  0.817731
balanced_accuracy_score on 5 fold :  0.863783
balanced_accuracy_score on 6 fold :  0.839193
balanced_accuracy_score on 7 fold :  0.894845
balanced_accuracy_score on 8 fold :  0.870362
balanced_accuracy_score on 9 fold :  0.881795
balanced_accuracy_score on 10 fold :  0.8472

balanced_accuracy_score on 1 fold :  0.717479
balanced_accuracy_score on 2 fold :  0.795721
balanced_accuracy_score on 3 fold :  0.647110
balanced_accuracy_score on 4 fold :  0.725841
balanced_accuracy_score on 5 fold :  0.706104
balanced_accuracy_score on 6 fold :  0.704594
balanced_accuracy_score on 7 fold :  0.714409
balanced_accuracy_score on 8 fold :  0.758736
balanced_accuracy_score on 9 fold :  0.757118
balanced_accuracy_score on 10 fold :  0.734146
average balanced accuracy during 1 iteration :  0.726126
balanced_accuracy_score on 1 fold :  0.655312
balanced_accuracy_score on 2 fold :  0.774537
balanced_accuracy_score on 3 fold :  0.757118
balanced_accuracy_score on 4 fold :  0.727459
balanced_accuracy_score on 5 fold :  0.729185
balanced_accuracy_score on 6 fold :  0.753775
balanced_accuracy_score on 7 fold :  0.722606
balanced_accuracy_score on 8 fold :  0.665121
balanced_accuracy_score on 9 fold :  0.747304
balanced_accuracy_score on 10 fold :  0.730802
average balanced accu

In [46]:
kaiser_modelling( X, y )

Kaiser rule :  1
balanced_accuracy_score on 1 fold :  0.668618
balanced_accuracy_score on 2 fold :  0.693208
balanced_accuracy_score on 3 fold :  0.734038
balanced_accuracy_score on 4 fold :  0.738999
balanced_accuracy_score on 5 fold :  0.653473
balanced_accuracy_score on 6 fold :  0.712683
balanced_accuracy_score on 7 fold :  0.801445
balanced_accuracy_score on 8 fold :  0.689603
balanced_accuracy_score on 9 fold :  0.707830
balanced_accuracy_score on 10 fold :  0.665121
average balanced accuracy during 1 iteration :  0.706502
balanced_accuracy_score on 1 fold :  0.740366
balanced_accuracy_score on 2 fold :  0.706195
balanced_accuracy_score on 3 fold :  0.678279
balanced_accuracy_score on 4 fold :  0.766933
balanced_accuracy_score on 5 fold :  0.709340
balanced_accuracy_score on 6 fold :  0.696182
balanced_accuracy_score on 7 fold :  0.643766
balanced_accuracy_score on 8 fold :  0.707614
balanced_accuracy_score on 9 fold :  0.628991
balanced_accuracy_score on 10 fold :  0.788287
aver

balanced_accuracy_score on 1 fold :  0.616032
balanced_accuracy_score on 2 fold :  0.596551
balanced_accuracy_score on 3 fold :  0.599547
balanced_accuracy_score on 4 fold :  0.607852
balanced_accuracy_score on 5 fold :  0.637403
balanced_accuracy_score on 6 fold :  0.558563
balanced_accuracy_score on 7 fold :  0.578300
balanced_accuracy_score on 8 fold :  0.594694
balanced_accuracy_score on 9 fold :  0.612597
balanced_accuracy_score on 10 fold :  0.591242
average balanced accuracy during 1 iteration :  0.599278
balanced_accuracy_score on 1 fold :  0.571961
balanced_accuracy_score on 2 fold :  0.581541
balanced_accuracy_score on 3 fold :  0.625755
balanced_accuracy_score on 4 fold :  0.624245
balanced_accuracy_score on 5 fold :  0.506040
balanced_accuracy_score on 6 fold :  0.602783
balanced_accuracy_score on 7 fold :  0.612705
balanced_accuracy_score on 8 fold :  0.689819
balanced_accuracy_score on 9 fold :  0.581536
balanced_accuracy_score on 10 fold :  0.617666
average balanced accu

In [47]:
broken_modelling( X, y )

Broken Stick Rule :  1
balanced_accuracy_score on 1 fold :  0.697999
balanced_accuracy_score on 2 fold :  0.742069
balanced_accuracy_score on 3 fold :  0.725841
balanced_accuracy_score on 4 fold :  0.720880
balanced_accuracy_score on 5 fold :  0.711066
balanced_accuracy_score on 6 fold :  0.766933
balanced_accuracy_score on 7 fold :  0.737274
balanced_accuracy_score on 8 fold :  0.645276
balanced_accuracy_score on 9 fold :  0.732420
balanced_accuracy_score on 10 fold :  0.719154
average balanced accuracy during 1 iteration :  0.719891
balanced_accuracy_score on 1 fold :  0.738982
balanced_accuracy_score on 2 fold :  0.729082
balanced_accuracy_score on 3 fold :  0.753667
balanced_accuracy_score on 4 fold :  0.802955
balanced_accuracy_score on 5 fold :  0.689711
balanced_accuracy_score on 6 fold :  0.727459
balanced_accuracy_score on 7 fold :  0.732312
balanced_accuracy_score on 8 fold :  0.752157
balanced_accuracy_score on 9 fold :  0.712575
balanced_accuracy_score on 10 fold :  0.64711

balanced_accuracy_score on 1 fold :  0.642006
balanced_accuracy_score on 2 fold :  0.571961
balanced_accuracy_score on 3 fold :  0.614215
balanced_accuracy_score on 4 fold :  0.599655
balanced_accuracy_score on 5 fold :  0.716135
balanced_accuracy_score on 6 fold :  0.574957
balanced_accuracy_score on 7 fold :  0.591458
balanced_accuracy_score on 8 fold :  0.548641
balanced_accuracy_score on 9 fold :  0.601165
balanced_accuracy_score on 10 fold :  0.561799
average balanced accuracy during 1 iteration :  0.602199
balanced_accuracy_score on 1 fold :  0.576751
balanced_accuracy_score on 2 fold :  0.549393
balanced_accuracy_score on 3 fold :  0.599547
balanced_accuracy_score on 4 fold :  0.593076
balanced_accuracy_score on 5 fold :  0.625863
balanced_accuracy_score on 6 fold :  0.624245
balanced_accuracy_score on 7 fold :  0.612813
balanced_accuracy_score on 8 fold :  0.601165
balanced_accuracy_score on 9 fold :  0.568378
balanced_accuracy_score on 10 fold :  0.665121
average balanced accu

# Blood Dataset

In [48]:
data = loadmat( 'Databases/blood.mat' )

X = data['X']

y = data['Y']

print( X.shape )

print( y.shape )

(748, 4)
(748, 1)


In [49]:
conditional_modelling( X, y )

Conditional number :  1
balanced_accuracy_score on 1 fold :  0.577485
balanced_accuracy_score on 2 fold :  0.593567
balanced_accuracy_score on 3 fold :  0.549708
balanced_accuracy_score on 4 fold :  0.685673
balanced_accuracy_score on 5 fold :  0.622807
balanced_accuracy_score on 6 fold :  0.540936
balanced_accuracy_score on 7 fold :  0.540936
balanced_accuracy_score on 8 fold :  0.595029
balanced_accuracy_score on 9 fold :  0.582559
balanced_accuracy_score on 10 fold :  0.553148
average balanced accuracy during 1 iteration :  0.584185
balanced_accuracy_score on 1 fold :  0.558480
balanced_accuracy_score on 2 fold :  0.641813
balanced_accuracy_score on 3 fold :  0.622807
balanced_accuracy_score on 4 fold :  0.631579
balanced_accuracy_score on 5 fold :  0.548246
balanced_accuracy_score on 6 fold :  0.595029
balanced_accuracy_score on 7 fold :  0.631579
balanced_accuracy_score on 8 fold :  0.605263
balanced_accuracy_score on 9 fold :  0.591331
balanced_accuracy_score on 10 fold :  0.5856

balanced_accuracy_score on 2 fold :  0.324561
balanced_accuracy_score on 3 fold :  0.602339
balanced_accuracy_score on 4 fold :  0.584795
balanced_accuracy_score on 5 fold :  0.755848
balanced_accuracy_score on 6 fold :  0.390351
balanced_accuracy_score on 7 fold :  0.543860
balanced_accuracy_score on 8 fold :  0.501462
balanced_accuracy_score on 9 fold :  0.408153
balanced_accuracy_score on 10 fold :  0.440144
average balanced accuracy during 1 iteration :  0.517140
balanced_accuracy_score on 1 fold :  0.646199
balanced_accuracy_score on 2 fold :  0.343567
balanced_accuracy_score on 3 fold :  0.711988
balanced_accuracy_score on 4 fold :  0.437135
balanced_accuracy_score on 5 fold :  0.647661
balanced_accuracy_score on 6 fold :  0.498538
balanced_accuracy_score on 7 fold :  0.480994
balanced_accuracy_score on 8 fold :  0.519006
balanced_accuracy_score on 9 fold :  0.665635
balanced_accuracy_score on 10 fold :  0.337461
average balanced accuracy during 2 iteration :  0.528818
balanced_a

In [50]:
kaiser_modelling( X, y )

Kaiser rule :  3
balanced_accuracy_score on 1 fold :  0.568713
balanced_accuracy_score on 2 fold :  0.595029
balanced_accuracy_score on 3 fold :  0.549708
balanced_accuracy_score on 4 fold :  0.552632
balanced_accuracy_score on 5 fold :  0.622807
balanced_accuracy_score on 6 fold :  0.631579
balanced_accuracy_score on 7 fold :  0.621345
balanced_accuracy_score on 8 fold :  0.649123
balanced_accuracy_score on 9 fold :  0.600103
balanced_accuracy_score on 10 fold :  0.615067
average balanced accuracy during 1 iteration :  0.600611
balanced_accuracy_score on 1 fold :  0.659357
balanced_accuracy_score on 2 fold :  0.558480
balanced_accuracy_score on 3 fold :  0.577485
balanced_accuracy_score on 4 fold :  0.548246
balanced_accuracy_score on 5 fold :  0.614035
balanced_accuracy_score on 6 fold :  0.668129
balanced_accuracy_score on 7 fold :  0.565789
balanced_accuracy_score on 8 fold :  0.605263
balanced_accuracy_score on 9 fold :  0.497420
balanced_accuracy_score on 10 fold :  0.582559
aver

balanced_accuracy_score on 1 fold :  0.380117
balanced_accuracy_score on 2 fold :  0.396199
balanced_accuracy_score on 3 fold :  0.336257
balanced_accuracy_score on 4 fold :  0.507310
balanced_accuracy_score on 5 fold :  0.720760
balanced_accuracy_score on 6 fold :  0.628655
balanced_accuracy_score on 7 fold :  0.342105
balanced_accuracy_score on 8 fold :  0.635965
balanced_accuracy_score on 9 fold :  0.495872
balanced_accuracy_score on 10 fold :  0.639319
average balanced accuracy during 1 iteration :  0.508256
balanced_accuracy_score on 1 fold :  0.412281
balanced_accuracy_score on 2 fold :  0.472222
balanced_accuracy_score on 3 fold :  0.536550
balanced_accuracy_score on 4 fold :  0.589181
balanced_accuracy_score on 5 fold :  0.802632
balanced_accuracy_score on 6 fold :  0.644737
balanced_accuracy_score on 7 fold :  0.380117
balanced_accuracy_score on 8 fold :  0.479532
balanced_accuracy_score on 9 fold :  0.706914
balanced_accuracy_score on 10 fold :  0.527348
average balanced accu

In [51]:
broken_modelling( X, y )

Broken Stick Rule :  1
balanced_accuracy_score on 1 fold :  0.494152
balanced_accuracy_score on 2 fold :  0.695906
balanced_accuracy_score on 3 fold :  0.678363
balanced_accuracy_score on 4 fold :  0.586257
balanced_accuracy_score on 5 fold :  0.567251
balanced_accuracy_score on 6 fold :  0.520468
balanced_accuracy_score on 7 fold :  0.669591
balanced_accuracy_score on 8 fold :  0.595029
balanced_accuracy_score on 9 fold :  0.541280
balanced_accuracy_score on 10 fold :  0.526832
average balanced accuracy during 1 iteration :  0.587513
balanced_accuracy_score on 1 fold :  0.549708
balanced_accuracy_score on 2 fold :  0.595029
balanced_accuracy_score on 3 fold :  0.631579
balanced_accuracy_score on 4 fold :  0.504386
balanced_accuracy_score on 5 fold :  0.577485
balanced_accuracy_score on 6 fold :  0.549708
balanced_accuracy_score on 7 fold :  0.678363
balanced_accuracy_score on 8 fold :  0.538012
balanced_accuracy_score on 9 fold :  0.591331
balanced_accuracy_score on 10 fold :  0.64447

balanced_accuracy_score on 2 fold :  0.755848
balanced_accuracy_score on 3 fold :  0.635965
balanced_accuracy_score on 4 fold :  0.568713
balanced_accuracy_score on 5 fold :  0.482456
balanced_accuracy_score on 6 fold :  0.666667
balanced_accuracy_score on 7 fold :  0.456140
balanced_accuracy_score on 8 fold :  0.435673
balanced_accuracy_score on 9 fold :  0.512900
balanced_accuracy_score on 10 fold :  0.381321
average balanced accuracy during 1 iteration :  0.523194
balanced_accuracy_score on 1 fold :  0.555556
balanced_accuracy_score on 2 fold :  0.336257
balanced_accuracy_score on 3 fold :  0.555556
balanced_accuracy_score on 4 fold :  0.616959
balanced_accuracy_score on 5 fold :  0.618421
balanced_accuracy_score on 6 fold :  0.675439
balanced_accuracy_score on 7 fold :  0.583333
balanced_accuracy_score on 8 fold :  0.602339
balanced_accuracy_score on 9 fold :  0.566047
balanced_accuracy_score on 10 fold :  0.387513
average balanced accuracy during 2 iteration :  0.549742
balanced_a

# Climate Dataset

In [52]:
data = loadmat( 'Databases/climate.mat' )

X = data['X']

y = data['Y']

print( X.shape )

print( y.shape )

(540, 18)
(540, 1)


In [53]:
conditional_modelling( X, y )

Conditional number :  1
balanced_accuracy_score on 1 fold :  0.489796
balanced_accuracy_score on 2 fold :  0.500000
balanced_accuracy_score on 3 fold :  0.500000
balanced_accuracy_score on 4 fold :  0.500000
balanced_accuracy_score on 5 fold :  0.500000
balanced_accuracy_score on 6 fold :  0.500000
balanced_accuracy_score on 7 fold :  0.500000
balanced_accuracy_score on 8 fold :  0.500000
balanced_accuracy_score on 9 fold :  0.500000
balanced_accuracy_score on 10 fold :  0.500000
average balanced accuracy during 1 iteration :  0.498980
balanced_accuracy_score on 1 fold :  0.500000
balanced_accuracy_score on 2 fold :  0.500000
balanced_accuracy_score on 3 fold :  0.500000
balanced_accuracy_score on 4 fold :  0.500000
balanced_accuracy_score on 5 fold :  0.500000
balanced_accuracy_score on 6 fold :  0.500000
balanced_accuracy_score on 7 fold :  0.500000
balanced_accuracy_score on 8 fold :  0.490000
balanced_accuracy_score on 9 fold :  0.490000
balanced_accuracy_score on 10 fold :  0.5000

balanced_accuracy_score on 2 fold :  0.500000
balanced_accuracy_score on 3 fold :  0.500000
balanced_accuracy_score on 4 fold :  0.500000
balanced_accuracy_score on 5 fold :  0.500000
balanced_accuracy_score on 6 fold :  0.500000
balanced_accuracy_score on 7 fold :  0.500000
balanced_accuracy_score on 8 fold :  0.500000
balanced_accuracy_score on 9 fold :  0.500000
balanced_accuracy_score on 10 fold :  0.500000
average balanced accuracy during 1 iteration :  0.500000
balanced_accuracy_score on 1 fold :  0.500000
balanced_accuracy_score on 2 fold :  0.500000
balanced_accuracy_score on 3 fold :  0.500000
balanced_accuracy_score on 4 fold :  0.500000
balanced_accuracy_score on 5 fold :  0.500000
balanced_accuracy_score on 6 fold :  0.500000
balanced_accuracy_score on 7 fold :  0.500000
balanced_accuracy_score on 8 fold :  0.500000
balanced_accuracy_score on 9 fold :  0.500000
balanced_accuracy_score on 10 fold :  0.500000
average balanced accuracy during 2 iteration :  0.500000
balanced_a

In [54]:
kaiser_modelling( X, y )

Kaiser rule :  1
balanced_accuracy_score on 1 fold :  0.500000
balanced_accuracy_score on 2 fold :  0.500000
balanced_accuracy_score on 3 fold :  0.500000
balanced_accuracy_score on 4 fold :  0.500000
balanced_accuracy_score on 5 fold :  0.500000
balanced_accuracy_score on 6 fold :  0.489796
balanced_accuracy_score on 7 fold :  0.500000
balanced_accuracy_score on 8 fold :  0.500000
balanced_accuracy_score on 9 fold :  0.500000
balanced_accuracy_score on 10 fold :  0.500000
average balanced accuracy during 1 iteration :  0.498980
balanced_accuracy_score on 1 fold :  0.500000
balanced_accuracy_score on 2 fold :  0.500000
balanced_accuracy_score on 3 fold :  0.500000
balanced_accuracy_score on 4 fold :  0.500000
balanced_accuracy_score on 5 fold :  0.500000
balanced_accuracy_score on 6 fold :  0.500000
balanced_accuracy_score on 7 fold :  0.500000
balanced_accuracy_score on 8 fold :  0.500000
balanced_accuracy_score on 9 fold :  0.500000
balanced_accuracy_score on 10 fold :  0.490000
aver

balanced_accuracy_score on 2 fold :  0.500000
balanced_accuracy_score on 3 fold :  0.500000
balanced_accuracy_score on 4 fold :  0.500000
balanced_accuracy_score on 5 fold :  0.500000
balanced_accuracy_score on 6 fold :  0.500000
balanced_accuracy_score on 7 fold :  0.500000
balanced_accuracy_score on 8 fold :  0.500000
balanced_accuracy_score on 9 fold :  0.500000
balanced_accuracy_score on 10 fold :  0.500000
average balanced accuracy during 1 iteration :  0.500000
balanced_accuracy_score on 1 fold :  0.500000
balanced_accuracy_score on 2 fold :  0.500000
balanced_accuracy_score on 3 fold :  0.500000
balanced_accuracy_score on 4 fold :  0.500000
balanced_accuracy_score on 5 fold :  0.500000
balanced_accuracy_score on 6 fold :  0.500000
balanced_accuracy_score on 7 fold :  0.500000
balanced_accuracy_score on 8 fold :  0.500000
balanced_accuracy_score on 9 fold :  0.500000
balanced_accuracy_score on 10 fold :  0.500000
average balanced accuracy during 2 iteration :  0.500000
balanced_a

In [55]:
broken_modelling( X, y )

Broken Stick Rule :  1
balanced_accuracy_score on 1 fold :  0.489796
balanced_accuracy_score on 2 fold :  0.500000
balanced_accuracy_score on 3 fold :  0.500000
balanced_accuracy_score on 4 fold :  0.489796
balanced_accuracy_score on 5 fold :  0.500000
balanced_accuracy_score on 6 fold :  0.489796
balanced_accuracy_score on 7 fold :  0.500000
balanced_accuracy_score on 8 fold :  0.500000
balanced_accuracy_score on 9 fold :  0.500000
balanced_accuracy_score on 10 fold :  0.500000
average balanced accuracy during 1 iteration :  0.496939
balanced_accuracy_score on 1 fold :  0.500000
balanced_accuracy_score on 2 fold :  0.500000
balanced_accuracy_score on 3 fold :  0.500000
balanced_accuracy_score on 4 fold :  0.500000
balanced_accuracy_score on 5 fold :  0.489796
balanced_accuracy_score on 6 fold :  0.500000
balanced_accuracy_score on 7 fold :  0.500000
balanced_accuracy_score on 8 fold :  0.500000
balanced_accuracy_score on 9 fold :  0.500000
balanced_accuracy_score on 10 fold :  0.50000

balanced_accuracy_score on 2 fold :  0.500000
balanced_accuracy_score on 3 fold :  0.500000
balanced_accuracy_score on 4 fold :  0.500000
balanced_accuracy_score on 5 fold :  0.500000
balanced_accuracy_score on 6 fold :  0.500000
balanced_accuracy_score on 7 fold :  0.500000
balanced_accuracy_score on 8 fold :  0.500000
balanced_accuracy_score on 9 fold :  0.500000
balanced_accuracy_score on 10 fold :  0.500000
average balanced accuracy during 1 iteration :  0.500000
balanced_accuracy_score on 1 fold :  0.500000
balanced_accuracy_score on 2 fold :  0.500000
balanced_accuracy_score on 3 fold :  0.500000
balanced_accuracy_score on 4 fold :  0.500000
balanced_accuracy_score on 5 fold :  0.500000
balanced_accuracy_score on 6 fold :  0.500000
balanced_accuracy_score on 7 fold :  0.500000
balanced_accuracy_score on 8 fold :  0.500000
balanced_accuracy_score on 9 fold :  0.500000
balanced_accuracy_score on 10 fold :  0.500000
average balanced accuracy during 2 iteration :  0.500000
balanced_a

# Cryotherapy Dataset

In [56]:
data = loadmat( 'Databases/Cryotherapy.mat' )

X = data['X']

y = data['Y']

print( X.shape )

print( y.shape )

(90, 6)
(90, 1)


In [57]:
conditional_modelling( X, y )

Conditional number :  1
balanced_accuracy_score on 1 fold :  0.575000
balanced_accuracy_score on 2 fold :  0.350000
balanced_accuracy_score on 3 fold :  0.550000
balanced_accuracy_score on 4 fold :  1.000000
balanced_accuracy_score on 5 fold :  0.300000
balanced_accuracy_score on 6 fold :  0.575000
balanced_accuracy_score on 7 fold :  0.525000
balanced_accuracy_score on 8 fold :  0.525000
balanced_accuracy_score on 9 fold :  0.775000
balanced_accuracy_score on 10 fold :  0.550000
average balanced accuracy during 1 iteration :  0.572500
balanced_accuracy_score on 1 fold :  0.375000
balanced_accuracy_score on 2 fold :  0.325000
balanced_accuracy_score on 3 fold :  0.400000
balanced_accuracy_score on 4 fold :  0.650000
balanced_accuracy_score on 5 fold :  0.550000
balanced_accuracy_score on 6 fold :  0.750000
balanced_accuracy_score on 7 fold :  0.675000
balanced_accuracy_score on 8 fold :  0.675000
balanced_accuracy_score on 9 fold :  0.650000
balanced_accuracy_score on 10 fold :  0.5750

balanced_accuracy_score on 2 fold :  0.675000
balanced_accuracy_score on 3 fold :  0.350000
balanced_accuracy_score on 4 fold :  0.525000
balanced_accuracy_score on 5 fold :  0.875000
balanced_accuracy_score on 6 fold :  0.775000
balanced_accuracy_score on 7 fold :  0.625000
balanced_accuracy_score on 8 fold :  0.425000
balanced_accuracy_score on 9 fold :  0.500000
balanced_accuracy_score on 10 fold :  0.525000
average balanced accuracy during 1 iteration :  0.580000
balanced_accuracy_score on 1 fold :  0.575000
balanced_accuracy_score on 2 fold :  0.675000
balanced_accuracy_score on 3 fold :  0.525000
balanced_accuracy_score on 4 fold :  0.450000
balanced_accuracy_score on 5 fold :  0.650000
balanced_accuracy_score on 6 fold :  0.775000
balanced_accuracy_score on 7 fold :  0.525000
balanced_accuracy_score on 8 fold :  0.475000
balanced_accuracy_score on 9 fold :  0.525000
balanced_accuracy_score on 10 fold :  0.575000
average balanced accuracy during 2 iteration :  0.575000
balanced_a

In [58]:
kaiser_modelling( X, y )

Kaiser rule :  4
balanced_accuracy_score on 1 fold :  0.700000
balanced_accuracy_score on 2 fold :  0.450000
balanced_accuracy_score on 3 fold :  0.750000
balanced_accuracy_score on 4 fold :  0.900000
balanced_accuracy_score on 5 fold :  0.675000
balanced_accuracy_score on 6 fold :  0.750000
balanced_accuracy_score on 7 fold :  0.675000
balanced_accuracy_score on 8 fold :  0.800000
balanced_accuracy_score on 9 fold :  0.525000
balanced_accuracy_score on 10 fold :  1.000000
average balanced accuracy during 1 iteration :  0.722500
balanced_accuracy_score on 1 fold :  0.900000
balanced_accuracy_score on 2 fold :  0.800000
balanced_accuracy_score on 3 fold :  0.675000
balanced_accuracy_score on 4 fold :  0.675000
balanced_accuracy_score on 5 fold :  0.775000
balanced_accuracy_score on 6 fold :  0.675000
balanced_accuracy_score on 7 fold :  0.775000
balanced_accuracy_score on 8 fold :  0.875000
balanced_accuracy_score on 9 fold :  0.425000
balanced_accuracy_score on 10 fold :  0.575000
aver

balanced_accuracy_score on 6 fold :  0.900000
balanced_accuracy_score on 7 fold :  0.900000
balanced_accuracy_score on 8 fold :  0.900000
balanced_accuracy_score on 9 fold :  0.900000
balanced_accuracy_score on 10 fold :  0.900000
average balanced accuracy during 10 iteration :  0.890000
Average balanced accuracy of LDA after 10 iterations : 0.887000
balanced_accuracy_score on 1 fold :  0.875000
balanced_accuracy_score on 2 fold :  0.875000
balanced_accuracy_score on 3 fold :  0.875000
balanced_accuracy_score on 4 fold :  0.775000
balanced_accuracy_score on 5 fold :  0.900000
balanced_accuracy_score on 6 fold :  0.900000
balanced_accuracy_score on 7 fold :  0.800000
balanced_accuracy_score on 8 fold :  1.000000
balanced_accuracy_score on 9 fold :  0.775000
balanced_accuracy_score on 10 fold :  0.800000
average balanced accuracy during 1 iteration :  0.857500
balanced_accuracy_score on 1 fold :  0.875000
balanced_accuracy_score on 2 fold :  0.875000
balanced_accuracy_score on 3 fold :  

In [59]:
broken_modelling( X, y )

Broken Stick Rule :  1
balanced_accuracy_score on 1 fold :  0.575000
balanced_accuracy_score on 2 fold :  0.475000
balanced_accuracy_score on 3 fold :  0.675000
balanced_accuracy_score on 4 fold :  0.550000
balanced_accuracy_score on 5 fold :  0.650000
balanced_accuracy_score on 6 fold :  0.625000
balanced_accuracy_score on 7 fold :  0.775000
balanced_accuracy_score on 8 fold :  0.575000
balanced_accuracy_score on 9 fold :  0.675000
balanced_accuracy_score on 10 fold :  0.425000
average balanced accuracy during 1 iteration :  0.600000
balanced_accuracy_score on 1 fold :  0.575000
balanced_accuracy_score on 2 fold :  0.475000
balanced_accuracy_score on 3 fold :  0.650000
balanced_accuracy_score on 4 fold :  0.650000
balanced_accuracy_score on 5 fold :  0.650000
balanced_accuracy_score on 6 fold :  0.450000
balanced_accuracy_score on 7 fold :  0.875000
balanced_accuracy_score on 8 fold :  0.425000
balanced_accuracy_score on 9 fold :  0.650000
balanced_accuracy_score on 10 fold :  0.77500

balanced_accuracy_score on 3 fold :  0.625000
balanced_accuracy_score on 4 fold :  0.625000
balanced_accuracy_score on 5 fold :  0.325000
balanced_accuracy_score on 6 fold :  0.875000
balanced_accuracy_score on 7 fold :  0.500000
balanced_accuracy_score on 8 fold :  0.550000
balanced_accuracy_score on 9 fold :  0.450000
balanced_accuracy_score on 10 fold :  0.650000
average balanced accuracy during 1 iteration :  0.572500
balanced_accuracy_score on 1 fold :  0.500000
balanced_accuracy_score on 2 fold :  0.450000
balanced_accuracy_score on 3 fold :  0.525000
balanced_accuracy_score on 4 fold :  0.675000
balanced_accuracy_score on 5 fold :  0.550000
balanced_accuracy_score on 6 fold :  0.650000
balanced_accuracy_score on 7 fold :  0.500000
balanced_accuracy_score on 8 fold :  1.000000
balanced_accuracy_score on 9 fold :  0.325000
balanced_accuracy_score on 10 fold :  0.550000
average balanced accuracy during 2 iteration :  0.572500
balanced_accuracy_score on 1 fold :  0.550000
balanced_a