In [74]:
import numpy as np

from scipy.io import loadmat

import pandas as pd

from numpy.linalg import eig

from scipy.stats import mode

from sklearn.metrics import balanced_accuracy_score

from sklearn.model_selection import StratifiedKFold

from sklearn.discriminant_analysis import LinearDiscriminantAnalysis

import warnings

warnings.filterwarnings("ignore")

# PCA

In [75]:
class PCA :
    
    
    
    
    # ---------------function to calculate eigen values and eigen vector for any matrix
    
    
    def eig_vector( self, X ) :
        
        
        
        # centralize
    
        mean = np.mean( X, 0 )
        
        X_stand = X - mean
        
        
    
        # calculate co-variance matrix
    
        X_cov = np.cov( np.transpose( X_stand ) )
        
        
    
        # find the eigenvalues and eigenvectors
    
        e, V = eig( X_cov )
        
        
    
        # sort eigen vector according to eigen values 
        
        idx = np.argsort( -e )

        e = e[idx]

        V = V[:,idx]
        
        m, n = V.shape
        
        return e, V 



    # ----------------projection of X--------------------
    
    
    def transformation( self, X, no_of_components ) :
        
        
        
        e, V = self.eig_vector( X )
        
        p = V[:, : no_of_components ]
        
        
    
        # project the original dataset
    
        mean = np.mean( X, 0 )
        
        X_stand = X - mean
    
        X_transform = np.dot( X_stand, p )
        
        return X_transform

# Selection Methods

## Conditional Number    ---      ( max( lamda ) / 10 ) < lamda

In [101]:
# function return number of components 

def conditional_number( X ) :
    
    pca = PCA()
    
    e, V = pca.eig_vector( X )
    
    e_max = e[0]
    
    condition = e_max / 10
    
    no_of_components = np.argmax( e < condition )
    
    if( no_of_components == 0 ) :
        
        return 1
    
    else :
        
        return no_of_components

## Kaiser rule --- ( lamda > 1 )

In [102]:
# function return number of components

def kaiser_rule( X ) :
    
    pca = PCA()
    
    e, V = pca.eig_vector( X )
    
    no_of_components = np.argmax( e < 1 )
    
    if( no_of_components == 0 ) :
        
        return 1
    
    else :
        
        return no_of_components

## Broken Stick rule

In [103]:
# function return number of components

def broken_stick( X ) :
    
    
    
    pca = PCA()
    
    e, V = pca.eig_vector( X )
    
    
    
    # Calculate the proportional variance
    
    propvar = e / sum( e )
    
    
    
    # calculate the expected length of the k-th longest segment
    
    p = np.size( e )
    
    g = np.zeros( ( p ) )
    
    k = 0
    
    while( k < p ) :
        
        i = k
        
        while( i < p ) :
            
            g[k] = g[k] + ( 1 / ( i + 1 ) )
            
            i = i + 1
            
        k = k + 1

    g = g / p     
    
    
    
    
    # In the Broken-Stick model, the individual percentages of variance of the components are compared with the values expected from the “broken stick” distribution. 
    # The two distributions are compared element-by-element, and first value d + 1 where the expected valueis larger than the observed value determines the dimension.

    no_of_components = np.argmax( propvar < g )
            
    if( no_of_components == 0 ) :
        
        return 1
    
    else :
        
        return no_of_components

# Models

## KNN

In [104]:
# K Nearest Neighbors Classification

class K_Nearest_Neighbors_Classifier() : 
    
    
    def __init__( self, K ) :
        
        self.K = K
        
    
    
    # Function to store training set
        
    def fit( self, X_train, Y_train ) :
        
        self.X_train = X_train
        
        self.Y_train = Y_train
        
        # no_of_training_examples, no_of_features
        
        self.m, self.n = X_train.shape
    
    
    
    # Function for prediction
        
    def predict( self, X_test ) :
        
        self.X_test = X_test
        
        # no_of_test_examples, no_of_features
        
        self.m_test, self.n = X_test.shape
        
        # initialize Y_predict
        
        Y_predict = np.zeros( self.m_test )
        
        for i in range( self.m_test ) :
            
            x = self.X_test[i]
            
            # find the K nearest neighbors from current test example
            
            neighbors = np.zeros( self.K )
            
            neighbors = self.find_neighbors( x )
            
            # most frequent class in K neighbors
            
            Y_predict[i] = mode( neighbors )[0][0]    
            
        return Y_predict
    
    
    
    # Function to find the K nearest neighbors to current test example
          
    def find_neighbors( self, x ) :
        
        # calculate all the euclidean distances between current test example x and training set X_train
        
        euclidean_distances = np.zeros( self.m )
        
        for i in range( self.m ) :
            
            d = self.euclidean( x, self.X_train[i] )
            
            euclidean_distances[i] = d
        
        # sort Y_train according to euclidean_distance_array and store into Y_train_sorted
        
        inds = euclidean_distances.argsort()
        
        Y_train_sorted = self.Y_train[inds]
        
        return Y_train_sorted[:self.K]
    
    
    
    # Function to calculate euclidean distance
            
    def euclidean( self, x, x_train ) :
        
        return np.sqrt( np.sum( np.square( x - x_train ) ) )

## Logistic Regression

In [105]:
# # Logistic Regression

class LogitRegression() :
    
    
    
    def __init__( self, learning_rate, iterations ) :        
        
        self.learning_rate = learning_rate        
        
        self.iterations = iterations
        
        
          
    # Function for model training   
    
    def fit( self, X, Y ) :        
        
        # no_of_training_examples, no_of_features        
        
        self.m, self.n = X.shape        
        
        # weight initialization        
        
        self.W = np.zeros( self.n )        
        
        self.b = 0        
        
        self.X = X        
        
        self.Y = Y
          
        # gradient descent learning
                  
        for i in range( self.iterations ) :            
            
            self.update_weights()            
        
        return self
      
    
    
    # Helper function to update weights in gradient descent
      
    def update_weights( self ) :           
        
        A = 1 / ( 1 + np.exp( - ( self.X.dot( self.W ) + self.b ) ) )
          
        # calculate gradients        
        
        tmp = ( A - self.Y.T )        
        
        tmp = np.reshape( tmp, self.m )        
        
        dW = np.dot( self.X.T, tmp ) / self.m         
        
        db = np.sum( tmp ) / self.m 
          
        # update weights    
        
        self.W = self.W - self.learning_rate * dW    
        
        self.b = self.b - self.learning_rate * db
          
        return self
      
    
    
    # Hypothetical function  h( x ) 
      
    def predict( self, X ) :    
        
        Z = 1 / ( 1 + np.exp( - ( X.dot( self.W ) + self.b ) ) )        
        
        Y = np.where( Z > 0.5, 1, 0 )        
        
        return Y

# Modelling and Balanced Accuracy calculation 10 times

In [106]:
# return the average of balanced accuracy after running 10 times with 10 fold stratified cross-validation

def fun( X, y, model ) :
    
    
    
    # outer loop to calculate the balanced accuracy 10 times
    
    average_balanced_accuracies = []
    
    iterations = 1
    
    for i in range( 0, 10 ) :
        
        
        skfold = StratifiedKFold( n_splits = 10, shuffle = False )
    
        fold_no = 1

        balanced_accuracies = []
        
        
        
        # inner loop for 10 fold stratified cross validation

        for train_index, test_index in skfold.split( X, y ) :
            
            X_train, X_test = X[train_index], X[test_index]
    
            y_train, y_test = y[train_index], y[test_index]
         
            model.fit( X_train, y_train )
            
            balanced_accuracy = balanced_accuracy_score( y_test, model.predict( X_test ) )

            print("balanced_accuracy_score on %d fold : % f" % ( fold_no, balanced_accuracy ) )
    
            balanced_accuracies.append( balanced_accuracy )

            fold_no += 1
                   
        print("average balanced accuracy during %d iteration : % f" % ( iterations, np.mean( balanced_accuracies ) ) )
              
        average_balanced_accuracies.append( np.mean( balanced_accuracies ) )
        
        iterations += 1
        
    return np.mean( average_balanced_accuracies )

# Generic Functions for selecting principal components and modelling by all three algorithms

## Modelling with Conditional Number

In [107]:
def conditional_modelling( X, y ) :
    
    
    # Selecting components
    
    no_of_components_conditonal_number = conditional_number( X )

    print( "Conditional number : ", no_of_components_conditonal_number )
    
    pca = PCA()

    X_conditional_number = pca.transformation( X, no_of_components_conditonal_number )
    
    
    
    
    # KNN
    
    knn = K_Nearest_Neighbors_Classifier( K = 5 )

    print( "Average balanced accuracy of KNN after 10 iterations : %f" % ( fun( X_conditional_number, y, knn ) ) )
    
    
    
    # LDA

    lda = LinearDiscriminantAnalysis()

    print( "Average balanced accuracy of LDA after 10 iterations : %f" % ( fun( X_conditional_number, y, lda ) ) )
    
    
    
    # Logistic Regression
    
    logit = LogitRegression( learning_rate = 0.01, iterations = 1000 )

    print( "Average balanced accuracy of Logistic Regression after 10 iterations : %f" % ( fun( X_conditional_number, y, logit ) ) )

## Modelling with Kaiser rule 

In [108]:
def kaiser_modelling( X, y ) :
    
    no_of_components_kaiser_rule = kaiser_rule( X )

    print( "Kaiser rule : ", no_of_components_kaiser_rule )
    
    pca = PCA()

    X_kaiser_rule = pca.transformation( X, no_of_components_kaiser_rule )
    
    
    # KNN
    
    knn = K_Nearest_Neighbors_Classifier( K = 5 )

    print( "Average balanced accuracy of KNN after 10 iterations : %f" % ( fun( X_kaiser_rule, y, knn ) ) )
    
    
    
    # LDA 
    
    lda = LinearDiscriminantAnalysis()

    print( "Average balanced accuracy of LDA after 10 iterations : %f" % ( fun( X_kaiser_rule, y, lda ) ) )
    
   


    # Logistic Regression
    
    logit = LogitRegression( learning_rate = 0.01, iterations = 1000 )

    print( "Average balanced accuracy of Logistic Regression after 10 iterations : %f" % ( fun( X_kaiser_rule, y, logit ) ) )
    
    
    

## Modelling with Broken Stick rule

In [109]:
def broken_modelling( X, y ) :
    
    no_of_components_broken_stick_rule = broken_stick( X )

    print( "Broken Stick Rule : ",  no_of_components_broken_stick_rule ) 
    
    pca = PCA()

    X_broken_stick_rule = pca.transformation( X, no_of_components_broken_stick_rule )
    
    
    # KNN
    
    knn = K_Nearest_Neighbors_Classifier( K = 5 )

    print( "Average balanced accuracy of KNN after 10 iterations : %f" % ( fun( X_broken_stick_rule, y, knn ) ) )
    
    
    
    # LDA
    
    lda = LinearDiscriminantAnalysis()

    print( "Average balanced accuracy of LDA after 10 iterations : %f" % ( fun( X_broken_stick_rule, y, lda ) ) )
    
    
    
    # Logistic Regression
    
    logit = LogitRegression( learning_rate = 0.01, iterations = 1000 )

    print( "Average balanced accuracy of Logistic Regression after 10 iterations : %f" % ( fun( X_broken_stick_rule, y, logit ) ) )  

# Breast Cancer Dataset

In [110]:
data = loadmat( 'Databases/breastCancer.mat' )

X = data['X']

y = data['Y']

print( X.shape )

print( y.shape )

(569, 30)
(569, 1)


In [111]:
conditional_modelling( X, y )

Conditional number :  1
balanced_accuracy_score on 1 fold :  0.849351
balanced_accuracy_score on 2 fold :  0.789610
balanced_accuracy_score on 3 fold :  0.876984
balanced_accuracy_score on 4 fold :  0.904762
balanced_accuracy_score on 5 fold :  0.914683
balanced_accuracy_score on 6 fold :  0.896825
balanced_accuracy_score on 7 fold :  0.962302
balanced_accuracy_score on 8 fold :  0.934524
balanced_accuracy_score on 9 fold :  0.896825
balanced_accuracy_score on 10 fold :  0.938095
average balanced accuracy during 1 iteration :  0.896396
balanced_accuracy_score on 1 fold :  0.849351
balanced_accuracy_score on 2 fold :  0.789610
balanced_accuracy_score on 3 fold :  0.876984
balanced_accuracy_score on 4 fold :  0.904762
balanced_accuracy_score on 5 fold :  0.914683
balanced_accuracy_score on 6 fold :  0.896825
balanced_accuracy_score on 7 fold :  0.962302
balanced_accuracy_score on 8 fold :  0.934524
balanced_accuracy_score on 9 fold :  0.896825
balanced_accuracy_score on 10 fold :  0.9380

balanced_accuracy_score on 2 fold :  0.781169
balanced_accuracy_score on 3 fold :  0.853175
balanced_accuracy_score on 4 fold :  0.880952
balanced_accuracy_score on 5 fold :  0.904762
balanced_accuracy_score on 6 fold :  0.886905
balanced_accuracy_score on 7 fold :  0.924603
balanced_accuracy_score on 8 fold :  0.938492
balanced_accuracy_score on 9 fold :  0.075397
balanced_accuracy_score on 10 fold :  0.928571
average balanced accuracy during 1 iteration :  0.803766
balanced_accuracy_score on 1 fold :  0.863636
balanced_accuracy_score on 2 fold :  0.781169
balanced_accuracy_score on 3 fold :  0.853175
balanced_accuracy_score on 4 fold :  0.880952
balanced_accuracy_score on 5 fold :  0.904762
balanced_accuracy_score on 6 fold :  0.886905
balanced_accuracy_score on 7 fold :  0.924603
balanced_accuracy_score on 8 fold :  0.938492
balanced_accuracy_score on 9 fold :  0.075397
balanced_accuracy_score on 10 fold :  0.928571
average balanced accuracy during 2 iteration :  0.803766
balanced_a

In [112]:
kaiser_modelling( X, y )

Kaiser rule :  7
balanced_accuracy_score on 1 fold :  0.903247
balanced_accuracy_score on 2 fold :  0.849351
balanced_accuracy_score on 3 fold :  0.886905
balanced_accuracy_score on 4 fold :  0.952381
balanced_accuracy_score on 5 fold :  0.938492
balanced_accuracy_score on 6 fold :  0.924603
balanced_accuracy_score on 7 fold :  0.962302
balanced_accuracy_score on 8 fold :  0.914683
balanced_accuracy_score on 9 fold :  0.920635
balanced_accuracy_score on 10 fold :  0.961905
average balanced accuracy during 1 iteration :  0.921450
balanced_accuracy_score on 1 fold :  0.903247
balanced_accuracy_score on 2 fold :  0.849351
balanced_accuracy_score on 3 fold :  0.886905
balanced_accuracy_score on 4 fold :  0.952381
balanced_accuracy_score on 5 fold :  0.938492
balanced_accuracy_score on 6 fold :  0.924603
balanced_accuracy_score on 7 fold :  0.962302
balanced_accuracy_score on 8 fold :  0.914683
balanced_accuracy_score on 9 fold :  0.920635
balanced_accuracy_score on 10 fold :  0.961905
aver

balanced_accuracy_score on 9 fold :  0.986111
balanced_accuracy_score on 10 fold :  0.985714
average balanced accuracy during 10 iteration :  0.931688
Average balanced accuracy of LDA after 10 iterations : 0.931688
balanced_accuracy_score on 1 fold :  0.931818
balanced_accuracy_score on 2 fold :  0.954545
balanced_accuracy_score on 3 fold :  0.876984
balanced_accuracy_score on 4 fold :  0.914683
balanced_accuracy_score on 5 fold :  0.904762
balanced_accuracy_score on 6 fold :  0.962302
balanced_accuracy_score on 7 fold :  0.924603
balanced_accuracy_score on 8 fold :  0.938492
balanced_accuracy_score on 9 fold :  0.972222
balanced_accuracy_score on 10 fold :  0.928571
average balanced accuracy during 1 iteration :  0.930898
balanced_accuracy_score on 1 fold :  0.931818
balanced_accuracy_score on 2 fold :  0.954545
balanced_accuracy_score on 3 fold :  0.876984
balanced_accuracy_score on 4 fold :  0.914683
balanced_accuracy_score on 5 fold :  0.904762
balanced_accuracy_score on 6 fold :  

In [113]:
broken_modelling( X, y )

Broken Stick Rule :  1
balanced_accuracy_score on 1 fold :  0.849351
balanced_accuracy_score on 2 fold :  0.789610
balanced_accuracy_score on 3 fold :  0.876984
balanced_accuracy_score on 4 fold :  0.904762
balanced_accuracy_score on 5 fold :  0.914683
balanced_accuracy_score on 6 fold :  0.896825
balanced_accuracy_score on 7 fold :  0.962302
balanced_accuracy_score on 8 fold :  0.934524
balanced_accuracy_score on 9 fold :  0.896825
balanced_accuracy_score on 10 fold :  0.938095
average balanced accuracy during 1 iteration :  0.896396
balanced_accuracy_score on 1 fold :  0.849351
balanced_accuracy_score on 2 fold :  0.789610
balanced_accuracy_score on 3 fold :  0.876984
balanced_accuracy_score on 4 fold :  0.904762
balanced_accuracy_score on 5 fold :  0.914683
balanced_accuracy_score on 6 fold :  0.896825
balanced_accuracy_score on 7 fold :  0.962302
balanced_accuracy_score on 8 fold :  0.934524
balanced_accuracy_score on 9 fold :  0.896825
balanced_accuracy_score on 10 fold :  0.93809

balanced_accuracy_score on 2 fold :  0.781169
balanced_accuracy_score on 3 fold :  0.853175
balanced_accuracy_score on 4 fold :  0.880952
balanced_accuracy_score on 5 fold :  0.904762
balanced_accuracy_score on 6 fold :  0.886905
balanced_accuracy_score on 7 fold :  0.924603
balanced_accuracy_score on 8 fold :  0.938492
balanced_accuracy_score on 9 fold :  0.075397
balanced_accuracy_score on 10 fold :  0.928571
average balanced accuracy during 1 iteration :  0.803766
balanced_accuracy_score on 1 fold :  0.863636
balanced_accuracy_score on 2 fold :  0.781169
balanced_accuracy_score on 3 fold :  0.853175
balanced_accuracy_score on 4 fold :  0.880952
balanced_accuracy_score on 5 fold :  0.904762
balanced_accuracy_score on 6 fold :  0.886905
balanced_accuracy_score on 7 fold :  0.924603
balanced_accuracy_score on 8 fold :  0.938492
balanced_accuracy_score on 9 fold :  0.075397
balanced_accuracy_score on 10 fold :  0.928571
average balanced accuracy during 2 iteration :  0.803766
balanced_a

# Banknote Dataset

In [114]:
data = loadmat( 'Databases/Banknote.mat' )

X = data['X']

y = data['Y']

print( X.shape )

print( y.shape )

(1372, 4)
(1372, 1)


In [115]:
conditional_modelling( X, y )

Conditional number :  2
balanced_accuracy_score on 1 fold :  0.905365
balanced_accuracy_score on 2 fold :  0.913562
balanced_accuracy_score on 3 fold :  0.835850
balanced_accuracy_score on 4 fold :  0.847282
balanced_accuracy_score on 5 fold :  0.858822
balanced_accuracy_score on 6 fold :  0.776747
balanced_accuracy_score on 7 fold :  0.886648
balanced_accuracy_score on 8 fold :  0.904767
balanced_accuracy_score on 9 fold :  0.889884
balanced_accuracy_score on 10 fold :  0.819456
average balanced accuracy during 1 iteration :  0.863838
balanced_accuracy_score on 1 fold :  0.905365
balanced_accuracy_score on 2 fold :  0.913562
balanced_accuracy_score on 3 fold :  0.835850
balanced_accuracy_score on 4 fold :  0.847282
balanced_accuracy_score on 5 fold :  0.858822
balanced_accuracy_score on 6 fold :  0.776747
balanced_accuracy_score on 7 fold :  0.886648
balanced_accuracy_score on 8 fold :  0.904767
balanced_accuracy_score on 9 fold :  0.889884
balanced_accuracy_score on 10 fold :  0.8194

balanced_accuracy_score on 1 fold :  0.735257
balanced_accuracy_score on 2 fold :  0.753353
balanced_accuracy_score on 3 fold :  0.665121
balanced_accuracy_score on 4 fold :  0.709448
balanced_accuracy_score on 5 fold :  0.699633
balanced_accuracy_score on 6 fold :  0.698016
balanced_accuracy_score on 7 fold :  0.771894
balanced_accuracy_score on 8 fold :  0.740617
balanced_accuracy_score on 9 fold :  0.760354
balanced_accuracy_score on 10 fold :  0.704594
average balanced accuracy during 1 iteration :  0.723829
balanced_accuracy_score on 1 fold :  0.735257
balanced_accuracy_score on 2 fold :  0.753353
balanced_accuracy_score on 3 fold :  0.665121
balanced_accuracy_score on 4 fold :  0.709448
balanced_accuracy_score on 5 fold :  0.699633
balanced_accuracy_score on 6 fold :  0.698016
balanced_accuracy_score on 7 fold :  0.771894
balanced_accuracy_score on 8 fold :  0.740617
balanced_accuracy_score on 9 fold :  0.760354
balanced_accuracy_score on 10 fold :  0.704594
average balanced accu

In [116]:
kaiser_modelling( X, y )

Kaiser rule :  1
balanced_accuracy_score on 1 fold :  0.679902
balanced_accuracy_score on 2 fold :  0.727379
balanced_accuracy_score on 3 fold :  0.712791
balanced_accuracy_score on 4 fold :  0.789905
balanced_accuracy_score on 5 fold :  0.740509
balanced_accuracy_score on 6 fold :  0.658542
balanced_accuracy_score on 7 fold :  0.706104
balanced_accuracy_score on 8 fold :  0.727351
balanced_accuracy_score on 9 fold :  0.704379
balanced_accuracy_score on 10 fold :  0.729077
average balanced accuracy during 1 iteration :  0.717594
balanced_accuracy_score on 1 fold :  0.679902
balanced_accuracy_score on 2 fold :  0.727379
balanced_accuracy_score on 3 fold :  0.712791
balanced_accuracy_score on 4 fold :  0.789905
balanced_accuracy_score on 5 fold :  0.740509
balanced_accuracy_score on 6 fold :  0.658542
balanced_accuracy_score on 7 fold :  0.706104
balanced_accuracy_score on 8 fold :  0.727351
balanced_accuracy_score on 9 fold :  0.704379
balanced_accuracy_score on 10 fold :  0.729077
aver

balanced_accuracy_score on 1 fold :  0.606132
balanced_accuracy_score on 2 fold :  0.612625
balanced_accuracy_score on 3 fold :  0.592968
balanced_accuracy_score on 4 fold :  0.571613
balanced_accuracy_score on 5 fold :  0.625863
balanced_accuracy_score on 6 fold :  0.632442
balanced_accuracy_score on 7 fold :  0.650561
balanced_accuracy_score on 8 fold :  0.578192
balanced_accuracy_score on 9 fold :  0.545298
balanced_accuracy_score on 10 fold :  0.591350
average balanced accuracy during 1 iteration :  0.600704
balanced_accuracy_score on 1 fold :  0.606132
balanced_accuracy_score on 2 fold :  0.612625
balanced_accuracy_score on 3 fold :  0.592968
balanced_accuracy_score on 4 fold :  0.571613
balanced_accuracy_score on 5 fold :  0.625863
balanced_accuracy_score on 6 fold :  0.632442
balanced_accuracy_score on 7 fold :  0.650561
balanced_accuracy_score on 8 fold :  0.578192
balanced_accuracy_score on 9 fold :  0.545298
balanced_accuracy_score on 10 fold :  0.591350
average balanced accu

In [117]:
broken_modelling( X, y )

Broken Stick Rule :  1
balanced_accuracy_score on 1 fold :  0.679902
balanced_accuracy_score on 2 fold :  0.727379
balanced_accuracy_score on 3 fold :  0.712791
balanced_accuracy_score on 4 fold :  0.789905
balanced_accuracy_score on 5 fold :  0.740509
balanced_accuracy_score on 6 fold :  0.658542
balanced_accuracy_score on 7 fold :  0.706104
balanced_accuracy_score on 8 fold :  0.727351
balanced_accuracy_score on 9 fold :  0.704379
balanced_accuracy_score on 10 fold :  0.729077
average balanced accuracy during 1 iteration :  0.717594
balanced_accuracy_score on 1 fold :  0.679902
balanced_accuracy_score on 2 fold :  0.727379
balanced_accuracy_score on 3 fold :  0.712791
balanced_accuracy_score on 4 fold :  0.789905
balanced_accuracy_score on 5 fold :  0.740509
balanced_accuracy_score on 6 fold :  0.658542
balanced_accuracy_score on 7 fold :  0.706104
balanced_accuracy_score on 8 fold :  0.727351
balanced_accuracy_score on 9 fold :  0.704379
balanced_accuracy_score on 10 fold :  0.72907

balanced_accuracy_score on 2 fold :  0.612625
balanced_accuracy_score on 3 fold :  0.592968
balanced_accuracy_score on 4 fold :  0.571613
balanced_accuracy_score on 5 fold :  0.625863
balanced_accuracy_score on 6 fold :  0.632442
balanced_accuracy_score on 7 fold :  0.650561
balanced_accuracy_score on 8 fold :  0.578192
balanced_accuracy_score on 9 fold :  0.545298
balanced_accuracy_score on 10 fold :  0.591350
average balanced accuracy during 1 iteration :  0.600704
balanced_accuracy_score on 1 fold :  0.606132
balanced_accuracy_score on 2 fold :  0.612625
balanced_accuracy_score on 3 fold :  0.592968
balanced_accuracy_score on 4 fold :  0.571613
balanced_accuracy_score on 5 fold :  0.625863
balanced_accuracy_score on 6 fold :  0.632442
balanced_accuracy_score on 7 fold :  0.650561
balanced_accuracy_score on 8 fold :  0.578192
balanced_accuracy_score on 9 fold :  0.545298
balanced_accuracy_score on 10 fold :  0.591350
average balanced accuracy during 2 iteration :  0.600704
balanced_a

# Blood Dataset

In [118]:
data = loadmat( 'Databases/blood.mat' )

X = data['X']

y = data['Y']

print( X.shape )

print( y.shape )

(748, 4)
(748, 1)


In [119]:
conditional_modelling( X, y )

Conditional number :  1
balanced_accuracy_score on 1 fold :  0.587719
balanced_accuracy_score on 2 fold :  0.504386
balanced_accuracy_score on 3 fold :  0.504386
balanced_accuracy_score on 4 fold :  0.501462
balanced_accuracy_score on 5 fold :  0.538012
balanced_accuracy_score on 6 fold :  0.500000
balanced_accuracy_score on 7 fold :  0.577485
balanced_accuracy_score on 8 fold :  0.733918
balanced_accuracy_score on 9 fold :  0.511868
balanced_accuracy_score on 10 fold :  0.558824
average balanced accuracy during 1 iteration :  0.551806
balanced_accuracy_score on 1 fold :  0.587719
balanced_accuracy_score on 2 fold :  0.504386
balanced_accuracy_score on 3 fold :  0.504386
balanced_accuracy_score on 4 fold :  0.501462
balanced_accuracy_score on 5 fold :  0.538012
balanced_accuracy_score on 6 fold :  0.500000
balanced_accuracy_score on 7 fold :  0.577485
balanced_accuracy_score on 8 fold :  0.733918
balanced_accuracy_score on 9 fold :  0.511868
balanced_accuracy_score on 10 fold :  0.5588

balanced_accuracy_score on 1 fold :  0.653509
balanced_accuracy_score on 2 fold :  0.250000
balanced_accuracy_score on 3 fold :  0.539474
balanced_accuracy_score on 4 fold :  0.545322
balanced_accuracy_score on 5 fold :  0.533626
balanced_accuracy_score on 6 fold :  0.422515
balanced_accuracy_score on 7 fold :  0.609649
balanced_accuracy_score on 8 fold :  0.359649
balanced_accuracy_score on 9 fold :  0.618679
balanced_accuracy_score on 10 fold :  0.576883
average balanced accuracy during 1 iteration :  0.510931
balanced_accuracy_score on 1 fold :  0.653509
balanced_accuracy_score on 2 fold :  0.250000
balanced_accuracy_score on 3 fold :  0.539474
balanced_accuracy_score on 4 fold :  0.545322
balanced_accuracy_score on 5 fold :  0.533626
balanced_accuracy_score on 6 fold :  0.422515
balanced_accuracy_score on 7 fold :  0.609649
balanced_accuracy_score on 8 fold :  0.359649
balanced_accuracy_score on 9 fold :  0.618679
balanced_accuracy_score on 10 fold :  0.576883
average balanced accu

In [120]:
kaiser_modelling( X, y )

Kaiser rule :  3
balanced_accuracy_score on 1 fold :  0.519006
balanced_accuracy_score on 2 fold :  0.469298
balanced_accuracy_score on 3 fold :  0.586257
balanced_accuracy_score on 4 fold :  0.548246
balanced_accuracy_score on 5 fold :  0.546784
balanced_accuracy_score on 6 fold :  0.500000
balanced_accuracy_score on 7 fold :  0.559942
balanced_accuracy_score on 8 fold :  0.726608
balanced_accuracy_score on 9 fold :  0.494324
balanced_accuracy_score on 10 fold :  0.500000
average balanced accuracy during 1 iteration :  0.545046
balanced_accuracy_score on 1 fold :  0.519006
balanced_accuracy_score on 2 fold :  0.469298
balanced_accuracy_score on 3 fold :  0.586257
balanced_accuracy_score on 4 fold :  0.548246
balanced_accuracy_score on 5 fold :  0.546784
balanced_accuracy_score on 6 fold :  0.500000
balanced_accuracy_score on 7 fold :  0.559942
balanced_accuracy_score on 8 fold :  0.726608
balanced_accuracy_score on 9 fold :  0.494324
balanced_accuracy_score on 10 fold :  0.500000
aver

balanced_accuracy_score on 1 fold :  0.631579
balanced_accuracy_score on 2 fold :  0.511696
balanced_accuracy_score on 3 fold :  0.460526
balanced_accuracy_score on 4 fold :  0.463450
balanced_accuracy_score on 5 fold :  0.595029
balanced_accuracy_score on 6 fold :  0.577485
balanced_accuracy_score on 7 fold :  0.573099
balanced_accuracy_score on 8 fold :  0.640351
balanced_accuracy_score on 9 fold :  0.390093
balanced_accuracy_score on 10 fold :  0.588235
average balanced accuracy during 1 iteration :  0.543154
balanced_accuracy_score on 1 fold :  0.631579
balanced_accuracy_score on 2 fold :  0.511696
balanced_accuracy_score on 3 fold :  0.460526
balanced_accuracy_score on 4 fold :  0.463450
balanced_accuracy_score on 5 fold :  0.595029
balanced_accuracy_score on 6 fold :  0.577485
balanced_accuracy_score on 7 fold :  0.573099
balanced_accuracy_score on 8 fold :  0.640351
balanced_accuracy_score on 9 fold :  0.390093
balanced_accuracy_score on 10 fold :  0.588235
average balanced accu

In [121]:
broken_modelling( X, y )

Broken Stick Rule :  1
balanced_accuracy_score on 1 fold :  0.587719
balanced_accuracy_score on 2 fold :  0.504386
balanced_accuracy_score on 3 fold :  0.504386
balanced_accuracy_score on 4 fold :  0.501462
balanced_accuracy_score on 5 fold :  0.538012
balanced_accuracy_score on 6 fold :  0.500000
balanced_accuracy_score on 7 fold :  0.577485
balanced_accuracy_score on 8 fold :  0.733918
balanced_accuracy_score on 9 fold :  0.511868
balanced_accuracy_score on 10 fold :  0.558824
average balanced accuracy during 1 iteration :  0.551806
balanced_accuracy_score on 1 fold :  0.587719
balanced_accuracy_score on 2 fold :  0.504386
balanced_accuracy_score on 3 fold :  0.504386
balanced_accuracy_score on 4 fold :  0.501462
balanced_accuracy_score on 5 fold :  0.538012
balanced_accuracy_score on 6 fold :  0.500000
balanced_accuracy_score on 7 fold :  0.577485
balanced_accuracy_score on 8 fold :  0.733918
balanced_accuracy_score on 9 fold :  0.511868
balanced_accuracy_score on 10 fold :  0.55882

balanced_accuracy_score on 1 fold :  0.653509
balanced_accuracy_score on 2 fold :  0.250000
balanced_accuracy_score on 3 fold :  0.539474
balanced_accuracy_score on 4 fold :  0.545322
balanced_accuracy_score on 5 fold :  0.533626
balanced_accuracy_score on 6 fold :  0.422515
balanced_accuracy_score on 7 fold :  0.609649
balanced_accuracy_score on 8 fold :  0.359649
balanced_accuracy_score on 9 fold :  0.618679
balanced_accuracy_score on 10 fold :  0.576883
average balanced accuracy during 1 iteration :  0.510931
balanced_accuracy_score on 1 fold :  0.653509
balanced_accuracy_score on 2 fold :  0.250000
balanced_accuracy_score on 3 fold :  0.539474
balanced_accuracy_score on 4 fold :  0.545322
balanced_accuracy_score on 5 fold :  0.533626
balanced_accuracy_score on 6 fold :  0.422515
balanced_accuracy_score on 7 fold :  0.609649
balanced_accuracy_score on 8 fold :  0.359649
balanced_accuracy_score on 9 fold :  0.618679
balanced_accuracy_score on 10 fold :  0.576883
average balanced accu

# Climate Dataset

In [122]:
data = loadmat( 'Databases/climate.mat' )

X = data['X']

y = data['Y']

print( X.shape )

print( y.shape )

(540, 18)
(540, 1)


In [123]:
conditional_modelling( X, y )

Conditional number :  1
balanced_accuracy_score on 1 fold :  0.500000
balanced_accuracy_score on 2 fold :  0.500000
balanced_accuracy_score on 3 fold :  0.489796
balanced_accuracy_score on 4 fold :  0.500000
balanced_accuracy_score on 5 fold :  0.500000
balanced_accuracy_score on 6 fold :  0.500000
balanced_accuracy_score on 7 fold :  0.500000
balanced_accuracy_score on 8 fold :  0.500000
balanced_accuracy_score on 9 fold :  0.500000
balanced_accuracy_score on 10 fold :  0.500000
average balanced accuracy during 1 iteration :  0.498980
balanced_accuracy_score on 1 fold :  0.500000
balanced_accuracy_score on 2 fold :  0.500000
balanced_accuracy_score on 3 fold :  0.489796
balanced_accuracy_score on 4 fold :  0.500000
balanced_accuracy_score on 5 fold :  0.500000
balanced_accuracy_score on 6 fold :  0.500000
balanced_accuracy_score on 7 fold :  0.500000
balanced_accuracy_score on 8 fold :  0.500000
balanced_accuracy_score on 9 fold :  0.500000
balanced_accuracy_score on 10 fold :  0.5000

balanced_accuracy_score on 2 fold :  0.500000
balanced_accuracy_score on 3 fold :  0.500000
balanced_accuracy_score on 4 fold :  0.500000
balanced_accuracy_score on 5 fold :  0.500000
balanced_accuracy_score on 6 fold :  0.500000
balanced_accuracy_score on 7 fold :  0.500000
balanced_accuracy_score on 8 fold :  0.500000
balanced_accuracy_score on 9 fold :  0.500000
balanced_accuracy_score on 10 fold :  0.500000
average balanced accuracy during 1 iteration :  0.500000
balanced_accuracy_score on 1 fold :  0.500000
balanced_accuracy_score on 2 fold :  0.500000
balanced_accuracy_score on 3 fold :  0.500000
balanced_accuracy_score on 4 fold :  0.500000
balanced_accuracy_score on 5 fold :  0.500000
balanced_accuracy_score on 6 fold :  0.500000
balanced_accuracy_score on 7 fold :  0.500000
balanced_accuracy_score on 8 fold :  0.500000
balanced_accuracy_score on 9 fold :  0.500000
balanced_accuracy_score on 10 fold :  0.500000
average balanced accuracy during 2 iteration :  0.500000
balanced_a

In [124]:
kaiser_modelling( X, y )

Kaiser rule :  1
balanced_accuracy_score on 1 fold :  0.500000
balanced_accuracy_score on 2 fold :  0.500000
balanced_accuracy_score on 3 fold :  0.489796
balanced_accuracy_score on 4 fold :  0.500000
balanced_accuracy_score on 5 fold :  0.500000
balanced_accuracy_score on 6 fold :  0.500000
balanced_accuracy_score on 7 fold :  0.500000
balanced_accuracy_score on 8 fold :  0.500000
balanced_accuracy_score on 9 fold :  0.500000
balanced_accuracy_score on 10 fold :  0.500000
average balanced accuracy during 1 iteration :  0.498980
balanced_accuracy_score on 1 fold :  0.500000
balanced_accuracy_score on 2 fold :  0.500000
balanced_accuracy_score on 3 fold :  0.489796
balanced_accuracy_score on 4 fold :  0.500000
balanced_accuracy_score on 5 fold :  0.500000
balanced_accuracy_score on 6 fold :  0.500000
balanced_accuracy_score on 7 fold :  0.500000
balanced_accuracy_score on 8 fold :  0.500000
balanced_accuracy_score on 9 fold :  0.500000
balanced_accuracy_score on 10 fold :  0.500000
aver

balanced_accuracy_score on 2 fold :  0.500000
balanced_accuracy_score on 3 fold :  0.500000
balanced_accuracy_score on 4 fold :  0.500000
balanced_accuracy_score on 5 fold :  0.500000
balanced_accuracy_score on 6 fold :  0.500000
balanced_accuracy_score on 7 fold :  0.500000
balanced_accuracy_score on 8 fold :  0.500000
balanced_accuracy_score on 9 fold :  0.500000
balanced_accuracy_score on 10 fold :  0.500000
average balanced accuracy during 1 iteration :  0.500000
balanced_accuracy_score on 1 fold :  0.500000
balanced_accuracy_score on 2 fold :  0.500000
balanced_accuracy_score on 3 fold :  0.500000
balanced_accuracy_score on 4 fold :  0.500000
balanced_accuracy_score on 5 fold :  0.500000
balanced_accuracy_score on 6 fold :  0.500000
balanced_accuracy_score on 7 fold :  0.500000
balanced_accuracy_score on 8 fold :  0.500000
balanced_accuracy_score on 9 fold :  0.500000
balanced_accuracy_score on 10 fold :  0.500000
average balanced accuracy during 2 iteration :  0.500000
balanced_a

In [125]:
broken_modelling( X, y )

Broken Stick Rule :  1
balanced_accuracy_score on 1 fold :  0.500000
balanced_accuracy_score on 2 fold :  0.500000
balanced_accuracy_score on 3 fold :  0.489796
balanced_accuracy_score on 4 fold :  0.500000
balanced_accuracy_score on 5 fold :  0.500000
balanced_accuracy_score on 6 fold :  0.500000
balanced_accuracy_score on 7 fold :  0.500000
balanced_accuracy_score on 8 fold :  0.500000
balanced_accuracy_score on 9 fold :  0.500000
balanced_accuracy_score on 10 fold :  0.500000
average balanced accuracy during 1 iteration :  0.498980
balanced_accuracy_score on 1 fold :  0.500000
balanced_accuracy_score on 2 fold :  0.500000
balanced_accuracy_score on 3 fold :  0.489796
balanced_accuracy_score on 4 fold :  0.500000
balanced_accuracy_score on 5 fold :  0.500000
balanced_accuracy_score on 6 fold :  0.500000
balanced_accuracy_score on 7 fold :  0.500000
balanced_accuracy_score on 8 fold :  0.500000
balanced_accuracy_score on 9 fold :  0.500000
balanced_accuracy_score on 10 fold :  0.50000

balanced_accuracy_score on 2 fold :  0.500000
balanced_accuracy_score on 3 fold :  0.500000
balanced_accuracy_score on 4 fold :  0.500000
balanced_accuracy_score on 5 fold :  0.500000
balanced_accuracy_score on 6 fold :  0.500000
balanced_accuracy_score on 7 fold :  0.500000
balanced_accuracy_score on 8 fold :  0.500000
balanced_accuracy_score on 9 fold :  0.500000
balanced_accuracy_score on 10 fold :  0.500000
average balanced accuracy during 1 iteration :  0.500000
balanced_accuracy_score on 1 fold :  0.500000
balanced_accuracy_score on 2 fold :  0.500000
balanced_accuracy_score on 3 fold :  0.500000
balanced_accuracy_score on 4 fold :  0.500000
balanced_accuracy_score on 5 fold :  0.500000
balanced_accuracy_score on 6 fold :  0.500000
balanced_accuracy_score on 7 fold :  0.500000
balanced_accuracy_score on 8 fold :  0.500000
balanced_accuracy_score on 9 fold :  0.500000
balanced_accuracy_score on 10 fold :  0.500000
average balanced accuracy during 2 iteration :  0.500000
balanced_a

# Colposcopy Dataset

In [128]:
data = loadmat( 'Databases/Cryotherapy.mat' )

X = data['X']

y = data['Y']

print( X.shape )

print( y.shape )

(90, 6)
(90, 1)


In [129]:
conditional_modelling( X, y )

Conditional number :  1
balanced_accuracy_score on 1 fold :  0.550000
balanced_accuracy_score on 2 fold :  0.600000
balanced_accuracy_score on 3 fold :  0.750000
balanced_accuracy_score on 4 fold :  0.350000
balanced_accuracy_score on 5 fold :  0.650000
balanced_accuracy_score on 6 fold :  0.625000
balanced_accuracy_score on 7 fold :  0.450000
balanced_accuracy_score on 8 fold :  0.650000
balanced_accuracy_score on 9 fold :  0.525000
balanced_accuracy_score on 10 fold :  0.550000
average balanced accuracy during 1 iteration :  0.570000
balanced_accuracy_score on 1 fold :  0.550000
balanced_accuracy_score on 2 fold :  0.600000
balanced_accuracy_score on 3 fold :  0.750000
balanced_accuracy_score on 4 fold :  0.350000
balanced_accuracy_score on 5 fold :  0.650000
balanced_accuracy_score on 6 fold :  0.625000
balanced_accuracy_score on 7 fold :  0.450000
balanced_accuracy_score on 8 fold :  0.650000
balanced_accuracy_score on 9 fold :  0.525000
balanced_accuracy_score on 10 fold :  0.5500

balanced_accuracy_score on 3 fold :  0.550000
balanced_accuracy_score on 4 fold :  0.450000
balanced_accuracy_score on 5 fold :  0.625000
balanced_accuracy_score on 6 fold :  0.400000
balanced_accuracy_score on 7 fold :  0.700000
balanced_accuracy_score on 8 fold :  0.625000
balanced_accuracy_score on 9 fold :  0.625000
balanced_accuracy_score on 10 fold :  0.450000
average balanced accuracy during 1 iteration :  0.550000
balanced_accuracy_score on 1 fold :  0.675000
balanced_accuracy_score on 2 fold :  0.400000
balanced_accuracy_score on 3 fold :  0.550000
balanced_accuracy_score on 4 fold :  0.450000
balanced_accuracy_score on 5 fold :  0.625000
balanced_accuracy_score on 6 fold :  0.400000
balanced_accuracy_score on 7 fold :  0.700000
balanced_accuracy_score on 8 fold :  0.625000
balanced_accuracy_score on 9 fold :  0.625000
balanced_accuracy_score on 10 fold :  0.450000
average balanced accuracy during 2 iteration :  0.550000
balanced_accuracy_score on 1 fold :  0.675000
balanced_a

In [130]:
kaiser_modelling( X, y )

Kaiser rule :  4
balanced_accuracy_score on 1 fold :  0.650000
balanced_accuracy_score on 2 fold :  0.500000
balanced_accuracy_score on 3 fold :  1.000000
balanced_accuracy_score on 4 fold :  0.575000
balanced_accuracy_score on 5 fold :  0.875000
balanced_accuracy_score on 6 fold :  0.750000
balanced_accuracy_score on 7 fold :  0.700000
balanced_accuracy_score on 8 fold :  0.675000
balanced_accuracy_score on 9 fold :  0.500000
balanced_accuracy_score on 10 fold :  0.900000
average balanced accuracy during 1 iteration :  0.712500
balanced_accuracy_score on 1 fold :  0.650000
balanced_accuracy_score on 2 fold :  0.500000
balanced_accuracy_score on 3 fold :  1.000000
balanced_accuracy_score on 4 fold :  0.575000
balanced_accuracy_score on 5 fold :  0.875000
balanced_accuracy_score on 6 fold :  0.750000
balanced_accuracy_score on 7 fold :  0.700000
balanced_accuracy_score on 8 fold :  0.675000
balanced_accuracy_score on 9 fold :  0.500000
balanced_accuracy_score on 10 fold :  0.900000
aver

balanced_accuracy_score on 1 fold :  0.775000
balanced_accuracy_score on 2 fold :  0.700000
balanced_accuracy_score on 3 fold :  0.675000
balanced_accuracy_score on 4 fold :  0.800000
balanced_accuracy_score on 5 fold :  0.875000
balanced_accuracy_score on 6 fold :  0.625000
balanced_accuracy_score on 7 fold :  0.800000
balanced_accuracy_score on 8 fold :  0.750000
balanced_accuracy_score on 9 fold :  0.775000
balanced_accuracy_score on 10 fold :  0.800000
average balanced accuracy during 1 iteration :  0.757500
balanced_accuracy_score on 1 fold :  0.775000
balanced_accuracy_score on 2 fold :  0.700000
balanced_accuracy_score on 3 fold :  0.675000
balanced_accuracy_score on 4 fold :  0.800000
balanced_accuracy_score on 5 fold :  0.875000
balanced_accuracy_score on 6 fold :  0.625000
balanced_accuracy_score on 7 fold :  0.800000
balanced_accuracy_score on 8 fold :  0.750000
balanced_accuracy_score on 9 fold :  0.775000
balanced_accuracy_score on 10 fold :  0.800000
average balanced accu

In [131]:
broken_modelling( X, y )

Broken Stick Rule :  1
balanced_accuracy_score on 1 fold :  0.550000
balanced_accuracy_score on 2 fold :  0.600000
balanced_accuracy_score on 3 fold :  0.750000
balanced_accuracy_score on 4 fold :  0.350000
balanced_accuracy_score on 5 fold :  0.650000
balanced_accuracy_score on 6 fold :  0.625000
balanced_accuracy_score on 7 fold :  0.450000
balanced_accuracy_score on 8 fold :  0.650000
balanced_accuracy_score on 9 fold :  0.525000
balanced_accuracy_score on 10 fold :  0.550000
average balanced accuracy during 1 iteration :  0.570000
balanced_accuracy_score on 1 fold :  0.550000
balanced_accuracy_score on 2 fold :  0.600000
balanced_accuracy_score on 3 fold :  0.750000
balanced_accuracy_score on 4 fold :  0.350000
balanced_accuracy_score on 5 fold :  0.650000
balanced_accuracy_score on 6 fold :  0.625000
balanced_accuracy_score on 7 fold :  0.450000
balanced_accuracy_score on 8 fold :  0.650000
balanced_accuracy_score on 9 fold :  0.525000
balanced_accuracy_score on 10 fold :  0.55000

balanced_accuracy_score on 3 fold :  0.550000
balanced_accuracy_score on 4 fold :  0.450000
balanced_accuracy_score on 5 fold :  0.625000
balanced_accuracy_score on 6 fold :  0.400000
balanced_accuracy_score on 7 fold :  0.700000
balanced_accuracy_score on 8 fold :  0.625000
balanced_accuracy_score on 9 fold :  0.625000
balanced_accuracy_score on 10 fold :  0.450000
average balanced accuracy during 1 iteration :  0.550000
balanced_accuracy_score on 1 fold :  0.675000
balanced_accuracy_score on 2 fold :  0.400000
balanced_accuracy_score on 3 fold :  0.550000
balanced_accuracy_score on 4 fold :  0.450000
balanced_accuracy_score on 5 fold :  0.625000
balanced_accuracy_score on 6 fold :  0.400000
balanced_accuracy_score on 7 fold :  0.700000
balanced_accuracy_score on 8 fold :  0.625000
balanced_accuracy_score on 9 fold :  0.625000
balanced_accuracy_score on 10 fold :  0.450000
average balanced accuracy during 2 iteration :  0.550000
balanced_accuracy_score on 1 fold :  0.675000
balanced_a