In [11]:
import os

import pandas as pd

import numpy as np

from scipy.io import loadmat

from sklearn.metrics import balanced_accuracy_score

from sklearn.model_selection import StratifiedKFold, train_test_split

import warnings

warnings.filterwarnings("ignore")

import scipy.stats as stats

In [12]:
# Logistic Regression

class LogitRegression() :
    """Binary logistic regression trained with full-batch gradient descent.

    Parameters
    ----------
    learning_rate : float
        Step size applied to every gradient update.
    iterations : int
        Number of gradient steps performed by ``fit``.

    Attributes set by ``fit``
    -------------------------
    m, n : int
        Number of training examples / features, taken from ``X.shape``.
    W : numpy.ndarray of shape ``(n,)``
        Weight vector, initialised to zeros.
    b : float
        Bias term, initialised to 0.
    X, Y :
        References to the training data handed to ``fit``.
    """

    def __init__( self, learning_rate, iterations ) :

        self.learning_rate = learning_rate

        self.iterations = iterations


    @staticmethod
    def _sigmoid( z ) :
        """Return the logistic function 1 / (1 + exp(-z)) without overflow.

        The naive formula evaluates ``exp(-z)``, which overflows for large
        negative ``z``.  Here only ``exp(-|z|)`` is evaluated, which always
        lies in (0, 1]:

            z >= 0 :  1 / (1 + exp(-z))
            z <  0 :  exp(z) / (1 + exp(z))

        Both branches are algebraically identical to the naive formula.
        """

        z = np.asarray( z, dtype = float )

        e = np.exp( - np.abs( z ) )

        return np.where( z >= 0, 1.0, e ) / ( 1.0 + e )


    # Function for model training

    def fit( self, X, Y ) :
        """Fit the weights and bias on ``X`` / ``Y`` and return ``self``.

        ``X`` must be two-dimensional (examples x features).  ``Y`` holds one
        label per example; a flat vector, a column vector or a row vector are
        all accepted because the residual is reshaped to length ``m`` in
        ``update_weights``.  The labels are presumably 0/1 - the code does not
        check this.
        """

        # no_of_training_examples, no_of_features

        self.m, self.n = X.shape

        # weight initialization

        self.W = np.zeros( self.n )

        self.b = 0

        self.X = X

        self.Y = Y

        # gradient descent learning

        for _ in range( self.iterations ) :

            self.update_weights()

        return self


    # Helper function to update weights in gradient descent

    def update_weights( self ) :
        """Perform one gradient-descent step on ``W`` and ``b``; return ``self``."""

        # predicted probabilities for the training set
        A = self._sigmoid( self.X.dot( self.W ) + self.b )

        # residual (prediction - label), flattened to one value per example
        # regardless of whether Y is stored flat, as a column or as a row

        residual = np.reshape( A - self.Y.T, self.m )

        # gradients averaged over the m training examples

        dW = np.dot( self.X.T, residual ) / self.m

        db = np.sum( residual ) / self.m

        # update weights

        self.W = self.W - self.learning_rate * dW

        self.b = self.b - self.learning_rate * db

        return self


    # Hypothetical function  h( x )

    def predict( self, X ) :
        """Return the sigmoid output for every row of ``X``.

        The result is a continuous score in [0, 1], not a class label; the
        caller is expected to apply its own decision threshold.
        """

        return self._sigmoid( X.dot( self.W ) + self.b )

In [13]:
def threshold_selection( z, y ) :
    """Pick the decision threshold on ``z`` that misclassifies the fewest examples.

    Parameters
    ----------
    z : array-like
        One score per example (any shape; it is flattened).
    y : array-like
        One label per example (any shape with the same number of elements;
        it is flattened).  A score above the threshold is predicted as 1,
        otherwise as 0, and compared against ``y`` for equality.

    Returns
    -------
    The midpoint between two consecutive distinct scores that yields the
    lowest number of mismatches.  Ties go to the smallest such midpoint.
    ``0`` is returned when there are fewer than two distinct scores or when
    no candidate gets at least one example right.
    """

    # flatten the scores so that a column vector cannot broadcast against
    # the flattened labels into an (m, m) comparison

    z = np.ravel( z )

    # total number of examples

    total = np.size( z )

    # reshape y into 1 D as of z

    y = np.reshape( y, ( total ) )

    # candidate thresholds: midpoints between consecutive distinct scores

    distinct = np.unique( z )

    thres = ( distinct[1:] + distinct[:-1] ) / 2

    # selecting threshold with best error

    besterror = total

    bestthres = 0

    for t in thres :

        y_hat = np.where( z > t, 1, 0 )

        # count mismatches in C instead of iterating the array in Python
        error = np.count_nonzero( y_hat != y )

        # strict comparison keeps the first (smallest) of tied thresholds
        if error < besterror :

            besterror = error

            bestthres = t

    return bestthres

In [None]:
# Fit one logistic-regression model per database found in ./Databases/ and
# record the score threshold selected on that database's own training data.
# Results are collected in the parallel lists `Databases` and `Thresholds`.

# sorted: os.listdir returns entries in arbitrary order, which would make the
# row order of the result table differ between machines / runs
files = sorted( os.listdir( './Databases/' ) )

Databases = []

Thresholds = []


for file in files :


    # read one database at a time

    data = loadmat( os.path.join( './Databases/', file ) )

    # standardise every feature column to zero mean and unit variance

    X = stats.zscore( data['X'] )

    y = data['Y']

    ( m , n ) = X.shape

    # A column that is constant comes out of zscore as all-NaN (0 / 0) and
    # would turn every weight, and therefore every score, into NaN.  Such a
    # feature carries no information, so it is neutralised with zeros.

    X[ :, np.isnan( X ).all( axis = 0 ) ] = 0.0


    try :

        model = LogitRegression( learning_rate = 0.01, iterations = 500 )

        model.fit( X, y )

        z = model.predict( X )

        bestthres = threshold_selection( z, y )

        Databases.append( file )

        Thresholds.append( bestthres )

    except Exception as err :

        # Keep going with the remaining databases, but say which one was
        # dropped and why.  KeyboardInterrupt / SystemExit are not caught.

        print( 'Skipping ' + file + ': ' + repr( err ) )

        continue

In [None]:
# One row per successfully processed database: its file name and the
# threshold selected for it.
Thres = pd.DataFrame( { 'Databases' : Databases, 'Thresholds' : Thresholds } )

Thres

In [None]:
# Write the threshold table to Thres.csv in the current working directory,
# using to_csv's default options.
Thres.to_csv( 'Thres.csv' )