In [2]:
import pandas as pd 
from scipy.stats import chi2
import numpy as np
from numba import jit, prange
%matplotlib inline

def Normalization(Data):
    """
    Normalize the data 
    """
    Mean1 = np.mean(Data, axis = 0)
    Std1  = np.std(Data, axis = 0)
    return (Data-Mean1)/Std1

def ReturnDataFrame(path):
    """
    Read df into memory
    """
    return pd.read_csv(path, sep=',',skipinitialspace=True)  

def MahalonobisDetection(Data, alpha):
    """
    Old Version
    """
    Data = Data - np.mean(Data, axis = 0)
    n1,n2 = Data.shape
    Cov = (1/float(n1-1))*np.dot(Data.T,Data)
    M = np.zeros(n1)
    # Using extra memory
    RightP = np.dot(np.linalg.inv(Cov),Data.T)
    for i in range(0,n1):
        M[i] = np.dot(Data[i,:],RightP[:,i])
    c = chi2.isf(alpha,n2)
    return M, c, Cov

@jit(parallel=True, nogil=True, cache=True)
def NumbaMahalonobisDetection(Data, alpha):
    """
    The numba version
    """
    # Get shape of data
    n1,n2 = Data.shape
    # substract the mean
    Data = Data - Data.mean(axis = 0)
    # Get the Covariance
    Cov = (1/float(n1-1))*np.dot(Data.T,Data)
    M = np.zeros(n1)
    # Using extra memory
    RightP = np.dot(np.linalg.inv(Cov),Data.T)
    # Here we use a paralel version 
    for i in prange(0,n1):
        M[i] = np.dot(Data[i,:],RightP[:,i])
    c = chi_statistics(alpha, n2)
    
    return M, c, Cov  

@jit(nopython=True, parallel=True, nogil=True, cache=True)
def chi_statistics(alpha, k):
    """
    Getting the value for X^2_{alpha,k}
    """
    # Wilson and Hilferty approximation
    return k*np.power(z(alpha)*np.sqrt(2.0/(9.0*k))+(1.0-(2.0/(9.0*k))),2)
    
@jit(nopython=True, parallel=True, nogil=True, cache=True)
def z(alpha):
    """
    Z score with level of confidence alpha z = x
    Using Shore approximation 1982 
    """
    # Get the CDF value
    p = 1.0-alpha
    
    z = 5.5556*(1.0-np.power(((1.0-p)/p),0.1986))
    
    return z

In [3]:
# Load CVS
Path1 = 'voice.csv'
DataMatrix = ReturnDataFrame(Path1)

# Shuffle the data randomly
DataMatrix = DataMatrix.sample(frac=1).reset_index(drop=True)
DataMatrix.replace({'male': 1.0, 'female': -1.0},
                  inplace=True)
DataLabels = DataMatrix['label']
DataMatrix.drop('label', axis=1, inplace=True)
# Transform to an NP Array
Data = DataMatrix.values
Label = DataLabels.values

fmask = (Label == 1.0)
mmask = (Label == -1.0)

# Normalize your Data # 
NData = np.asmatrix(Normalization(Data))

Class1 = NData[fmask,:]
Class2 = NData[mmask,:]

alpha = 0.10

In [22]:
%%timeit -n 10
# Testing the functions
M1, c1, _ = MahalonobisDetection(Class1, alpha)

20.4 ms ± 976 µs per loop (mean ± std. dev. of 7 runs, 10 loops each)


In [23]:
%%timeit -n 10
# Testing the functions
M2, c2, _ = NumbaMahalonobisDetection(Class1, alpha)

27.8 ms ± 2.27 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)


In [5]:
print(c1)
print(c2)

19.337429229428256
19.55802469135803


In [16]:
M1

array([-0.6875, -8.    , 33.875 , ...,  8.25  , 16.    ,  8.375 ])

In [17]:
M2

array([-0.67904257, -8.8924327 , 33.9865082 , ...,  8.44533869,
       16.01458331,  8.47641256])