In [35]:
import pandas as pd 
from scipy.stats import chi2
import numpy as np
from numba import jit, prange
%matplotlib inline

def Normalization(Data):
    """
    Normalize the data 
    """
    Mean1 = np.mean(Data, axis = 0)
    Std1  = np.std(Data, axis = 0)
    return (Data-Mean1)/Std1

def ReturnDataFrame(path):
    """
    Read df into memory
    """
    return pd.read_csv(path, sep=',',skipinitialspace=True)  

def MahalonobisDetection(Data, alpha):
    """
    Old Version
    """
    Data = Data - np.mean(Data, axis = 0)
    n1,n2 = Data.shape
    Cov = (1/float(n1-1))*np.dot(Data.T,Data)
    M = np.zeros(n1)
    # Using extra memory
    RightP = np.dot(np.linalg.inv(Cov),Data.T)
    for i in range(0,n1):
        M[i] = np.dot(Data[i,:],RightP[:,i])
    # Numb
    c = chi2.isf(alpha,n2)
    return M, c, Cov

@jit(nopython=True, parallel=False) # nogil=True, cache=True)
def NumbaMahalonobisDetection(Data, alpha):
    """
    The numba version
    """
    # Get shape of data
    n1,n2 = Data.shape
    # substract the mean
    Data = Data - Data.mean(axis = 0)
    # Get the Covariance
    Cov = (1/float(n1-1))*np.dot(Data.T,Data)
    M = np.zeros(n1)
    # Using extra memory
    RightP = np.dot(np.linalg.inv(Cov),Data.T)
    # Here we use a paralel version 
    for i in range(n1):
        M[i] = np.dot(Data[i,:],RightP[:,i])
    # chage this to gaussian distributions use a trick
    c = chi_statistics(alpha, n2)
    
    return M, c, Cov  

@jit(nopython=True, parallel=True, nogil=True, cache=True)
def chi_statistics(alpha, k):
    """
    Getting the value for X^2_{alpha,k}
    """
    # Wilson and Hilferty approximation
    return k*np.power(z(alpha)*np.sqrt(2.0/(9.0*k))+(1.0-(2.0/(9.0*k))),2)
    
@jit(nopython=True, parallel=True, nogil=True, cache=True)
def z(alpha):
    """
    Z score with level of confidence alpha z = x
    Using Shore approximation 1982 
    """
    # Get the CDF value
    p = 1.0-alpha
    
    z = 5.5556*(1.0-np.power(((1.0-p)/p),0.1986))
    
    return z

In [36]:
# Load CVS
Path1 = 'voice.csv'
DataMatrix = ReturnDataFrame(Path1)

# Shuffle the data randomly
DataMatrix = DataMatrix.sample(frac=1).reset_index(drop=True)
DataMatrix.replace({'male': 1.0, 'female': -1.0},
                  inplace=True)
DataLabels = DataMatrix['label']
DataMatrix.drop('label', axis=1, inplace=True)
# Transform to an NP Array
Data = DataMatrix.values
Label = DataLabels.values

fmask = (Label == 1.0)
mmask = (Label == -1.0)

# Normalize your Data # 
NData = np.asmatrix(Normalization(Data))

Class1 = NData[fmask,:]
Class2 = NData[mmask,:]

alpha = 0.10

In [37]:
%%timeit -n 10 -r 10
# Testing the functions
M1, c1, _ = MahalonobisDetection(Class1, alpha)

20.1 ms ± 883 µs per loop (mean ± std. dev. of 10 runs, 10 loops each)


In [38]:
%%timeit -n 100 -r 10
# Testing the functions
M2, c2, _ = NumbaMahalonobisDetection(Class1, alpha)

TypingError: Failed in nopython mode pipeline (step: nopython frontend)
Internal error at <numba.typeinfer.CallConstraint object at 0x7f65755740b8>:
--%<----------------------------------------------------------------------------
Traceback (most recent call last):
  File "/usr/local/lib/python3.5/dist-packages/numba/errors.py", line 617, in new_error_context
    yield
  File "/usr/local/lib/python3.5/dist-packages/numba/typeinfer.py", line 422, in __call__
    self.resolve(typeinfer, typevars, fnty)
  File "/usr/local/lib/python3.5/dist-packages/numba/typeinfer.py", line 446, in resolve
    sig = typeinfer.resolve_call(fnty, pos_args, kw_args)
  File "/usr/local/lib/python3.5/dist-packages/numba/typeinfer.py", line 1232, in resolve_call
    return self.context.resolve_function_type(fnty, pos_args, kw_args)
  File "/usr/local/lib/python3.5/dist-packages/numba/typing/context.py", line 190, in resolve_function_type
    res = self._resolve_user_function_type(func, args, kws)
  File "/usr/local/lib/python3.5/dist-packages/numba/typing/context.py", line 242, in _resolve_user_function_type
    return func.get_call_type(self, args, kws)
  File "/usr/local/lib/python3.5/dist-packages/numba/types/functions.py", line 217, in get_call_type
    out = template.apply(args, kws)
  File "/usr/local/lib/python3.5/dist-packages/numba/typing/templates.py", line 206, in apply
    sig = generic(args, kws)
  File "/usr/local/lib/python3.5/dist-packages/numba/typing/arraydecl.py", line 646, in generic_hetero_real
    assert not kws
AssertionError

During handling of the above exception, another exception occurred:

Traceback (most recent call last):
  File "/usr/local/lib/python3.5/dist-packages/numba/typeinfer.py", line 141, in propagate
    constraint(typeinfer)
  File "/usr/local/lib/python3.5/dist-packages/numba/typeinfer.py", line 422, in __call__
    self.resolve(typeinfer, typevars, fnty)
  File "/usr/lib/python3.5/contextlib.py", line 77, in __exit__
    self.gen.throw(type, value, traceback)
  File "/usr/local/lib/python3.5/dist-packages/numba/errors.py", line 625, in new_error_context
    six.reraise(type(newerr), newerr, tb)
  File "/usr/local/lib/python3.5/dist-packages/numba/six.py", line 659, in reraise
    raise value
numba.errors.InternalError: 
[1] During: resolving callee type: BoundFunction(array.mean for array(float64, 2d, C))
[2] During: typing of call at <ipython-input-35-8a4241015b52> (45)

--%<----------------------------------------------------------------------------


File "<ipython-input-35-8a4241015b52>", line 45:
def NumbaMahalonobisDetection(Data, alpha):
    <source elided>
    # substract the mean
    Data = Data - Data.mean(axis = 0)
    ^

This is not usually a problem with Numba itself but instead often caused by
the use of unsupported features or an issue in resolving types.

To see Python/NumPy features supported by the latest release of Numba visit:
http://numba.pydata.org/numba-doc/dev/reference/pysupported.html
and
http://numba.pydata.org/numba-doc/dev/reference/numpysupported.html

For more information about typing errors and how to debug them visit:
http://numba.pydata.org/numba-doc/latest/user/troubleshoot.html#my-code-doesn-t-compile

If you think your code should work with Numba, please report the error message
and traceback, along with a minimal reproducer at:
https://github.com/numba/numba/issues/new


In [20]:
print(c1)
print(c2)

NameError: name 'c1' is not defined

In [21]:
print(M1[0])
print(M2[0])

NameError: name 'M1' is not defined