In [1]:
#importing the files
import numpy as np
import pandas as pda
from sklearn.model_selection import train_test_split as tts
import matplotlib.pyplot as plt
from numpy.linalg import inv
import math
import itertools
from sklearn.metrics import confusion_matrix
from sklearn.metrics import accuracy_score

In [2]:
microData = pda.read_csv('Microchip_Data.csv')

In [3]:
trainingSet,testingSet = tts(microData,test_size=0.3,random_state= 6)

In [4]:
trainingX = trainingSet[microData.columns[0:2]].copy()
trainingY = trainingSet['Pass/Fail'].copy()
testingX = testingSet[microData.columns[0:2]].copy()
testingY = testingSet['Pass/Fail'].copy()

In [5]:
trainingX = trainingX.to_numpy()
trainingY = trainingY.to_numpy()
testingX = testingX.to_numpy()
testingY = testingY.to_numpy()

In [6]:
mini = np.min(trainingX,axis=0)
maxi = np.max(trainingX,axis=0)

In [7]:
trainingX = (trainingX - mini)/(maxi - mini)

In [8]:
mini = np.min(testingX,axis=0)
maxi = np.max(testingX,axis=0)

In [9]:
testingX = (testingX - mini)/(maxi - mini)

In [10]:
u1 = trainingX[:,0]
u2 = trainingX[:,1]

In [11]:
def boxMullerTransformation(u1,u2):
      z1 = np.sqrt(-2*np.log(u1))*np.cos(2*np.pi*u2)
      z2 = np.sqrt(-2*np.log(u1))*np.sin(2*np.pi*u2)
      return z1,z2
  

In [12]:
z1,z2 = boxMullerTransformation(u1,u2)

  
  This is separate from the ipykernel package so we can avoid doing imports until


In [13]:
trainY = []
X_one = []
X_zero = []
for i in range(len(trainingY)):
    if(z1[i]!= -math.inf and z1[i]!= math.inf and z2[i]!= -math.inf and z2[i]!= math.inf):
        if trainingY[i] == 1:
            X_one.append([z1[i],z2[i]])
        else:
            X_zero.append([z1[i],z2[i]])
        trainY.append(trainingY[i])
        

In [14]:
phi = float(len(X_one)/(len(X_one)+len(X_zero)))
mu0 = np.sum(np.matrix(X_zero),axis = 0)/len(X_zero)
mu1 = np.sum(np.matrix(X_one),axis = 0)/len(X_one)
print("Phi = ", phi)
print("mu0 = ", mu0)
print("mu1 = ", mu1)

Phi =  0.4444444444444444
mu0 =  [[0.23553374 0.0951008 ]]
mu1 =  [[-0.26018669 -0.04174775]]


In [15]:
sigma = np.zeros((2,2))
sigma0 = np.zeros((2,2))
sigma1 = np.zeros((2,2))

In [16]:
for i in range(len(X_zero)):
    sigma0 += np.dot(np.transpose(X_zero[i]-mu0),X_zero[i]-mu0)
    
for i in range(len(X_one)):
    sigma1 += np.dot(np.transpose(X_one[i]-mu1),X_one[i]-mu1)

In [17]:
sigma = (sigma1 + sigma0)/len(trainY)
sigma0 /= len(X_one)
sigma1 /= len(X_zero)
print("Sigma = ",sigma)
print("Sigma0 = ",sigma0)
print("Sigma1 = ",sigma1)

Sigma =  [[ 0.67859136 -0.04991619]
 [-0.04991619  1.33673841]]
Sigma0 =  [[ 0.96288516 -0.15686556]
 [-0.15686556  1.75783922]]
Sigma1 =  [[0.45115632 0.0356433 ]
 [0.0356433  0.99985777]]


In [18]:
def probFunction(x,mu,sigma):
    m = len(x)
    if m == mu.shape[1] and (m,m) == sigma.shape:
        deter = np.linalg.det(sigma)
        assert deter!=0, "matrix cannot be singular"
        
        temp = 1.0/(np.power((2*np.pi),float(m)/2)*np.power(deter,1.0/2))
        xmu = np.matrix(x-mu)
        siginv = inv(sigma)
        res = np.power(np.e,-0.5*(np.dot(np.dot(xmu,siginv),np.transpose(xmu))))
        return res*temp


In [19]:
predictedY = []
for x in testingX:
    pa = probFunction(x,np.squeeze(mu0),np.matrix(sigma0))
    pc = probFunction(x,np.squeeze(mu1),np.matrix(sigma1))
    print(pa , pc)
    if (pa<=pc):
        predictedY.append(1)
    else:
        predictedY.append(0)

[[0.09332424]] [[0.04579939]]
[[0.1014882]] [[0.08886156]]
[[0.09998851]] [[0.0929653]]
[[0.12165498]] [[0.15454905]]
[[0.10670083]] [[0.15320678]]
[[0.11376384]] [[0.15239518]]
[[0.07821309]] [[0.04109959]]
[[0.12181813]] [[0.18449405]]
[[0.08606325]] [[0.05306357]]
[[0.11487783]] [[0.10649379]]
[[0.11552915]] [[0.16465249]]
[[0.1120405]] [[0.1347235]]
[[0.09065917]] [[0.07186717]]
[[0.08512186]] [[0.04408896]]
[[0.08231453]] [[0.04043668]]
[[0.10480631]] [[0.11594507]]
[[0.11421281]] [[0.10104355]]
[[0.1003675]] [[0.08390804]]
[[0.11559952]] [[0.14346128]]
[[0.09434166]] [[0.05627473]]
[[0.09382473]] [[0.082513]]
[[0.10011362]] [[0.05895139]]
[[0.11661714]] [[0.12297109]]
[[0.09693776]] [[0.10517466]]
[[0.0960022]] [[0.07383003]]
[[0.11869867]] [[0.1526816]]
[[0.08773855]] [[0.03939178]]
[[0.09100518]] [[0.05655245]]
[[0.09857137]] [[0.09599196]]
[[0.10232614]] [[0.11856015]]
[[0.11591898]] [[0.19521864]]
[[0.0828976]] [[0.04404027]]
[[0.1083749]] [[0.11179368]]
[[0.09439359]] [[0.05

In [20]:
count = 0
for i in range(len(testingY)):
    if(predictedY[i]==testingY[i]):
        count = count + 1

accuracy = np.multiply(np.divide(count,len(testingY)),100)
print(accuracy)

63.888888888888886


In [21]:
confusion_matrix(testingY,predictedY)

array([[11,  3],
       [10, 12]])