In [1]:
import numpy as np
import pandas as pda
from sklearn.model_selection import train_test_split as tts
import matplotlib.pyplot as plt
from sklearn.metrics import confusion_matrix

In [2]:
#reading the csv file with the help of pandas
heartData = pda.read_csv('heart_data.csv')

In [3]:
heartData
# Age
# Sex: Male => 1.0 , Female => 0.0
# cp_type: 1 => typical angina , 2 => atypical angina
#          3 => non- anginal pain, 4 => asymptomatic
# trestbps: resting blood pressure (in mm Hg on admission to the hospital)
# chol: serum cholestoral in mg/dl
# fbs: fasting blood sugar > 120 mg/dl (1 = true; 0 = false)
# restecg: resting electrocardiographic results (0 = normal, 1= having ST-T wave abnormality (T wave)
# thalach: maximum heart rate achieved during thalium stress test
# exang: exercise induced angina (1 = yes; 0 = no)
# oldspeak: ST depression induced by exercise relative to rest
# slope: the slope of the peak exercise ST segment (1 = upsloping, 2 = flat, 3 = downsloping)
# ca: number of major vessels (0-3) colored by flourosopy
# thal: results of thallium stress test (3 = normal; 6 = fixed defect; 7 = reversable defect )
# label: our y (1 = yes , 0 = no)

Unnamed: 0,Age,Sex,cp_type,trestbps,chol,fbs,restecg,thalach,exang,oldspeak,slope,ca,thal,label
0,63.0,1.0,1.0,145.0,233.0,1.0,2.0,150.0,0.0,2.3,3.0,0.0,6.0,0
1,67.0,1.0,4.0,160.0,286.0,0.0,2.0,108.0,1.0,1.5,2.0,3.0,3.0,1
2,67.0,1.0,4.0,120.0,229.0,0.0,2.0,129.0,1.0,2.6,2.0,2.0,7.0,1
3,37.0,1.0,3.0,130.0,250.0,0.0,0.0,187.0,0.0,3.5,3.0,0.0,3.0,0
4,41.0,0.0,2.0,130.0,204.0,0.0,2.0,172.0,0.0,1.4,1.0,0.0,3.0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
298,45.0,1.0,1.0,110.0,264.0,0.0,0.0,132.0,0.0,1.2,2.0,0.0,7.0,1
299,68.0,1.0,4.0,144.0,193.0,1.0,0.0,141.0,0.0,3.4,2.0,2.0,7.0,1
300,57.0,1.0,4.0,130.0,131.0,0.0,0.0,115.0,1.0,1.2,2.0,1.0,7.0,1
301,57.0,0.0,2.0,130.0,236.0,0.0,2.0,174.0,0.0,0.0,2.0,1.0,3.0,1


In [4]:
# modifying the last column from 1 and 0 to 1 and -1 
heartData['label'].replace((1,0),(1,-1),inplace = True)

In [5]:
trainingSet,testingSet = tts(heartData,test_size=0.3,random_state= 7)

In [6]:
trainingX = np.array(trainingSet[["Age","trestbps"]])
trainingY = np.array(trainingSet["label"])

In [7]:
testingX = np.array(testingSet[["Age","trestbps"]])
testingY = np.array(testingSet["label"])

In [8]:
def normalise(sample):
    mu = np.mean(sample,axis=0)
    std_dev = np.std(sample,axis=0)
    sample = (sample-mu)/std_dev
    return sample

In [9]:
trainingX = normalise(trainingX)
testingX = normalise(testingX)

In [10]:
def gaussianKernel(s1,s2,sig):
    temp = 0
    temp = np.exp(-np.sum(abs(np.square(s1-s2))/(2*(sig**2))))
    return temp

In [11]:
class SupportVectorMachine():
    def trainSVM(self,trainingX,trainingY,C,sig,tol = 1e-3,maxCount = 5,args = ()):
        Y = trainingY.astype(int)
        m,n = trainingX.shape
        
        count = 0
        E = np.zeros(m)
        alphas = np.zeros(m)
        b = 0
        
        X2 = np.sum(trainingX**2,axis = 1)
        K = X2 + X2[:,None] - 2*np.dot(trainingX,np.transpose(trainingX))
        
        if len(args) > 0:
            K /= 2*args[0]**2
            
        K = np.exp(-K)
        
        K = np.zeros((m,m))
        for i in range(m):
            for j in range(i,m):
                K[i,j] = gaussianKernel(trainingX[i,:],trainingX[j,:],sig)
                K[j,i] = K[i,j]
                
        while count < maxCount:
            noOfAlphas = 0
            for i in range(m):
                E[i] = b + np.sum(alphas * trainingY * K[:, i]) - trainingY[i]
                
                if (trainingY[i]*E[i] < -tol and alphas[i] < C) or (trainingY[i]*E[i] > tol and alphas[i] > 0):
                    
                    j = np.random.choice(list(range(i)) + list(range(i+1,m)), size = 1)[0]
                    E[j] = b + np.sum(alphas*trainingY*K[:,j]) - trainingY[j]
                    
                    alphaIOld = alphas[i]
                    alphaJOld = alphas[j]
                    
                    if trainingY[i] == trainingY[j]:
                        L = max(0,alphas[j]+alphas[i]-C)
                        H = min(C,alphas[j]+alphas[i])
                    else:
                        L = max(0,alphas[j]-alphas[i])
                        H = min(C,C + alphas[j] - alphas[i])
                        
                    if L == H:
                        continue
                        
                    eeta = 2*K[i,j] - K[i,i] -K[j,j]
                    if eeta>=0:
                        continue
                        
                    alphas[j] -= trainingY[j]*(E[i] - E[j])/eeta
                    alphas[j] = max(L,min(H,alphas[j]))
                    
                    if abs(alphas[j] -alphaJOld) < tol:
                        alphas[j] = alphaJOld
                        continue
                    alphas[i] += trainingY[i]*trainingY[j]*(alphaJOld - alphas[j])
                    
                    b1 = b - E[i] - trainingY[i]*(alphas[i]-alphaIOld)*K[i,j]-Y[j]*(alphas[j]-alphaJOld)*K[i,j]
                    b2 = b - E[j] - trainingY[i]*(alphas[i]-alphaIOld)*K[i,j]-Y[j]*(alphas[j]-alphaJOld)*K[j,j]
                    
                    if 0 < alphas[i] < C:
                        b = b1
                    elif 0 < alphas[j] < C:
                        b = b2
                    else:
                        b = (b1+b2)/2
                        
                    noOfAlphas +=1
            if noOfAlphas ==0:
                count+=1
            else:
                count=0
        index = alphas > 0
        model = {'X': trainingX[index,:],
                 'y': trainingY[index],
                 'b': b,
                 'args': args,
                 'alphas': alphas[index],
                 'w': np.dot(alphas*trainingY,trainingX)}
        return model
    
    def predictSVM(self,model,testingX):
        if testingX.ndim == 1:
            testingX = testingX[np.newaxis]
            
        m = testingX.shape[0]
        p = np.zeros(m)
        predicted = np.zeros(m)
        xtrans = np.transpose(testingX)
        
        X1 = np.sum(testingX**2,1)
        X2 = np.sum(model['X']**2,1)
        K = X2 + X1[:, None] - 2*np.dot(testingX,np.transpose(model['X']))
        
        if len(model['args']) > 0:
            K /= 2*model['args'][0]**2
            
            K = np.exp(-K)
            p = np.dot(K,model['alphas']*model['y']) + model['b']
        else:
            for i in range(m):
                pred = 0
                for j in range(model['X'].shape[0]):
                    pred += model['alphas'][i]*model['y'][j]*gaussianKernel(testingX[i, :],model['X'][j, :],2)
                p[i] = pred
                predicted[p >=0]=1
                predicted[p<0]= -1
                return predicted

In [12]:
C = 0.2
sigma = 2

In [13]:
svmObject = SupportVectorMachine()
model = svmObject.trainSVM(trainingX,trainingY,C,sigma)
predictedY = svmObject.predictSVM(model,testingX)

In [14]:
tot = len(testingY)
count =0
for i in range(tot):
    if(predictedY[i] == testingY[i]):
        count += 1
        
accuracy = count/tot
print("Accuracy:", np.multiply(accuracy,100))


Accuracy: 56.043956043956044


In [15]:
confusion_matrix(testingY,predictedY)

array([[ 1, 40],
       [ 0, 50]])