## Support Vector Machine From Scratch

<img src="assets/finding_w.png">
<img src="assets/system.png">
<img src="assets/distance.png">
<img src="assets/lagrangien.png">

### Reading Datasets

In [1]:
import numpy as np
import pandas as pd

df = pd.read_table('../dataset/occupancy/datatraining.txt', skiprows=1, delimiter=',', names=('A', 'B', 'C', 'D', 'E', 'F', 'Occ'))
test = pd.read_table('../dataset/occupancy/datatest.txt', skiprows=1, delimiter=',', names=('A', 'B', 'C', 'D', 'E', 'F', 'Occ'))

test = test[['B', 'C', 'D', 'E', 'F', 'Occ']]
df = df[['B', 'C', 'D', 'E', 'F', 'Occ']]
df.head()

Unnamed: 0,B,C,D,E,F,Occ
1,23.18,27.272,426.0,721.25,0.004793,1
2,23.15,27.2675,429.5,714.0,0.004783,1
3,23.15,27.245,426.0,713.5,0.004779,1
4,23.15,27.2,426.0,708.25,0.004772,1
5,23.1,27.2,426.0,704.5,0.004757,1


In [2]:
train_cols = ['B', 'C', 'D', 'E', 'F']
df['Occ'][df['Occ'] == 0] = -1

### Finds closest data of the given class

In [3]:
def norm(w):
    return np.sum(w**2)**0.5

def distance(w, row, b):
    return abs(w.T.dot(row.T) + b)/norm(w)
    
def closest_point(w, b, cls):
    dis = float('+inf')
    df_cls = df[df['Occ']==cls][train_cols]
    for i in range(len(df_cls)):
        if float(distance(w, np.array(df_cls[i:i+1])[0, :], b)) < dis:
            dis = float(distance(w, np.array(df_cls[i:i+1])[0, :], b))
            min_dis = df_cls[i:i+1]
    return min_dis

### Creates random weights

In [4]:
def random_wt():
    w = []
    for i in train_cols:
        w.append(np.random.choice(np.array(df[i])))
    print(w)
    return w

### Training SVM

In [5]:
def svm(lr, b, epochs):
    w = pd.DataFrame(random_wt())
    lamb = 1e-2
    for i in range(epochs):
        # learning rate
        l = lr - (i*lr)/10
        
        for j in range(len(df)):
            row = np.array(df[j:j+1])
            if (float((w.T.dot(row[0, :-1].T)+b)*row[0, -1]))<1:
                t = np.array(-lamb*lr*row[0, -1]*row[0, :-1]).reshape((5,1))
                w = np.subtract(w, t)
        
        closest_one = closest_point(w, b, 1)
        closest_minus_one = closest_point(w, b, -1)
        b = -1*(w.T.dot(np.array(closest_one)[0, :]) + w.T.dot(np.array(closest_minus_one)[0, :]))*0.5

    print(w)
    return b, w

In [6]:
lr = 0.3
# taking random intercept
b = np.random.rand()
epochs = 10

# training SVM
b, w = svm(lr, b, epochs)

# Intercept after training
print(b)

[21.2, 23.025, 405.0, 485.5, 0.00310618168450477]
            0
0    1.015130
1   -0.989516
2  313.072750
3  -26.638850
4   -0.000792
0   -107110.720013
dtype: float64


### Testing with test set

In [7]:
def svm_test(w, b):
    predictions = list()
    for i in range(len(test)):
        row = np.array(test[i:i+1])[0,:-1]
        cls = int(np.sign(np.dot(w.T,row.T)+b))
        predictions.append(cls)
    return predictions

In [8]:
pred = svm_test(w, b)

### Calculating Accuracy

In [9]:
y_out = np.array(pred)
y_out[y_out==-1] = 0
gr_val = np.array(test['Occ'])

tp_bool = np.logical_and((y_out==1),(gr_val==1))
tn_bool = np.logical_and((y_out==0),(gr_val==0))
fp_bool = np.logical_and((y_out==1),(gr_val==0))
fn_bool = np.logical_and((y_out==0),(gr_val==1))
tp = len(y_out[tp_bool])
tn = len(y_out[tn_bool])
fp = len(y_out[fp_bool])
fn = len(y_out[fn_bool])
print(tp,tn,fp,fn)

accuracy = (tp+tn)/(tp+tn+fp+fn)
print("accuracy : "+str(accuracy))

# precision = tp/(total yes predictions)
precision = tp/(len(y_out[y_out==1]))
print("precision : "+str(precision))

specificity = tp/(tp+fn)
print("specificity : "+str(specificity))

sensitivity = tn/(tn+fp)
print("sensitivity : "+str(sensitivity))

183 435 23 20
accuracy : 0.9349470499243571
precision : 0.8883495145631068
specificity : 0.9014778325123153
sensitivity : 0.9497816593886463
