In [None]:
#importing all the required libraries
import numpy as np 
import pandas as pd             
from matplotlib import pyplot as plt
%matplotlib inline

from random import shuffle

In [None]:
# downloading the datset 
url = "https://archive.ics.uci.edu/ml/machine-learning-databases/00229/Skin_NonSkin.txt" 
data = pd.read_csv(url, header = None, sep='\t')

np.random.seed(1)

msk = np.random.rand(len(data)) < 0.8
data[msk].to_csv('train.csv', header=False, index=False)
data[~msk].to_csv('test.csv', header=False, index=False)

In [None]:
train = data[msk]  #separating training data

In [None]:
test = data[~msk]   #separating testing data

In [None]:
df = train.append(test)

In [None]:
df

Unnamed: 0,0,1,2,3
0,74,85,123,1
1,73,84,122,1
2,72,83,121,1
3,70,81,119,1
4,70,81,119,1
...,...,...,...,...
245048,163,162,112,2
245049,163,162,112,2
245050,163,162,112,2
245053,163,162,112,2


In [None]:
df.iloc[:, :3] = (df - df.mean())/df.std() #normalizing data 

In [None]:
df[3] =df[3].apply(lambda x: -1 if x>1 else 1) #assigning target values as 1's and -1's since in SVM classify data as 1 and -1

In [None]:
df

Unnamed: 0,0,1,2,3
0,-0.820254,-0.792566,-0.002441,1
1,-0.836317,-0.809249,-0.016223,1
2,-0.852380,-0.825932,-0.030004,1
3,-0.884505,-0.859298,-0.057567,1
4,-0.884505,-0.859298,-0.057567,1
...,...,...,...,...
245048,0.609335,0.492027,-0.154036,-1
245049,0.609335,0.492027,-0.154036,-1
245050,0.609335,0.492027,-0.154036,-1
245053,0.609335,0.492027,-0.154036,-1


In [None]:
df = df.sample(frac = 1)               #shuffling the data

In [None]:
X = df.iloc[:, :3]

In [None]:
ones = np.ones([len(df), 1])           #creating bias values as 1's

In [None]:
X = np.concatenate((ones, X), axis=1)  #adding bias to the features

In [None]:
y = df.iloc[:, 3:].values

In [None]:
train_X = X[:200000]

In [None]:
train_X.shape

(200000, 4)

In [None]:
train_y = y[:200000]

In [None]:
train_y.shape

(200000, 1)

In [None]:
test_X = X[200000:]

In [None]:
test_y = y[200000:]

In [None]:
# creating class interface for SVM 
class SVM():
  def __init__(self, X,y,alpha =0.01, lmbda= 0.01, iterations =500):
    self.alpha = alpha 
    self.iterations = iterations
    self.lmbda = lmbda
    self.train_X= train_X
    self.train_y = train_y
    self.X = X
    self.y = df.iloc[:, :3].values
    self.w = np.zeros([1, 3+1]).T
    self.h = np.sign(np.dot(X, self.w))                     # this is the hypothesis we need to calculate 
    self.coef = None
    self.intercept = None

  def hinge_loss(self,X,y,w):                               # function for calculating loss(Hinge loss is used in SVM)
    self.result = max(0, (1-self.h*y))
    return self

  def fit(self):                                            # training the model using gradient descent 
    self.loss = np.zeros(self.iterations)
    for iter in range(self.iterations):
      condition = y * self.h < 1                            # condition to be satisfied to update the parameters 
      if condition.any():
        self.w -= self.alpha * (2 * self.lmbda * self.w - np.dot(train_X.T, train_y))   # if the above condition does not satisfy we update this 
      else:                                                                             # otherwise we update only the regularization param
        self.w -= self.alpha * (2 * self.lmbda * self.w)
        self.loss = self.hinge_loss(train_X, train_y, self.w)
    return self

  def predict(self, X):                                     # function to find target 
    y = np.sign(np.dot(X, self.w))
    return y

  def get_w(self):
    return self.w  


In [None]:
clf= SVM(X,y).fit()                       

In [None]:
param = clf.get_w()                       # these are the optimal parameters for this model
param

array([[-556525.63235705],
       [ -72801.67160664],
       [  91423.50552418],
       [ 439140.89581712]])

In [None]:
pred_y = clf.predict(test_X)              # predicting targets for the test data

In [None]:
from sklearn import metrics

In [None]:
print('Accuracy: %2.2f %%' % (100. * metrics.accuracy_score(test_y, pred_y)))

Accuracy: 87.37 %
