In [25]:
import numpy as np

Support vector machine classifier:

In [26]:
class svm_classifier():

  def __init__(self, learning_rate, no_of_iterations, lambda_parameter):
    self.learning_rate = learning_rate
    self.no_of_iterations = no_of_iterations
    self.lambda_parameter = lambda_parameter

  def fit(self, X, Y):

    # m --> no of datapoints --> no of rows
    # n --> no of input features --> no of columns
    self.m, self.n = X.shape

    # initiating weight and bias values
    self.w = np.zeros(self.n)
    self.b = 0

    self.X = X
    self.Y = Y

    # implementing gradient descent for optimization

    for i in range(self.no_of_iterations):
      self.update_weights()


  def update_weights(self):

    # label encoding
    y_label = np.where(self.Y <= 0, -1, 1)

    for index, x_i in enumerate(self.X): # enumertae function returns the index value and list value

      condition = y_label[index] * (np.dot(x_i, self.w) - self.b) >= 1 # Yi*(w*Xi - b)

      if condition.all():

        dw = 2 * self.lambda_parameter * self.w
        db = 0

      else:

        dw = 2 * self.lambda_parameter * self.w - np.dot(x_i, y_label[index])
        db = y_label[index]

      self.w = self.w - self.learning_rate * dw

      self.b = self.b - self.learning_rate * dw


  def predict(self, X):

    output = X.dot(self.w) - self.b

    predicted_labels = np.sign(output) # will ouptut either +1 or -1

    y_hat = np.where(predicted_labels <= -1, 0, 1)

    return y_hat


In [27]:
import pandas as pd
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

In [28]:
df = pd.read_csv("/content/diabetes.csv")

In [29]:
df.head()

Unnamed: 0,Pregnancies,Glucose,BloodPressure,SkinThickness,Insulin,BMI,DiabetesPedigreeFunction,Age,Outcome
0,6,148,72,35,0,33.6,0.627,50,1
1,1,85,66,29,0,26.6,0.351,31,0
2,8,183,64,0,0,23.3,0.672,32,1
3,1,89,66,23,94,28.1,0.167,21,0
4,0,137,40,35,168,43.1,2.288,33,1


In [30]:
df.shape

(768, 9)

In [31]:
df.describe()

Unnamed: 0,Pregnancies,Glucose,BloodPressure,SkinThickness,Insulin,BMI,DiabetesPedigreeFunction,Age,Outcome
count,768.0,768.0,768.0,768.0,768.0,768.0,768.0,768.0,768.0
mean,3.845052,120.894531,69.105469,20.536458,79.799479,31.992578,0.471876,33.240885,0.348958
std,3.369578,31.972618,19.355807,15.952218,115.244002,7.88416,0.331329,11.760232,0.476951
min,0.0,0.0,0.0,0.0,0.0,0.0,0.078,21.0,0.0
25%,1.0,99.0,62.0,0.0,0.0,27.3,0.24375,24.0,0.0
50%,3.0,117.0,72.0,23.0,30.5,32.0,0.3725,29.0,0.0
75%,6.0,140.25,80.0,32.0,127.25,36.6,0.62625,41.0,1.0
max,17.0,199.0,122.0,99.0,846.0,67.1,2.42,81.0,1.0


In [32]:
df['Outcome'].value_counts()

0    500
1    268
Name: Outcome, dtype: int64

In [33]:
features = df.drop(columns='Outcome', axis=1)
target = df['Outcome']

In [34]:
scaler = StandardScaler()

In [35]:
scaler.fit(features)

In [36]:
standardized_data = scaler.transform(features)

In [37]:
features = standardized_data

In [38]:
xtrain, xtest, ytrain, ytest = train_test_split(features, target, test_size = 0.2, random_state = 2)

In [39]:
model = svm_classifier(0.001, 1000, 0.01)

In [40]:
model.fit(xtrain, ytrain)

In [41]:
xtrain.shape

(614, 8)

In [42]:
w1 = model.w

In [43]:
np.transpose(w1).shape

(8,)

In [44]:
w1.reshape(-1,1).shape

(8, 1)

In [45]:
model.b

array([ 0.61204459,  1.64645969, -0.28439235, -0.05833918, -0.11446761,
        0.76376456,  0.48314408,  0.18199597])

In [46]:
xtrain_prediction = model.predict(xtrain)

xtest_prediction = model.predict(xtest)

ValueError: ignored

In [None]:
df = pd.DataFrame(xtrain_prediction)
df2 = pd.DataFrame(ytrain)
df

In [None]:
rand_test_pred = accuracy_score(ytrain, arr)
rand_test_pred