In [1]:
''' 
Equation of the Hyperplane:

y = wx - b

Gradient Descent:

Gradient Descent is an optimization algorithm used for minimizing the loss function in various machine learning algorithms. It is used for updating the parameters of the learning model.

w = w - α*dw

b = b - α*db

Learning Rate:

Learning rate is a tuning parameter in an optimization algorithm that determines the step size at each iteration while moving toward a minimum of a loss function.
'''

' \nEquation of the Hyperplane:\n\ny = wx - b\n\nGradient Descent:\n\nGradient Descent is an optimization algorithm used for minimizing the loss function in various machine learning algorithms. It is used for updating the parameters of the learning model.\n\nw = w - α*dw\n\nb = b - α*db\n\nLearning Rate:\n\nLearning rate is a tuning parameter in an optimization algorithm that determines the step size at each iteration while moving toward a minimum of a loss function.\n'

In [2]:
#importing the dependencies 
import numpy as np 

In [3]:
#Support vector machine classifier 

In [4]:
class svm_classifier(): 
    #for hyperparamers  self, 
    def __init__(self, learning_rate , no_of_iteration, lambda_parameter):
        self.learning_rate = learning_rate
        self.no_of_iteration = no_of_iteration
        self.lambda_parameter = lambda_parameter
        
    #fitting the dataset to svm classifier 
    def fit(self, X, Y):
        #m - no of datapoints -> no of rows 
        #n- no of input features -> no of columns , no of weights 
        #X  = no of features 
        #Y  = no of outcomes  
        self.m, self.n= X.shape

        #initating the weight and bias value 
        self.w = np.zeros(self.n)
        self.b = 0

        self.X=X
        self.Y=Y 

        #implementing grediant descent algorithm for Optimization 
        for i in range (self.no_of_iteration): 
            self.update_weights()
        
        
    #function for updating the weight and bias value 
    def update_weights(self): 

        #label encoding 
        y_label = np.where(self.Y <=0, -1, 1) #if Y==0 ,it will convert to -1
        
        #graidents(dw,db) 
        for index, x_i in enumerate(self.X):
            #enumerate gives particular index value and its data 

            condition = y_label[index] * (np.dot(x_i,self.w)-self.b) >=1

            if(condition==True): 

                dw = 2 * self.lambda_parameter * self.w
                db = 0 
            else: 
                dw = 2 * self.lambda_parameter * self.w - np.dot(x_i,y_label[index])
                db = y_label[index]

            self.w = self.w - self.learning_rate * dw 
            self.b = self.b - self.learning_rate * db 

    #predicting the label for given input value 
    def predict(self,X): 
        output = np.dot(X, self.w) - self.b
        predicted_labels = np.sign(output)

        y_hat = np.where(predicted_labels <= -1, 0 , 1)

        return y_hat
        
    

In [5]:
import pandas as pd 
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split 
from sklearn.metrics import accuracy_score 

In [6]:
dia_data = pd.read_csv("diabetes.csv")
dia_data.head()

Unnamed: 0,Pregnancies,Glucose,BloodPressure,SkinThickness,Insulin,BMI,DiabetesPedigreeFunction,Age,Outcome
0,6,148,72,35,0,33.6,0.627,50,1
1,1,85,66,29,0,26.6,0.351,31,0
2,8,183,64,0,0,23.3,0.672,32,1
3,1,89,66,23,94,28.1,0.167,21,0
4,0,137,40,35,168,43.1,2.288,33,1


In [7]:
dia_data.shape

(768, 9)

In [9]:
dia_data.describe()

Unnamed: 0,Pregnancies,Glucose,BloodPressure,SkinThickness,Insulin,BMI,DiabetesPedigreeFunction,Age,Outcome
count,768.0,768.0,768.0,768.0,768.0,768.0,768.0,768.0,768.0
mean,3.845052,120.894531,69.105469,20.536458,79.799479,31.992578,0.471876,33.240885,0.348958
std,3.369578,31.972618,19.355807,15.952218,115.244002,7.88416,0.331329,11.760232,0.476951
min,0.0,0.0,0.0,0.0,0.0,0.0,0.078,21.0,0.0
25%,1.0,99.0,62.0,0.0,0.0,27.3,0.24375,24.0,0.0
50%,3.0,117.0,72.0,23.0,30.5,32.0,0.3725,29.0,0.0
75%,6.0,140.25,80.0,32.0,127.25,36.6,0.62625,41.0,1.0
max,17.0,199.0,122.0,99.0,846.0,67.1,2.42,81.0,1.0


In [10]:
dia_data['Outcome'].value_counts()

Outcome
0    500
1    268
Name: count, dtype: int64

In [11]:
features = dia_data.drop(columns='Outcome',axis=1)
target = dia_data['Outcome']

In [12]:
scaler = StandardScaler()

In [13]:
scaler.fit(features)
standardized_data = scaler.transform(features)

In [14]:
print(standardized_data)

[[ 0.63994726  0.84832379  0.14964075 ...  0.20401277  0.46849198
   1.4259954 ]
 [-0.84488505 -1.12339636 -0.16054575 ... -0.68442195 -0.36506078
  -0.19067191]
 [ 1.23388019  1.94372388 -0.26394125 ... -1.10325546  0.60439732
  -0.10558415]
 ...
 [ 0.3429808   0.00330087  0.14964075 ... -0.73518964 -0.68519336
  -0.27575966]
 [-0.84488505  0.1597866  -0.47073225 ... -0.24020459 -0.37110101
   1.17073215]
 [-0.84488505 -0.8730192   0.04624525 ... -0.20212881 -0.47378505
  -0.87137393]]


In [16]:
features = standardized_data

In [17]:
x_train,x_test,y_train,y_test = train_test_split(features,target,test_size=0.2,random_state=42)

In [18]:
classifier = svm_classifier(learning_rate=0.001,no_of_iteration=1000,lambda_parameter=0.01)

In [19]:
classifier.fit(x_train,y_train)

In [20]:
x_train_predict = classifier.predict(x_train)
trainin_data_accuracy = accuracy_score(y_train,x_train_predict)
print(trainin_data_accuracy)

0.7752442996742671


In [21]:
x_test_predict = classifier.predict(x_test)
test_data_accuracy = accuracy_score(x_test_predict,y_test)
print(test_data_accuracy)

0.7597402597402597
