In [39]:
import numpy as np

In [40]:
class SVM_classifier:

    # initiating the hyperparameters
    def __init__(self, learning_rate, no_of_iterations, lambda_parameter):
        self.learning_rate = learning_rate
        self.no_of_iterations = no_of_iterations
        self.lambda_parameter = lambda_parameter

    # fitting the data to svm classifier
    def fit(self, x, y):

        # m -> total number of data points
        # n -> total number of features
        self.m, self.n = x.shape

        # x -> features columns
        # y -> target column
        self.x = x
        self.y = y

        # initiating weights(w) and bias(b) values
        self.b = 0
        self.w = np.zeros(self.n)

        # implementing the gradient descent algorithm
        for i in range(self.no_of_iterations):
            self.update_weights()

    # updating the weight and bias values for the model
    def update_weights(self):

        # label encoding
        y_label = np.where(self.y<=0, -1, 1)

        # gradients (dw, db)
        for index,x_i in enumerate(self.x):

            condition = y_label[index]*(np.dot(x_i, self.w) -self.b) >= 1 

            if condition == True:
                dw = 2*self.lambda_parameter*self.w
                db = 0
            
            else:
                dw = 2*self.lambda_parameter*self.w - np.dot(x_i, y_label[index])
                db = y_label[index]

            self.w = self.w - self.learning_rate*dw
            self.b = self.b - self.learning_rate*db

    # predicting the label of the given input data point
    def predict(self, x):

        output = np.dot(x, self.w) - self.b
        predicted_labels = np.sign(output)

        y_hat = np.where(predicted_labels<=-1, 0, 1)

        return y_hat

# making predictions using this model

In [41]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score

In [42]:
df = pd.read_csv(r"C:\Users\22213\OneDrive\Desktop\CSV\diabetes.csv")

In [43]:
df.head()

Unnamed: 0,Pregnancies,Glucose,BloodPressure,SkinThickness,Insulin,BMI,DiabetesPedigreeFunction,Age,Outcome
0,6,148,72,35,0,33.6,0.627,50,1
1,1,85,66,29,0,26.6,0.351,31,0
2,8,183,64,0,0,23.3,0.672,32,1
3,1,89,66,23,94,28.1,0.167,21,0
4,0,137,40,35,168,43.1,2.288,33,1


In [44]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 768 entries, 0 to 767
Data columns (total 9 columns):
 #   Column                    Non-Null Count  Dtype  
---  ------                    --------------  -----  
 0   Pregnancies               768 non-null    int64  
 1   Glucose                   768 non-null    int64  
 2   BloodPressure             768 non-null    int64  
 3   SkinThickness             768 non-null    int64  
 4   Insulin                   768 non-null    int64  
 5   BMI                       768 non-null    float64
 6   DiabetesPedigreeFunction  768 non-null    float64
 7   Age                       768 non-null    int64  
 8   Outcome                   768 non-null    int64  
dtypes: float64(2), int64(7)
memory usage: 54.1 KB


In [45]:
input_features = df.drop(columns='Outcome', axis=1)
target_column = df['Outcome']

In [46]:
scaler = StandardScaler()

In [47]:
input_features = scaler.fit_transform(input_features)

In [48]:
x_train, x_test, y_train, y_test = train_test_split(input_features, target_column, test_size=.15, stratify=target_column)

In [49]:
model = SVM_classifier(no_of_iterations=10000, lambda_parameter=.01, learning_rate=.01)

In [50]:
model.fit(x_train, y_train)

# predicting

In [54]:
input_data = [1,85,66,29,0,26.6,0.351,31]
input_data = np.asarray(input_data)
input_data = input_data.reshape(1,-1)
std_data = scaler.transform(input_data)
p = model.predict(std_data)
if p[0] == 1:
    print('diabetic')
else:
    print('not diabetic')

not diabetic




# accuracy score

In [52]:
# accuracy score on train data
x_train_pred = model.predict(x_train)
train_data_accuracy = accuracy_score(y_train, x_train_pred)
print('Accuracy score for training data:', train_data_accuracy)

Accuracy score for training data: 0.7791411042944786


In [53]:
# accuracy score on test data
x_test_pred = model.predict(x_test)
test_data_accuracy = accuracy_score(y_test, x_test_pred)
print('Accuracy score for test data:', test_data_accuracy)

Accuracy score for test data: 0.7327586206896551
