IMPORTING THE DEPENDENCIES

In [1]:
import numpy as np

SVM CLASSIFIER

In [2]:
class svm_classifier():
    """Linear soft-margin SVM trained with per-sample sub-gradient descent.

    The decision function is ``np.dot(x, w) - b``. Training labels are mapped
    to -1/+1 (any value <= 0 becomes -1, everything else +1) and predictions
    are reported as 0/1.
    """

    def __init__(self, learning_rate, no_of_iteration, lambda_parameter):
        """Store the hyperparameters.

        Args:
            learning_rate: step size applied to every weight/bias update.
            no_of_iteration: number of full passes over the training data.
            lambda_parameter: strength of the L2 penalty on the weights.
        """
        self.learning_rate = learning_rate
        self.no_of_iteration = no_of_iteration
        self.lambda_parameter = lambda_parameter

    def fit(self, X, Y):
        """Fit the weights ``w`` and bias ``b`` to the training data.

        Args:
            X: 2-D array-like of shape (m, n); NumPy arrays, DataFrames and
                nested lists are accepted.
            Y: 1-D array-like of m labels; values <= 0 are the negative class.

        Raises:
            ValueError: if X is not 2-D or X and Y disagree on sample count.
        """
        # Convert up front: iterating a DataFrame yields column labels rather
        # than rows, and plain lists have no ``.shape``.
        self.X = np.asarray(X, dtype=float)
        self.Y = np.asarray(Y).ravel()

        if self.X.ndim != 2:
            raise ValueError("X must be 2-dimensional (samples x features)")

        self.m, self.n = self.X.shape
        if self.Y.shape[0] != self.m:
            raise ValueError("X and Y must contain the same number of samples")

        self.w = np.zeros(self.n)
        self.b = 0

        # label encoding -- done once here because it never changes between
        # epochs
        self.y_label = np.where(self.Y <= 0, -1, 1)

        for _ in range(self.no_of_iteration):
            self.update_weights()

    def update_weights(self):
        """Run one epoch: one sub-gradient step per training sample.

        Must be called after ``fit`` has initialised ``w``, ``b`` and the
        encoded labels.
        """
        regularisation = 2 * self.lambda_parameter

        for x_i, y_i in zip(self.X, self.y_label):
            if y_i * (np.dot(x_i, self.w) - self.b) >= 1:
                # sample is on the correct side with sufficient margin:
                # only the regularisation term contributes
                dw = regularisation * self.w
                db = 0
            else:
                # margin violated: the hinge-loss term contributes as well
                dw = regularisation * self.w - x_i * y_i
                db = y_i
            self.w = self.w - self.learning_rate * dw
            self.b = self.b - self.learning_rate * db

    def predict(self, X):
        """Return 0/1 labels for the rows of ``X``.

        A decision value of exactly 0 is assigned to class 1.
        """
        output = np.dot(np.asarray(X, dtype=float), self.w) - self.b
        y_hat = np.where(output < 0, 0, 1)
        return y_hat

Importing the dependencies

In [3]:
import pandas as pd
from sklearn.preprocessing import StandardScaler # to standardise the data in same range
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

Data Collection and Processing

In [4]:
# loading the data from csv file to pandas dataframe
# (the path is relative, so it resolves against the notebook's working directory)
diabetes_data = pd.read_csv('datasets/diabetes.csv')

In [5]:
diabetes_data.head()

Unnamed: 0,Pregnancies,Glucose,BloodPressure,SkinThickness,Insulin,BMI,DiabetesPedigreeFunction,Age,Outcome
0,6,148,72,35,0,33.6,0.627,50,1
1,1,85,66,29,0,26.6,0.351,31,0
2,8,183,64,0,0,23.3,0.672,32,1
3,1,89,66,23,94,28.1,0.167,21,0
4,0,137,40,35,168,43.1,2.288,33,1


In [6]:
diabetes_data.shape

(768, 9)

In [7]:
# number of rows per class in the target column
# (diabetes_data.describe() would give the statistical measures instead)
diabetes_data['Outcome'].value_counts()

Outcome
0    500
1    268
Name: count, dtype: int64

0 => non-diabetic
1 => diabetic

In [8]:
# split the frame into the label column and the remaining feature columns
target = diabetes_data['Outcome']

features = diabetes_data.drop(columns='Outcome')

Data Standardization

In [9]:
scaler = StandardScaler()

In [10]:
scaler.fit(features)

In [11]:
std_data = scaler.transform(features)

In [12]:
# NOTE(review): `scaler` was fitted on the full dataset, i.e. before the
# train/test split below, so test-set statistics influence the scaling --
# consider fitting the scaler on the training split only.
# `features` is rebound here from the DataFrame to the output of scaler.transform.
features = std_data
target = diabetes_data['Outcome']

Train Test Split

In [13]:
# hold out 20% of the rows for testing; random_state pins the shuffle so the split is repeatable
X_train, X_test, Y_train, Y_test = train_test_split(features, target, test_size = 0.2, random_state = 2 )

Training the model

Support Vector Machine Classifier

In [14]:
# learning_rate: step size of each update; no_of_iteration: passes over the
# training data; lambda_parameter: weight of the regularisation term
classifier = svm_classifier(learning_rate = 0.001, no_of_iteration=1000, lambda_parameter=0.01)

In [15]:
# training the SVM classifier with training data
classifier.fit(X_train, Y_train)

Model Evaluation

In [16]:
# accuracy on training data (the same rows the classifier was fitted on)
X_train_prediction = classifier.predict(X_train)

training_data_accuracy = accuracy_score(Y_train, X_train_prediction)

In [17]:
print(training_data_accuracy)

0.7768729641693811


In [18]:
# accuracy on test data (rows held out by train_test_split, not used in fit)
X_test_prediction = classifier.predict(X_test)

test_data_accuracy = accuracy_score(Y_test, X_test_prediction)

print(test_data_accuracy)

0.7532467532467533


Building a predictive system

In [19]:
input_data = (5,166,82,20,175,25.8,0.587,51)

# change the input data to numpy array
input_data_as_numpy_array = np.asarray(input_data)

# reshape the array to a single row (one sample, all features)
input_data_reshaped = input_data_as_numpy_array.reshape(1,-1)

# The scaler was fitted on a DataFrame, so give it the same column names;
# passing a bare array makes scikit-learn warn about missing feature names.
feature_names = diabetes_data.columns.drop('Outcome')
input_data_frame = pd.DataFrame(input_data_reshaped, columns=feature_names)

# standardizing the input data
# (kept under its own name so the dataset-wide `std_data` is not overwritten,
# which would break a re-run of the cell that assigns it to `features`)
input_data_std = scaler.transform(input_data_frame)
print(input_data_std)

prediction = classifier.predict(input_data_std)
print(prediction)

if prediction[0] == 0:
    print('The person is not diabetic')
else:
    print('The Person is diabetic')

[[ 0.3429808   1.41167241  0.66661825 -0.03365099  0.82661621 -0.78595734
   0.34768723  1.51108316]]
[1]
The Person is diabetic


