In [None]:
import pandas as pd
import numpy as np

In [None]:
class SigmoidPerceptroon():
  """A single sigmoid neuron trained with per-sample (stochastic) gradient descent.

  The model outputs ``sigmoid(inputs . weights + bias)``. Training nudges the
  weights and bias after every individual data point so that the squared
  difference between that output and the 0/1 target shrinks.

  Attributes:
    weights: 1-D array holding one weight per input feature.
    bias: array of shape (1,) holding the single bias value.
  """

  def __init__(self, input_size):
    """Randomly initialise the parameters.

    Args:
      input_size: number of input features, i.e. the number of columns in the
        dataset. The model needs exactly one weight per feature.
    """
    # One weight per input feature, drawn uniformly from [0, 1).
    self.weights = np.random.rand(input_size)

    # The bias is a single scalar, stored as a length-1 array.
    self.bias = np.random.rand(1)

  def sigmoid(self, z):
    """Apply the sigmoid activation 1 / (1 + e^-z) to the weighted sum ``z``."""
    return 1/(1+np.exp(-z))

  def predict(self, inputs):
    """Return the sigmoid activation of the weighted sum of ``inputs``.

    Args:
      inputs: one feature vector (length ``input_size``) or a 2-D array with
        one feature vector per row.

    Returns:
      An array of activations in (0, 1): shape (1,) for a single vector, or
      one value per row for a 2-D input.
    """
    # The dot product multiplies each feature by its weight and sums the
    # products (w1*x1 + w2*x2 + ...); the bias is then added to that sum.
    weighted_sum = np.dot(inputs, self.weights) + self.bias

    return self.sigmoid(weighted_sum)

  def fit(self, inputs, targets, learning_rate, epochs):
    """Optimise the weights and bias with stochastic gradient descent.

    Args:
      inputs: 2-D array-like with one data point per row.
      targets: 1-D array-like of 0/1 labels, one per row of ``inputs``.
      learning_rate: step size applied to every parameter update.
      epochs: number of full passes over the data.
    """
    # Work on plain arrays so that row i / label i are always selected by
    # position, even if the caller passes lists or pandas objects.
    inputs = np.asarray(inputs)
    targets = np.asarray(targets)

    num_examples = inputs.shape[0]  # number of data points

    for epoch in range(epochs):

      # Stochastic gradient descent: the parameters are updated after each
      # individual data point rather than once per pass over the data.
      for i in range(num_examples):

        input_vector = inputs[i]
        target = targets[i]

        prediction = self.predict(input_vector)

        error = target-prediction

        # error * sigmoid'(z), where sigmoid'(z) = prediction * (1 - prediction).
        delta = error * prediction * (1-prediction)

        # Update weights. The step is ADDED to the current weights; assigning
        # the step alone would discard everything learned so far.
        gradient_weights = delta * input_vector # dw
        self.weights = self.weights + learning_rate*gradient_weights

        # Update bias in the same accumulating way.
        gradient_bias = delta # db
        self.bias = self.bias + learning_rate*gradient_bias

  def evaluate(self, inputs, targets):
    """Return the fraction of data points classified correctly.

    An activation of 0.5 or more is counted as class 1, anything lower as
    class 0.

    Args:
      inputs: 2-D array-like with one data point per row.
      targets: 1-D array-like of 0/1 labels, one per row of ``inputs``.

    Returns:
      Accuracy as a float between 0 and 1.

    Raises:
      ZeroDivisionError: if ``inputs`` is empty.
    """
    inputs = np.asarray(inputs)
    targets = np.asarray(targets)

    correct = 0  # counter of correct predictions

    # zip pairs every row of inputs with its own label.
    for input_vector, target in zip(inputs, targets):
      prediction = self.predict(input_vector)

      predicted_class = 1 if prediction >= 0.5 else 0

      if predicted_class == target:
        correct += 1

    accuracy = correct/len(inputs)

    return accuracy


In [None]:
# Colab-only: mount Google Drive at /content/drive so that files stored there
# can be read by path from this notebook.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


# **Starting the model: loading and preparing the data**

In [None]:
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

In [None]:
# Load the diabetes dataset from the mounted Google Drive.
# NOTE(review): the path is hard-coded to one Drive layout; consider a
# configurable data directory.
a = pd.read_csv("/content/drive/MyDrive/ML Datasets/diabetes.csv")
# pd.read_csv already returns a DataFrame, so this wrap does not change the data.
df = pd.DataFrame(a)

In [None]:
# Display the loaded dataset (pandas shows only the first and last rows of a long frame).
df

Unnamed: 0,Pregnancies,Glucose,BloodPressure,SkinThickness,Insulin,BMI,DiabetesPedigreeFunction,Age,Outcome
0,6,148,72,35,0,33.6,0.627,50,1
1,1,85,66,29,0,26.6,0.351,31,0
2,8,183,64,0,0,23.3,0.672,32,1
3,1,89,66,23,94,28.1,0.167,21,0
4,0,137,40,35,168,43.1,2.288,33,1
...,...,...,...,...,...,...,...,...,...
763,10,101,76,48,180,32.9,0.171,63,0
764,2,122,70,27,0,36.8,0.340,27,0
765,5,121,72,23,112,26.2,0.245,30,0
766,1,126,60,0,0,30.1,0.349,47,1


In [None]:
# Count missing (NaN) values in every column.
# NOTE(review): the preview shows 0 in columns such as SkinThickness and Insulin;
# these may be placeholders for missing measurements that isnull() cannot
# detect — confirm against the dataset description.
df.isnull().sum()

Unnamed: 0,0
Pregnancies,0
Glucose,0
BloodPressure,0
SkinThickness,0
Insulin,0
BMI,0
DiabetesPedigreeFunction,0
Age,0
Outcome,0


In [None]:
# How many rows belong to each class of the label column.
df.Outcome.value_counts()

Unnamed: 0_level_0,count
Outcome,Unnamed: 1_level_1
0,500
1,268


In [None]:
# All rows whose label is 0 (the larger class).
class_zero = df.loc[df["Outcome"] == 0]

In [None]:
# Undersample class 0 down to the size of class 1 so that both classes are
# equally represented. The sample size is derived from the data instead of
# being hard-coded, and random_state pins the draw so that re-running the
# notebook selects the same rows.
n_minority = int((df["Outcome"] == 1).sum())
class_zero_new = class_zero.sample(n=n_minority, random_state=2)

In [None]:
# Display the undersampled class-0 rows.
class_zero_new

Unnamed: 0,Pregnancies,Glucose,BloodPressure,SkinThickness,Insulin,BMI,DiabetesPedigreeFunction,Age,Outcome
3,1,89,66,23,94,28.1,0.167,21,0
657,1,120,80,48,200,38.9,1.162,41,0
33,6,92,92,0,0,19.9,0.188,28,0
677,0,93,60,0,0,35.3,0.263,25,0
491,2,89,90,30,0,33.5,0.292,42,0
...,...,...,...,...,...,...,...,...,...
464,10,115,98,0,0,24.0,1.022,34,0
225,1,87,78,27,32,34.6,0.101,22,0
62,5,44,62,0,0,25.0,0.587,36,0
146,9,57,80,37,0,32.8,0.096,41,0


In [None]:
# All rows whose label is 1 (the smaller class); kept in full.
class_one = df.loc[df["Outcome"] == 1]

In [None]:
# Display the class-1 rows.
class_one

Unnamed: 0,Pregnancies,Glucose,BloodPressure,SkinThickness,Insulin,BMI,DiabetesPedigreeFunction,Age,Outcome
0,6,148,72,35,0,33.6,0.627,50,1
2,8,183,64,0,0,23.3,0.672,32,1
4,0,137,40,35,168,43.1,2.288,33,1
6,3,78,50,32,88,31.0,0.248,26,1
8,2,197,70,45,543,30.5,0.158,53,1
...,...,...,...,...,...,...,...,...,...
755,1,128,88,39,110,36.5,1.057,37,1
757,0,123,72,0,0,36.3,0.258,52,1
759,6,190,92,0,0,35.5,0.278,66,1
761,9,170,74,31,0,44.0,0.403,43,1


In [None]:
# Stack the sampled class-0 rows on top of the class-1 rows (row-wise concat).
# The original row labels are kept, and the rows stay grouped by class.
new_df = pd.concat(objs=[class_zero_new, class_one], axis="index")

In [None]:
# (rows, columns) of the balanced frame.
new_df.shape

(536, 9)

In [None]:
# Confirm that both classes now have the same number of rows.
new_df.Outcome.value_counts()

Unnamed: 0_level_0,count
Outcome,Unnamed: 1_level_1
0,268
1,268


In [None]:
# Display the balanced dataset.
new_df

Unnamed: 0,Pregnancies,Glucose,BloodPressure,SkinThickness,Insulin,BMI,DiabetesPedigreeFunction,Age,Outcome
3,1,89,66,23,94,28.1,0.167,21,0
657,1,120,80,48,200,38.9,1.162,41,0
33,6,92,92,0,0,19.9,0.188,28,0
677,0,93,60,0,0,35.3,0.263,25,0
491,2,89,90,30,0,33.5,0.292,42,0
...,...,...,...,...,...,...,...,...,...
755,1,128,88,39,110,36.5,1.057,37,1
757,0,123,72,0,0,36.3,0.258,52,1
759,6,190,92,0,0,35.5,0.278,66,1
761,9,170,74,31,0,44.0,0.403,43,1


In [None]:
# Feature matrix: every column except the label, as a NumPy array.
X = new_df.drop(columns=["Outcome"]).to_numpy()

In [None]:
# Label vector: the Outcome column as a NumPy array.
Y = new_df.Outcome.to_numpy()

In [None]:
# Display the feature matrix.
X

array([[1.000e+00, 8.900e+01, 6.600e+01, ..., 2.810e+01, 1.670e-01,
        2.100e+01],
       [1.000e+00, 1.200e+02, 8.000e+01, ..., 3.890e+01, 1.162e+00,
        4.100e+01],
       [6.000e+00, 9.200e+01, 9.200e+01, ..., 1.990e+01, 1.880e-01,
        2.800e+01],
       ...,
       [6.000e+00, 1.900e+02, 9.200e+01, ..., 3.550e+01, 2.780e-01,
        6.600e+01],
       [9.000e+00, 1.700e+02, 7.400e+01, ..., 4.400e+01, 4.030e-01,
        4.300e+01],
       [1.000e+00, 1.260e+02, 6.000e+01, ..., 3.010e+01, 3.490e-01,
        4.700e+01]])

In [None]:
# Hold out 20% of the rows for testing. stratify=Y keeps the class ratio the
# same in both splits; random_state fixes the shuffle.
X_train, X_test, Y_train, Y_test = train_test_split(
    X,
    Y,
    test_size=0.2,
    random_state=2,
    stratify=Y,
)

In [None]:
# Shapes of the full, training and test feature matrices, in that order.
print(*(part.shape for part in (X, X_train, X_test)))

(536, 8) (428, 8) (108, 8)


In [None]:
# Scaler used to standardise the feature columns.
scaler = StandardScaler()

In [None]:
# Fit the scaler on the training split only, so that no statistics from the
# test split leak into preprocessing.
scaler.fit(X_train)

In [None]:
# Replace the training features with their standardised values.
# NOTE(review): this overwrites X_train, so running this cell a second time
# would scale already-scaled data — re-run from the split cell instead.
X_train = scaler.transform(X_train)

In [None]:
# Scale the test features with the scaler that was fitted on the training split.
# NOTE(review): this overwrites X_test, so running this cell a second time
# would scale already-scaled data — re-run from the split cell instead.
X_test = scaler.transform(X_test)

In [None]:
# Standard deviation over all entries of the scaled training matrix.
X_train.std()

np.float64(1.0)

In [None]:
# Standard deviation over all entries of the scaled test matrix. The scaler was
# fitted on the training split, so this need not be exactly 1.
X_test.std()

np.float64(1.0640736822907082)

# **Training: create an instance of the SigmoidPerceptroon class defined above and fit it**

In [None]:
# Seed NumPy's global random generator so that the random initial weights and
# bias drawn in SigmoidPerceptroon.__init__ are the same on every run.
np.random.seed(2)

# One weight per feature column of the training data.
model = SigmoidPerceptroon(input_size = X_train.shape[1])

In [None]:
# Train on the scaled training split: 100 passes with a step size of 0.02.
model.fit(
    X_train,
    Y_train,
    learning_rate=0.02,
    epochs=100,
)

In [None]:
from sklearn.metrics import accuracy_score

In [None]:
# Accuracy on the training split. Despite the variable name, evaluate() returns
# the fraction of correct predictions, not the predictions themselves.
X_train_prediction = model.evaluate(X_train, Y_train)
print(X_train_prediction)

0.616822429906542


In [None]:
# Accuracy on the held-out test split. Despite the variable name, evaluate()
# returns the fraction of correct predictions, not the predictions themselves.
X_test_prediction = model.evaluate(X_test, Y_test)
print(X_test_prediction)

0.5833333333333334
