In [1]:
import numpy as np
import torch
import torch.nn as nn
import pandas as pd
from sklearn.preprocessing import StandardScaler
from torch.utils.data import Dataset

In [2]:
data = pd.read_csv("diabetes.csv")

In [3]:
data.head(10)

Unnamed: 0,Number of times pregnant,Plasma glucose concentration,Diastolic blood pressure,Triceps skin fold thickness,2-Hour serum insulin,Body mass index,Age,Class
0,6,148,72,35,0,33.6,50,positive
1,1,85,66,29,0,26.6,31,negative
2,8,183,64,0,0,23.3,32,positive
3,1,89,66,23,94,28.1,21,negative
4,0,137,40,35,168,43.1,33,positive
5,5,116,74,0,0,25.6,30,negative
6,3,78,50,32,88,31.0,26,positive
7,10,115,0,0,0,35.3,29,negative
8,2,197,70,45,543,30.5,53,positive
9,8,125,96,0,0,0.0,54,positive


In [4]:
x = data.iloc[:,:-1].values
y_str = data.iloc[:,-1].values

In [5]:
x

array([[  6. , 148. ,  72. , ...,   0. ,  33.6,  50. ],
       [  1. ,  85. ,  66. , ...,   0. ,  26.6,  31. ],
       [  8. , 183. ,  64. , ...,   0. ,  23.3,  32. ],
       ...,
       [  5. , 121. ,  72. , ..., 112. ,  26.2,  30. ],
       [  1. , 126. ,  60. , ...,   0. ,  30.1,  47. ],
       [  1. ,  93. ,  70. , ...,   0. ,  30.4,  23. ]])

In [6]:
x.shape

(768, 7)

In [7]:
len(y_str)

768

In [8]:
np.unique(y_str)

array(['negative', 'positive'], dtype=object)

In [9]:
y_int = [1 if s=='positive' else 0 for s in y_str]

In [10]:
y = np.array(y_int,dtype = "float64")

In [11]:
sc = StandardScaler()
x = sc.fit_transform(x)

In [12]:
x

array([[ 0.63994726,  0.84832379,  0.14964075, ..., -0.69289057,
         0.20401277,  1.4259954 ],
       [-0.84488505, -1.12339636, -0.16054575, ..., -0.69289057,
        -0.68442195, -0.19067191],
       [ 1.23388019,  1.94372388, -0.26394125, ..., -0.69289057,
        -1.10325546, -0.10558415],
       ...,
       [ 0.3429808 ,  0.00330087,  0.14964075, ...,  0.27959377,
        -0.73518964, -0.27575966],
       [-0.84488505,  0.1597866 , -0.47073225, ..., -0.69289057,
        -0.24020459,  1.17073215],
       [-0.84488505, -0.8730192 ,  0.04624525, ..., -0.69289057,
        -0.20212881, -0.87137393]])

In [13]:
x = torch.tensor(x)
y = torch.tensor(y)

In [14]:
x.shape

torch.Size([768, 7])

In [15]:
y.shape

torch.Size([768])

In [16]:
# for binary cross entropy both label and input should be of same dimension ! so we unsqueeze y !!

In [17]:
y = y.unsqueeze(1) # or y.unsqueeze_(1) and since this is an inplace operation so we don't need to reassign it to y !!

DO NOT RERUN THE UNSQUEEZE CELL !!! this will create one more dimension !!!

In [19]:
print(x.shape,y.shape)

torch.Size([768, 7]) torch.Size([768, 1])


In [20]:
class Dataset(Dataset) : 
    def __init__(self,x,y) :
        self.x = x
        self.y = y
    def __getitem__(self,index) : 
        return self.x[index],self.y[index]
    def __len__(self) :
        return len(self.x)
    

In [21]:
dataset = Dataset(x,y)

In [22]:
len(dataset)

768

In [23]:
from torch.utils.data import DataLoader

In [24]:
train_loader = DataLoader(dataset=dataset,
                          batch_size=32,
                          shuffle = True)

In [25]:
print(f"There are {len(train_loader)} batches in the dataset")
for (x,y) in train_loader :
    print("for one iteration (batch), there is :")
    print(f"Data : {x.shape}\nLabels : {y.shape}")
    break

There are 24 batches in the dataset
for one iteration (batch), there is :
Data : torch.Size([32, 7])
Labels : torch.Size([32, 1])


In [28]:
class Model(nn.Module) : # inheritance in python
    def __init__(self,input_features,output_features) : 
        super(Model,self).__init__() # this function just takes the methods of parent class and pass it to inherited class !
        self.fc1 = nn.Linear(input_features,5)
        self.fc2 = nn.Linear(5,4)
        self.fc3 = nn.Linear(4,3)
        self.fc4 = nn.Linear(3,output_features)
        self.sigmoid = nn.Sigmoid()
        self.tanh = nn.Tanh()
    def forward(self,x):
        out = self.fc1(x)
        out = self.tanh(out) # every layer is activated using tanh()
        out = self.fc2(out)
        out = self.tanh(out)
        out = self.fc3(out)
        out = self.tanh(out)
        out = self.fc4(out)
        out = self.sigmoid(out) # last layer is activated using sigmoid()
        return out 

In [37]:
network = Model(7,1)
criterion = nn.BCELoss(reduction="mean") # earlier, the arg was size_average = True !!
optimizer = torch.optim.SGD(network.parameters(),lr = 0.01, momentum=0.9)

In [38]:
epochs = 200
for e in range(epochs) : 
    for inputs,labels in train_loader : 
        inputs = inputs.float()
        labels = labels.float()
        # forward pass
        outputs = network(inputs) # or we can just call net.forward(inputs) !!
        loss = criterion(outputs,labels)
        # backward pass
        optimizer.zero_grad()
        loss.backward()
        # update weights
        optimizer.step()
    # accuracy per epoch
    output = (outputs>0.5).float() # > compares correspoding elements !!
    acc = (output==labels).float().mean() # == compares corresponding elements !!
    print(f"epoch no. {e+1} : loss = {loss:.3f}, acc = {acc:.3f}")

epoch no. 1 : loss = 0.672, acc = 0.625
epoch no. 2 : loss = 0.693, acc = 0.531
epoch no. 3 : loss = 0.561, acc = 0.781
epoch no. 4 : loss = 0.552, acc = 0.719
epoch no. 5 : loss = 0.534, acc = 0.719
epoch no. 6 : loss = 0.481, acc = 0.875
epoch no. 7 : loss = 0.631, acc = 0.688
epoch no. 8 : loss = 0.615, acc = 0.656
epoch no. 9 : loss = 0.491, acc = 0.781
epoch no. 10 : loss = 0.429, acc = 0.844
epoch no. 11 : loss = 0.550, acc = 0.750
epoch no. 12 : loss = 0.307, acc = 0.875
epoch no. 13 : loss = 0.465, acc = 0.781
epoch no. 14 : loss = 0.407, acc = 0.875
epoch no. 15 : loss = 0.479, acc = 0.781
epoch no. 16 : loss = 0.615, acc = 0.625
epoch no. 17 : loss = 0.547, acc = 0.750
epoch no. 18 : loss = 0.345, acc = 0.875
epoch no. 19 : loss = 0.378, acc = 0.844
epoch no. 20 : loss = 0.442, acc = 0.781
epoch no. 21 : loss = 0.550, acc = 0.688
epoch no. 22 : loss = 0.605, acc = 0.656
epoch no. 23 : loss = 0.467, acc = 0.781
epoch no. 24 : loss = 0.480, acc = 0.750
epoch no. 25 : loss = 0.4