In [1]:
# import libraries
import numpy as np
import torch
import torch.nn as nn
import pandas as pd
from sklearn.preprocessing import StandardScaler
from torch.utils.data import Dataset
from torch.utils.data import DataLoader

In [2]:
# Load the dataset using Pandas
data = pd.read_csv('diabetes.csv')

Preprocess Data

In [3]:
# Preprocess data
# For x: Extract out the dataset for input (all cols except the last)
# For y: Extract out the dataset for output (last col)
# Convert x & y to numpy usind .values

x = data.iloc[:, 0:-1].values   # 'ilock' > use array indexing in np
y_string = list(data.iloc[:, -1])

In [4]:
# convert the y data into values
y_int = []
for element in y_string:
    if element == "positive":
        y_int.append(1)
    else:
        y_int.append(0)       

In [5]:
#convert list to np array
y = np.array(y_int, dtype='float64')    # float bez x is of float type

Normalization

In [6]:
# Feature Normalisation in range (-1,1) using StandardScaler
sc = StandardScaler()
x = sc.fit_transform(x)

In [7]:
# Convert Arrays to Tensors
x = torch.tensor(x)
y = torch.tensor(y).unsqueeze(1)
# unsquesed to add a dimension

Creating and Loading Dataset

In [8]:
# create class dataset
class Dataset(Dataset):
    def __init__(self, x, y):
        self.x = x
        self.y = y
    
    def __getitem__(self, index):
        return self.x[index], self.y[index]
    
    def __len__(self):
        return len(self.x)
# self is used to refrence to Dataset

In [9]:
dataset = Dataset(x,y)

In [10]:
# Load data to database for batch processing and shifting
train_loader = DataLoader(dataset = dataset,
           batch_size = 32,
           shuffle = True)

Building the Neural Network

In [11]:
class Model(nn.Module):         #inherit from nn.Module
    # Initalisation function
    def __init__(self, input_feature, output_features):
        super(Model, self).__init__()
        # define the layers, ie the attributes
        self.fc1 = nn.Linear(input_feature, 5)        # fc > fully connected layer, MLB or linear layer
        self.fc2 = nn.Linear(5, 4)        
        self.fc3 = nn.Linear(4, 3)        
        self.fc4 = nn.Linear(3, output_features) 
        # define the activation functions, ie the functionallites  
        self.sigmoid = nn.Sigmoid()
        self.tanh = nn.Tanh()

        # Every function needs to have attributes (layers of NN) and functionalities (froward & back propogation function)

    # Froward Propogation Function
    def forward(self, x):
        # passing the data through NN
        out = self.fc1(x)       # pass throug layer
        out = self.tanh(out)    #pass through activation function
        out = self.fc2(out)
        out = self.tanh(out)
        out = self.fc3(out)
        out = self.tanh(out)
        out = self.fc4(out)
        out = self.sigmoid(out)
        return out

# Defining back proppogation method is not required as pytorch would do it


In [12]:
# Create the network (an object of the Net class)
net = Model(7, 1)
# No. of input feature/layer is 7 &  No. of output feature/layer is 1

In [13]:
# Define Loss function ie BCE (BCE bez the output needs to be 0 or 1)
criterion = nn.BCELoss(size_average= True)      #size_average=True > Losses are averaged over observations for each minibatch & get one value for loss



In [14]:
# Define Optimisation function ie SGD with momentum with learning rate of 0.1
optimizer = torch.optim.SGD(net.parameters(), lr = 0.1, momentum = 0.9)       # parameters is the attributes/ weights for each layer

Training the Network

In [16]:
# Define Hyper-parameter ie no. of epochs
epochs = 200

for epoch in range(200):
    for (inputs, labels) in train_loader:
        inputs = inputs.float()
        labels = labels.float()
        # Forward prop.
        outputs = net(inputs)
        # Loss Calculation
        loss = criterion(outputs, labels)

        # Begin forward propagation
        # STEP1 clear gradient buffer (w = w - lr*gradient)
        optimizer.zero_grad()
        # STEP2 calculate the gradient // backpropagation
        loss.backward()
        # STEP3 update the weights
        optimizer.step()

    # Calculate training accuracy
    output = (outputs > 0.5).float()

    # Calculate average accuracy    (output == labels).sum() / output.shape[0]
    accuracy = (output == labels).float().mean()

    # Print Statistics
    print("Epoch {}/{}, Loss: {:.3f}, Accuracy: {:.3f}".format(epoch+1, epochs, loss, accuracy))

Epoch 1/200, Loss: 0.618, Accuracy: 0.688
Epoch 2/200, Loss: 0.566, Accuracy: 0.594
Epoch 3/200, Loss: 0.429, Accuracy: 0.781
Epoch 4/200, Loss: 0.405, Accuracy: 0.844
Epoch 5/200, Loss: 0.433, Accuracy: 0.781
Epoch 6/200, Loss: 0.462, Accuracy: 0.875
Epoch 7/200, Loss: 0.483, Accuracy: 0.750
Epoch 8/200, Loss: 0.376, Accuracy: 0.781
Epoch 9/200, Loss: 0.511, Accuracy: 0.750
Epoch 10/200, Loss: 0.427, Accuracy: 0.844
Epoch 11/200, Loss: 0.625, Accuracy: 0.688
Epoch 12/200, Loss: 0.408, Accuracy: 0.812
Epoch 13/200, Loss: 0.540, Accuracy: 0.719
Epoch 14/200, Loss: 0.604, Accuracy: 0.688
Epoch 15/200, Loss: 0.399, Accuracy: 0.844
Epoch 16/200, Loss: 0.577, Accuracy: 0.625
Epoch 17/200, Loss: 0.413, Accuracy: 0.781
Epoch 18/200, Loss: 0.542, Accuracy: 0.750
Epoch 19/200, Loss: 0.473, Accuracy: 0.812
Epoch 20/200, Loss: 0.493, Accuracy: 0.781
Epoch 21/200, Loss: 0.611, Accuracy: 0.688
Epoch 22/200, Loss: 0.504, Accuracy: 0.750
Epoch 23/200, Loss: 0.519, Accuracy: 0.719
Epoch 24/200, Loss: 