In [None]:
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as functions
import pandas as pd
from sklearn.preprocessing import StandardScaler
from torch.utils.data import Dataset, DataLoader#, IterableDataset

In [None]:
# Read the diabetes CSV into a pandas DataFrame, print its (rows, columns)
# shape, and display the first rows as the cell output.
data = pd.read_csv(filepath_or_buffer='diabetes.csv')
print(data.shape)
data.head(n=5)

In [None]:
# Split the frame column-wise over all rows (all samples):
#   x        -> every column except the last (the features), as a NumPy array via .values
#   y_string -> the last column (the label), kept as a plain Python list
x = data.iloc[:, :-1].values
y_string = list(data.iloc[:, -1])
print(x)
print(type(y_string))

In [None]:
# Peek at the first three samples: their feature rows, then their string labels
print(x[:3], y_string[:3], sep="\n")

In [None]:
# The network only works with numbers, so encode the string labels as integers:
# 'positive' -> 1, every other value -> 0
y_int = [1 if label == 'positive' else 0 for label in y_string]

In [None]:
# Pack the integer labels into a float64 NumPy array
y = np.array(y_int, dtype=np.float64)

In [None]:
# Feature standardization: StandardScaler is fitted on x and then applied to x,
# so that features measured on different scales become comparable.
# With its default settings the scaler centres each feature and scales it to
# unit variance; the result is NOT bounded to a fixed range such as (-1, 1).
sc = StandardScaler()
sc.fit(x)
x = sc.transform(x)

In [None]:
# Wrap the NumPy arrays in PyTorch tensors
x = torch.tensor(x)
# The labels get an extra axis at position 1, turning the 1-D array into a 2-D column
y = torch.unsqueeze(torch.tensor(y), dim=1)

In [None]:
# Sanity-check the tensor dimensions: features first, then labels
print(x.shape, y.shape, sep="\n")

In [None]:
class Dataset(torch.utils.data.Dataset):
    """Map-style dataset that pairs each sample with its label.

    The base class is referenced by its fully qualified name because this
    class reuses the name ``Dataset``. Inheriting from the bare name would
    make the class extend its own previous definition every time this cell
    is executed again.

    Args:
        x: indexable collection of samples (must support ``len()`` and ``x[i]``).
        y: indexable collection of labels, one per sample.

    Raises:
        ValueError: if ``x`` and ``y`` do not have the same length.
    """

    def __init__(self, x, y):
        # Fail early instead of raising IndexError mid-training or silently
        # ignoring surplus labels.
        if len(x) != len(y):
            raise ValueError(
                "x and y must have the same length, got {} and {}".format(len(x), len(y))
            )
        self.x = x
        self.y = y

    def __getitem__(self, index):
        """Return the ``(sample, label)`` pair stored at ``index``."""
        return self.x[index], self.y[index]

    def __len__(self):
        """Return the number of samples."""
        return len(self.x)

In [None]:
# Bundle the feature tensor and the label tensor into one indexable dataset
dataset = Dataset(x=x, y=y)

In [None]:
# Display the class of the dataset object as the cell output
type(dataset)

In [None]:
# Display the dataset length, as reported by its __len__ method
len(dataset)

In [None]:
# Wrap the dataset in a DataLoader that serves shuffled mini-batches of 32 samples
train_loader = DataLoader(dataset, batch_size=32, shuffle=True)
print(type(train_loader))

In [None]:
# Let's have a look at the data loader: number of batches, then the shapes of one batch.
print("There is {} batches in the dataset".format(len(train_loader)))
# Dedicated loop names are used on purpose: unpacking into `x` and `y` would
# rebind the module-level tensors to a single mini-batch, and every later cell
# reading `x` or `y` would silently see that batch instead of the full data.
for batch_x, batch_y in train_loader:
    print("For one iteration (batch), there is:")
    print("Data:    {}".format(batch_x.shape))
    print("Labels:  {}".format(batch_y.shape))
    break  # one batch is enough for the preview

In [None]:
# Model definition: a small fully connected network for binary classification
class DiabetesSimpleNeuralNetModel(nn.Module):
    """Feed-forward network: input_features -> 5 -> 3 -> 1 with a sigmoid output.

    Args:
        input_features: number of values in each input sample.
    """

    def __init__(self, input_features):
        super().__init__()
        # Layer widths are hard-coded here; they could be constructor arguments as well
        self.inputlayer = nn.Linear(in_features=input_features, out_features=5)
        self.hiddenlayer = nn.Linear(in_features=5, out_features=3)
        self.outputlayer = nn.Linear(in_features=3, out_features=1)
        # Activations: forward() uses relu and sigmoid; tanh is kept available
        # as an alternative hidden activation.
        self.tanh = nn.Tanh()
        self.sigmoid = nn.Sigmoid()
        self.relu = functions.relu

    def forward(self, x):
        """Run a batch through both ReLU hidden stages and return sigmoid outputs."""
        first_activation = self.relu(self.inputlayer(x))
        second_activation = self.relu(self.hiddenlayer(first_activation))
        return self.sigmoid(self.outputlayer(second_activation))

In [None]:
# Create the network (an instance of DiabetesSimpleNeuralNetModel).
# Seed torch's global random generator first so that the weight initialisation
# (and any later sampling that draws from the same global generator, such as
# DataLoader shuffling without an explicit generator) is repeatable after
# Restart Kernel -> Run All.
torch.manual_seed(42)
# Size the input layer from the tensor stored in the dataset rather than from
# the global `x`, so the result does not depend on whether another cell has
# rebound `x` (for example to a single mini-batch).
net = DiabetesSimpleNeuralNetModel(dataset.x.shape[1])
net

In [None]:
# Binary cross-entropy on probabilities: the model output and the target must
# have the same shape.
# reduction='mean' averages the loss over the elements of each mini-batch. It
# replaces the deprecated size_average=True argument, which requested the same
# averaging.
criterion = torch.nn.BCELoss(reduction='mean')

In [None]:
# Plain stochastic gradient descent over the network's parameters with a
# learning rate of 0.1. No momentum argument is passed; add momentum=0.9 to
# the call to enable it.
optimizer = torch.optim.SGD(params=net.parameters(), lr=0.1)

In [None]:
# Train the network and report, for every epoch, the loss and accuracy
# aggregated over ALL mini-batches of that epoch (not just the last one).
num_epochs = 20
for epoch in range(num_epochs):
    loss_sum = 0.0       # sum of per-element losses seen during this epoch
    correct_count = 0    # number of correctly classified elements this epoch
    seen_count = 0       # number of label elements seen this epoch

    for inputs, labels in train_loader:
        # The tensors were built from float64 NumPy arrays; cast them to
        # float32 before feeding the model and the loss.
        inputs = inputs.float()
        labels = labels.float()
        # Clear the gradient buffer (we don't want to accumulate gradients)
        optimizer.zero_grad()
        # Feed Forward
        output = net(inputs)  # is same as net.forward(inputs)
        # Loss Calculation
        loss = criterion(output, labels)
        # Backpropagation
        loss.backward()
        # Weight Update: w <-- w - lr * gradient
        optimizer.step()

        # The criterion returns the batch mean, so weight it by the batch size:
        # a shorter final batch must not count as much as a full one.
        batch_count = labels.numel()
        loss_sum += loss.item() * batch_count
        # Since we are using a sigmoid, threshold the probabilities at 0.5
        predictions = (output > 0.5).float()
        correct_count += (predictions == labels).sum().item()
        seen_count += batch_count

    # An empty loader yields no statistics; report NaN instead of failing on
    # names that were never assigned.
    epoch_loss = loss_sum / seen_count if seen_count else float('nan')
    accuracy = correct_count / seen_count if seen_count else float('nan')
    # Print statistics
    print("Epoch {}/{}, Loss: {:.3f}, Accuracy: {:.3f}".format(epoch+1,num_epochs, epoch_loss, accuracy))