In [1]:
import numpy as np
import torch
import torch.nn as nn
import pandas as pd
from sklearn.preprocessing import StandardScaler
from torch.utils.data import Dataset

In [15]:
# Load the dataset using Pandas
data = pd.read_csv('diabetes.csv')

In [16]:

# For x: Extract out the dataset from all the rows (all samples) and all columns except last column (all features). 
# For y: Extract out the last column (which is the label)
# Convert both to numpy using the .values method
x = data.iloc[:,0:-1].values
y_string= list(data.iloc[:,-1])

In [17]:
# Lets have a look some samples from our data
print(x[:3])
print(y_string[:3])

[[  6.  148.   72.   35.    0.   33.6  50. ]
 [  1.   85.   66.   29.    0.   26.6  31. ]
 [  8.  183.   64.    0.    0.   23.3  32. ]]
['positive', 'negative', 'positive']


In [18]:

# Our neural network only understand numbers! So convert the string to labels
y_int = []
for string in y_string:
    if string == 'positive':
        y_int.append(1)
    else:
        y_int.append(0)

In [19]:
# Now convert to an array
y = np.array(y_int, dtype = 'float64')

In [20]:
# Feature Normalization. All features should have the same range of values (-1,1)
sc = StandardScaler()
x = sc.fit_transform(x)

In [21]:

# Now we convert the arrays to PyTorch tensors
x = torch.tensor(x)
# We add an extra dimension to convert this array to 2D
y = torch.tensor(y).unsqueeze(1)

In [22]:
print(x)
print(y)

tensor([[ 0.6399,  0.8483,  0.1496,  ..., -0.6929,  0.2040,  1.4260],
        [-0.8449, -1.1234, -0.1605,  ..., -0.6929, -0.6844, -0.1907],
        [ 1.2339,  1.9437, -0.2639,  ..., -0.6929, -1.1033, -0.1056],
        ...,
        [ 0.3430,  0.0033,  0.1496,  ...,  0.2796, -0.7352, -0.2758],
        [-0.8449,  0.1598, -0.4707,  ..., -0.6929, -0.2402,  1.1707],
        [-0.8449, -0.8730,  0.0462,  ..., -0.6929, -0.2021, -0.8714]],
       dtype=torch.float64)
tensor([[1.],
        [0.],
        [1.],
        [0.],
        [1.],
        [0.],
        [1.],
        [0.],
        [1.],
        [1.],
        [0.],
        [1.],
        [0.],
        [1.],
        [1.],
        [1.],
        [1.],
        [1.],
        [0.],
        [1.],
        [0.],
        [0.],
        [1.],
        [1.],
        [1.],
        [1.],
        [1.],
        [0.],
        [0.],
        [0.],
        [0.],
        [1.],
        [0.],
        [0.],
        [0.],
        [0.],
        [0.],
        [1.],
      

In [23]:
print(x.shape)
print(y.shape)

torch.Size([768, 7])
torch.Size([768, 1])


In [24]:
class Dataset(Dataset):
    
    def __init__(self,x,y):
        self.x=x
        self.y=y
        
    def __getitem__(self,index):
        return self.x[index], self.y[index]
    
    def __len__(self):
        return len(self.x)

In [25]:
dataset =  Dataset(x,y)

In [27]:
len(dataset)

768

In [30]:
# Load the data to your dataloader for batch processing and shuffling
train_loader = torch.utils.data.DataLoader(dataset=dataset,
                                           batch_size=32,
                                           shuffle=True)

In [31]:
# Let's have a look at the data loader
print("There is {} batches in the dataset".format(len(train_loader)))
for (x,y) in train_loader:
    print("For one iteration (batch), there is:")
    print("Data:    {}".format(x.shape))
    print("Labels:  {}".format(y.shape))
    break

There is 24 batches in the dataset
For one iteration (batch), there is:
Data:    torch.Size([32, 7])
Labels:  torch.Size([32, 1])


In [48]:
class Model(nn.Module):
    def __init__(self,input_features, output_features):
        super(Model,self).__init__()
        self.fc1 = nn.Linear(input_features,5)
        self.fc2 = nn.Linear(5,4)
        self.fc3 = nn.Linear(4,3)
        self.sigmoid = nn.Sigmoid()
        self.fc4 = nn.Linear(3,output_features )
        self.tanh = nn.Tanh()
        
        
    def forward(self,x):
        out = self.fc1(x)
        out = self.tanh(out)
        out = self.fc2(out)
        out = self.tanh(out)
        out = self.fc3(out)
        out = self.tanh(out)
        out = self.fc4(out)
        out = self.sigmoid(out)
        return out
        

In [52]:
net = Model(7,1)
criterion = torch.nn.BCELoss(size_average = True)

In [53]:
optimizer = torch.optim.SGD(net.parameters(),lr=0.1,momentum = 0.9)

In [57]:
epochs = 200
for epoch in range(epochs):
    for inputs,labels in train_loader:
        inputs=inputs.float()
        labels=labels.float()
        #forward prop
        outputs= net(inputs)
        #criterion
        loss = criterion(outputs,labels)
        #clear gradients
        optimizer.zero_grad()
        loss.backward()
        #update weights
        optimizer.step()
        
    #accuracy
    output = (outputs>0.5).float()
    accuracy = (output == labels).float().mean()
    print("Epoch {}/{} , loss {:.3f}".format(epoch+1,200,loss,accuracy))
    
    
    
        

Epoch 1/200 , loss 0.680
Epoch 2/200 , loss 0.398
Epoch 3/200 , loss 0.434
Epoch 4/200 , loss 0.523
Epoch 5/200 , loss 0.484
Epoch 6/200 , loss 0.461
Epoch 7/200 , loss 0.508
Epoch 8/200 , loss 0.486
Epoch 9/200 , loss 0.379
Epoch 10/200 , loss 0.595
Epoch 11/200 , loss 0.619
Epoch 12/200 , loss 0.569
Epoch 13/200 , loss 0.786
Epoch 14/200 , loss 0.489
Epoch 15/200 , loss 0.399
Epoch 16/200 , loss 0.424
Epoch 17/200 , loss 0.569
Epoch 18/200 , loss 0.530
Epoch 19/200 , loss 0.298
Epoch 20/200 , loss 0.465
Epoch 21/200 , loss 0.531
Epoch 22/200 , loss 0.507
Epoch 23/200 , loss 0.378
Epoch 24/200 , loss 0.473
Epoch 25/200 , loss 0.346
Epoch 26/200 , loss 0.409
Epoch 27/200 , loss 0.498
Epoch 28/200 , loss 0.483
Epoch 29/200 , loss 0.330
Epoch 30/200 , loss 0.530
Epoch 31/200 , loss 0.462
Epoch 32/200 , loss 0.633
Epoch 33/200 , loss 0.474
Epoch 34/200 , loss 0.544
Epoch 35/200 , loss 0.445
Epoch 36/200 , loss 0.373
Epoch 37/200 , loss 0.566
Epoch 38/200 , loss 0.569
Epoch 39/200 , loss 0

In [None]:
aa= torch.tensor([3,4,5])
bb= torch.tensor([3,1,5])

In [42]:
(aa==bb).float()

tensor([1., 0., 1.])