In [1]:
import pandas as pd

import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader, random_split

from sklearn.preprocessing import LabelEncoder

from tqdm import tqdm

import matplotlib.pyplot as plt

%matplotlib inline

In [2]:
df = pd.read_csv("./data/diabetes_data.csv", delimiter=";")

In [3]:
df.shape

(520, 17)

In [4]:
df.head()

Unnamed: 0,age,gender,polyuria,polydipsia,sudden_weight_loss,weakness,polyphagia,genital_thrush,visual_blurring,itching,irritability,delayed_healing,partial_paresis,muscle_stiffness,alopecia,obesity,class
0,40,Male,0,1,0,1,0,0,0,1,0,1,0,1,1,1,1
1,58,Male,0,0,0,1,0,0,1,0,0,0,1,0,1,0,1
2,41,Male,1,0,0,1,1,0,0,1,0,1,0,1,1,0,1
3,45,Male,0,0,1,1,1,1,0,1,0,1,0,0,0,0,1
4,60,Male,1,1,1,1,1,0,1,1,1,1,1,1,1,1,1


In [5]:
df["class"].value_counts()

class
1    320
0    200
Name: count, dtype: int64

In [6]:
encoder = LabelEncoder()
encoder.fit(df["gender"])
df["gender"] = encoder.transform(df["gender"])

In [7]:
df

Unnamed: 0,age,gender,polyuria,polydipsia,sudden_weight_loss,weakness,polyphagia,genital_thrush,visual_blurring,itching,irritability,delayed_healing,partial_paresis,muscle_stiffness,alopecia,obesity,class
0,40,1,0,1,0,1,0,0,0,1,0,1,0,1,1,1,1
1,58,1,0,0,0,1,0,0,1,0,0,0,1,0,1,0,1
2,41,1,1,0,0,1,1,0,0,1,0,1,0,1,1,0,1
3,45,1,0,0,1,1,1,1,0,1,0,1,0,0,0,0,1
4,60,1,1,1,1,1,1,0,1,1,1,1,1,1,1,1,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
515,39,0,1,1,1,0,1,0,0,1,0,1,1,0,0,0,1
516,48,0,1,1,1,1,1,0,0,1,1,1,1,0,0,0,1
517,58,0,1,1,1,1,1,0,1,0,0,0,1,1,0,1,1
518,32,0,0,0,0,1,0,0,1,1,0,1,0,0,1,0,0


In [8]:
X = df.drop("class", axis=1)
y = df["class"]

In [9]:
# features
X

Unnamed: 0,age,gender,polyuria,polydipsia,sudden_weight_loss,weakness,polyphagia,genital_thrush,visual_blurring,itching,irritability,delayed_healing,partial_paresis,muscle_stiffness,alopecia,obesity
0,40,1,0,1,0,1,0,0,0,1,0,1,0,1,1,1
1,58,1,0,0,0,1,0,0,1,0,0,0,1,0,1,0
2,41,1,1,0,0,1,1,0,0,1,0,1,0,1,1,0
3,45,1,0,0,1,1,1,1,0,1,0,1,0,0,0,0
4,60,1,1,1,1,1,1,0,1,1,1,1,1,1,1,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
515,39,0,1,1,1,0,1,0,0,1,0,1,1,0,0,0
516,48,0,1,1,1,1,1,0,0,1,1,1,1,0,0,0
517,58,0,1,1,1,1,1,0,1,0,0,0,1,1,0,1
518,32,0,0,0,0,1,0,0,1,1,0,1,0,0,1,0


In [10]:
# Target/ Groundtruth
y

0      1
1      1
2      1
3      1
4      1
      ..
515    1
516    1
517    1
518    0
519    0
Name: class, Length: 520, dtype: int64

In [11]:
X.shape, y.shape

((520, 16), (520,))

In [12]:
type(X)

pandas.core.frame.DataFrame

In [13]:
class DiabetesDataset(Dataset):
    def __init__(self, X, y):
        self.X = torch.tensor(X, dtype=torch.float32)
        self.y = torch.tensor(y, dtype=torch.float32)
    
    def __len__(self):
        return len(self.X)
    
    def __getitem__(self, idx):
        features = self.X[idx]
        target = self.y[idx]
        return features, target

In [14]:
dataset = DiabetesDataset(X.values, y.values) # passing the ndarray
train_set, test_set = random_split(dataset, [0.7, 0.3])

print(dataset[0])
print(len(train_set)), print(len(test_set))

train_dataloader = DataLoader(train_set, shuffle=True, batch_size=8)
test_dataloader = DataLoader(test_set, shuffle=False)

(tensor([40.,  1.,  0.,  1.,  0.,  1.,  0.,  0.,  0.,  1.,  0.,  1.,  0.,  1.,
         1.,  1.]), tensor(1.))
364
156


In [15]:
input_size = X.shape[1]
hidden_size1 = 10
hidden_size2 = 5
output_size = 1

In [16]:
model = nn.Sequential(
    nn.Linear(input_size, hidden_size1),
    nn.ReLU(),
    nn.Linear(hidden_size1, hidden_size2),
    nn.ReLU(),
    nn.Linear(hidden_size2, output_size),
    nn.Sigmoid()
)

In [17]:
n_epochs = 50
loss_fn = nn.BCELoss()
optimizer = optim.SGD(model.parameters(), lr=5e-4, momentum=0.9)


In [18]:
def train(model, dataloader):
    batch_loss = {}
    batch_accuracy = {}
    correct = 0
    _correct = 0
    size = 0  # dataset size
    
    model.train()
    for batch, (X_batch, y_batch) in enumerate(dataloader):
        #print(f"batch : {batch}")
        y_batch_pred = model(X_batch)
        #print(y_batch_pred)
        y_batch_pred = y_batch_pred.squeeze(dim=1)
        #print('op')
        #print(y_batch_pred)
        #print(y_batch_pred.round())
        #print(y_batch)
        loss = loss_fn(y_batch_pred.round(), y_batch)
        
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        #print(f"dim : {y_batch_pred}")
        #print(f"dim : {y_batch_pred.round()}")
        #print(f"dim : {y_batch}")
        _correct = (y_batch_pred.round()==y_batch).type(torch.float).sum().item()
        #print(f"_correct : {_correct}")
        _batch_size = len(X_batch)

        correct += _correct

        # if batch % 16 == 0:
        #     #print(f"batche: {batch}")
        #     print(f"loss: {loss.item()}")

        batch_loss[batch] = loss.item()
        batch_accuracy[batch] = _correct/_batch_size

        size += _batch_size
    
    correct /= size
    print(f"Train Accuracy: {correct}")

    return batch_loss

In [19]:
train_batch_loss = []
for epoch in tqdm(range(n_epochs)):
    batch_loss = train(model=model, dataloader=train_dataloader)
    for i in range(len(batch_loss)):
        train_batch_loss.append(batch_loss[i])

    

  return Variable._execution_engine.run_backward(  # Calls into the C++ engine to run the backward pass
 12%|█▏        | 6/50 [00:00<00:01, 26.16it/s]

Train Accuracy: 0.3901098901098901
Train Accuracy: 0.3901098901098901
Train Accuracy: 0.3901098901098901
Train Accuracy: 0.3901098901098901
Train Accuracy: 0.3901098901098901
Train Accuracy: 0.3901098901098901


 24%|██▍       | 12/50 [00:00<00:01, 27.00it/s]

Train Accuracy: 0.3901098901098901
Train Accuracy: 0.3901098901098901
Train Accuracy: 0.3901098901098901
Train Accuracy: 0.3901098901098901
Train Accuracy: 0.3901098901098901
Train Accuracy: 0.3901098901098901


 36%|███▌      | 18/50 [00:00<00:01, 27.71it/s]

Train Accuracy: 0.3901098901098901
Train Accuracy: 0.3901098901098901
Train Accuracy: 0.3901098901098901
Train Accuracy: 0.3901098901098901
Train Accuracy: 0.3901098901098901
Train Accuracy: 0.3901098901098901


 50%|█████     | 25/50 [00:00<00:00, 28.90it/s]

Train Accuracy: 0.3901098901098901
Train Accuracy: 0.3901098901098901
Train Accuracy: 0.3901098901098901
Train Accuracy: 0.3901098901098901
Train Accuracy: 0.3901098901098901
Train Accuracy: 0.3901098901098901
Train Accuracy: 0.3901098901098901


 62%|██████▏   | 31/50 [00:01<00:00, 29.12it/s]

Train Accuracy: 0.3901098901098901
Train Accuracy: 0.3901098901098901
Train Accuracy: 0.3901098901098901
Train Accuracy: 0.3901098901098901
Train Accuracy: 0.3901098901098901
Train Accuracy: 0.3901098901098901


 74%|███████▍  | 37/50 [00:01<00:00, 28.38it/s]

Train Accuracy: 0.3901098901098901
Train Accuracy: 0.3901098901098901
Train Accuracy: 0.3901098901098901
Train Accuracy: 0.3901098901098901
Train Accuracy: 0.3901098901098901
Train Accuracy: 0.3901098901098901


 86%|████████▌ | 43/50 [00:01<00:00, 28.08it/s]

Train Accuracy: 0.3901098901098901
Train Accuracy: 0.3901098901098901
Train Accuracy: 0.3901098901098901
Train Accuracy: 0.3901098901098901
Train Accuracy: 0.3901098901098901
Train Accuracy: 0.3901098901098901


 98%|█████████▊| 49/50 [00:01<00:00, 27.48it/s]

Train Accuracy: 0.3901098901098901
Train Accuracy: 0.3901098901098901
Train Accuracy: 0.3901098901098901
Train Accuracy: 0.3901098901098901
Train Accuracy: 0.3901098901098901
Train Accuracy: 0.3901098901098901


100%|██████████| 50/50 [00:01<00:00, 27.68it/s]

Train Accuracy: 0.3901098901098901





In [20]:
t1 = torch.tensor([0.51])
print(t1.round())
op = nn.Sigmoid()
print(op(t1))

tensor([1.])
tensor([0.6248])
