In [1]:
import pandas as pd

import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader, random_split

from sklearn.preprocessing import LabelEncoder

from tqdm import tqdm

import matplotlib.pyplot as plt

%matplotlib inline

In [2]:
df = pd.read_csv("./data/diabetes_data.csv", delimiter=";")

In [3]:
df.shape

(520, 17)

In [4]:
df.head()

Unnamed: 0,age,gender,polyuria,polydipsia,sudden_weight_loss,weakness,polyphagia,genital_thrush,visual_blurring,itching,irritability,delayed_healing,partial_paresis,muscle_stiffness,alopecia,obesity,class
0,40,Male,0,1,0,1,0,0,0,1,0,1,0,1,1,1,1
1,58,Male,0,0,0,1,0,0,1,0,0,0,1,0,1,0,1
2,41,Male,1,0,0,1,1,0,0,1,0,1,0,1,1,0,1
3,45,Male,0,0,1,1,1,1,0,1,0,1,0,0,0,0,1
4,60,Male,1,1,1,1,1,0,1,1,1,1,1,1,1,1,1


In [5]:
df["class"].value_counts()

class
1    320
0    200
Name: count, dtype: int64

In [6]:
encoder = LabelEncoder()
encoder.fit(df["gender"])
df["gender"] = encoder.transform(df["gender"])

In [7]:
df

Unnamed: 0,age,gender,polyuria,polydipsia,sudden_weight_loss,weakness,polyphagia,genital_thrush,visual_blurring,itching,irritability,delayed_healing,partial_paresis,muscle_stiffness,alopecia,obesity,class
0,40,1,0,1,0,1,0,0,0,1,0,1,0,1,1,1,1
1,58,1,0,0,0,1,0,0,1,0,0,0,1,0,1,0,1
2,41,1,1,0,0,1,1,0,0,1,0,1,0,1,1,0,1
3,45,1,0,0,1,1,1,1,0,1,0,1,0,0,0,0,1
4,60,1,1,1,1,1,1,0,1,1,1,1,1,1,1,1,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
515,39,0,1,1,1,0,1,0,0,1,0,1,1,0,0,0,1
516,48,0,1,1,1,1,1,0,0,1,1,1,1,0,0,0,1
517,58,0,1,1,1,1,1,0,1,0,0,0,1,1,0,1,1
518,32,0,0,0,0,1,0,0,1,1,0,1,0,0,1,0,0


In [8]:
X = df.drop("class", axis=1)
y = df["class"]

In [9]:
# features
X

Unnamed: 0,age,gender,polyuria,polydipsia,sudden_weight_loss,weakness,polyphagia,genital_thrush,visual_blurring,itching,irritability,delayed_healing,partial_paresis,muscle_stiffness,alopecia,obesity
0,40,1,0,1,0,1,0,0,0,1,0,1,0,1,1,1
1,58,1,0,0,0,1,0,0,1,0,0,0,1,0,1,0
2,41,1,1,0,0,1,1,0,0,1,0,1,0,1,1,0
3,45,1,0,0,1,1,1,1,0,1,0,1,0,0,0,0
4,60,1,1,1,1,1,1,0,1,1,1,1,1,1,1,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
515,39,0,1,1,1,0,1,0,0,1,0,1,1,0,0,0
516,48,0,1,1,1,1,1,0,0,1,1,1,1,0,0,0
517,58,0,1,1,1,1,1,0,1,0,0,0,1,1,0,1
518,32,0,0,0,0,1,0,0,1,1,0,1,0,0,1,0


In [10]:
# Target/ Groundtruth
y

0      1
1      1
2      1
3      1
4      1
      ..
515    1
516    1
517    1
518    0
519    0
Name: class, Length: 520, dtype: int64

In [11]:
X.shape, y.shape

((520, 16), (520,))

In [12]:
type(X)

pandas.core.frame.DataFrame

In [13]:
class DiabetesDataset(Dataset):
    def __init__(self, X, y):
        self.X = torch.tensor(X, dtype=torch.float32)
        self.y = torch.tensor(y, dtype=torch.float32)
    
    def __len__(self):
        return len(self.X)
    
    def __getitem__(self, idx):
        features = self.X[idx]
        target = self.y[idx]
        return features, target

In [14]:
dataset = DiabetesDataset(X.values, y.values) # passing the ndarray
train_set, test_set = random_split(dataset, [0.7, 0.3])

print(dataset[1])
print(len(train_set)), print(len(test_set))

train_dataloader = DataLoader(train_set, shuffle=True, batch_size=32)
test_dataloader = DataLoader(test_set, shuffle=False)

(tensor([58.,  1.,  0.,  0.,  0.,  1.,  0.,  0.,  1.,  0.,  0.,  0.,  1.,  0.,
         1.,  0.]), tensor(1.))
364
156


In [15]:
input_size = X.shape[1]
hidden_size1 = 50
hidden_size2 = 25
hidden_size3 = 20
output_size = 1

In [16]:
model1 = nn.Sequential(
    nn.Linear(input_size, hidden_size1),
    nn.ReLU(),
    nn.Linear(hidden_size1, hidden_size2),
    nn.ReLU(),
    nn.Linear(hidden_size2, hidden_size3),
    nn.ReLU(),
    nn.Linear(hidden_size3, output_size),
    nn.Sigmoid()
)

In [17]:
def init_weights(m):
    if isinstance(m, nn.Linear):
        torch.nn.init.xavier_uniform(m.weight)
        m.bias.data.fill_(0.01)


model1.apply(init_weights)

  torch.nn.init.xavier_uniform(m.weight)


Sequential(
  (0): Linear(in_features=16, out_features=50, bias=True)
  (1): ReLU()
  (2): Linear(in_features=50, out_features=25, bias=True)
  (3): ReLU()
  (4): Linear(in_features=25, out_features=20, bias=True)
  (5): ReLU()
  (6): Linear(in_features=20, out_features=1, bias=True)
  (7): Sigmoid()
)

In [18]:
class MyModel(nn.Module):
    def __init__(self, in_size, hidden_size1, hidden_size2, hidden_size3, out_size):
        super().__init__()
        self.layer1 = nn.Linear(in_size, hidden_size1)
        self.act1 = nn.ReLU()
        self.layer2 = nn.Linear(hidden_size1, hidden_size2)
        self.act2 = nn.ReLU()
        self.layer3 = nn.Linear(hidden_size2, hidden_size3)
        self.act3 = nn.ReLU()
        self.layer4 = nn.Linear(hidden_size3, out_size)
        self.sigmoid = nn.Sigmoid()
        
    
    def forward(self, x):
        x = self.act1(self.layer1(x))
        x = self.act2(self.layer2(x))
        x = self.act3(self.layer3(x))
        x = self.sigmoid(self.layer4(x))
        return x
        #

In [19]:
#model1 = MyModel(in_size=input_size, hidden_size1=hidden_size1, hidden_size2=hidden_size2, hidden_size3=hidden_size3, out_size=output_size)

In [20]:
n_epochs = 100
loss_fn = nn.BCELoss()
optimizer = optim.Adam(model1.parameters(), lr=1e-4)


In [21]:
def checksum(model):
    s = 0.0
    for param in model.parameters():
        s += torch.sum(param)
    return s

In [22]:
def train(model, dataloader, optimizer, loss_fn):
    batch_loss = {}
    batch_accuracy = {}
    correct = 0
    _correct = 0
    size = 0  # dataset size
    
    
    for epoch in tqdm(range(n_epochs)):
        running_loss = 0.
        train_batch_acc = 0.
        correct = 0
        _correct = 0
        size = 0  # dataset size
        model.train()
        for batch, (X_batch, y_batch) in enumerate(dataloader):
            #print(f"batch : {batch}")
            y_batch_pred = model(X_batch)
            #print(f"y batch: {y_batch.shape}")
            #print(f"y batch pred: {y_batch_pred.shape}")
            #print(y_batch_pred.shape)
            #print(y_batch_pred)
            #print(y_batch_pred)
        
            y_batch_pred = y_batch_pred.squeeze(dim=1)
            # print(y_batch_pred.shape)
            # print(y_batch.shape)
            # print(y_batch_pred)
            # print(y_batch)

            #print('op')
            #print(y_batch_pred)
            #print(y_batch_pred.round())
            #print(y_batch)
            loss = loss_fn(y_batch_pred, y_batch)
            
            optimizer.zero_grad()
            # backward pass
            loss.backward()
            # update weights
            optimizer.step()
            
            #print(f"before: {checksum(model=model)}")
            running_loss += loss.item()
            _correct = (y_batch_pred.round()==y_batch).type(torch.float).sum().item()
            correct += _correct
            #print(f"dim : {y_batch_pred}")
        
            #print(f"dim : {y_batch_pred.round()}")
            #print(f"dim : {y_batch}")
            #print(f"_correct : {_correct}")
            #print(f"X_batch : {X_batch}")
            size += len(X_batch)

            #print(f"after: {checksum(model=model)}")


            # if batch % 16 == 0:
            #     #print(f"batche: {batch}")
            #     print(f"loss: {loss.item()}")
            
            #batch_loss[batch] = loss.item()
            #batch_accuracy[batch] = _correct/_batch_size
            #print(f"correct: {correct} and size {size}")
    
        train_acc = correct / size
        print(f"Train Accuracy: {train_acc}")
    

    return batch_loss, train_acc

In [23]:
batch_loss, train_acc = train(model=model1, dataloader=train_dataloader, optimizer=optimizer, loss_fn=loss_fn)
# train_batch_loss = []
# for epoch in tqdm(range(n_epochs)):
#     batch_loss, train_acc = train(model=model, dataloader=train_dataloader, optimizer=optimizer, loss_fn=loss_fn)
#     print(f"Train Accuracy: {train_acc}")
#     for i in range(len(batch_loss)):
#         train_batch_loss.append(batch_loss[i])

    

  return Variable._execution_engine.run_backward(  # Calls into the C++ engine to run the backward pass
 19%|█▉        | 19/100 [00:00<00:00, 92.94it/s]

Train Accuracy: 0.6098901098901099
Train Accuracy: 0.6098901098901099
Train Accuracy: 0.6126373626373627
Train Accuracy: 0.6126373626373627
Train Accuracy: 0.6098901098901099
Train Accuracy: 0.6098901098901099
Train Accuracy: 0.6126373626373627
Train Accuracy: 0.6126373626373627
Train Accuracy: 0.6126373626373627
Train Accuracy: 0.6126373626373627
Train Accuracy: 0.6126373626373627
Train Accuracy: 0.6126373626373627
Train Accuracy: 0.6126373626373627
Train Accuracy: 0.6126373626373627
Train Accuracy: 0.6126373626373627
Train Accuracy: 0.6126373626373627
Train Accuracy: 0.6126373626373627
Train Accuracy: 0.6126373626373627
Train Accuracy: 0.6126373626373627
Train Accuracy: 0.6126373626373627


 39%|███▉      | 39/100 [00:00<00:00, 96.65it/s]

Train Accuracy: 0.6126373626373627
Train Accuracy: 0.6126373626373627
Train Accuracy: 0.6126373626373627
Train Accuracy: 0.6126373626373627
Train Accuracy: 0.6126373626373627
Train Accuracy: 0.6126373626373627
Train Accuracy: 0.6373626373626373
Train Accuracy: 0.6373626373626373
Train Accuracy: 0.6208791208791209
Train Accuracy: 0.6538461538461539
Train Accuracy: 0.6840659340659341
Train Accuracy: 0.6758241758241759
Train Accuracy: 0.6565934065934066
Train Accuracy: 0.6675824175824175
Train Accuracy: 0.6813186813186813
Train Accuracy: 0.6895604395604396
Train Accuracy: 0.6923076923076923
Train Accuracy: 0.695054945054945
Train Accuracy: 0.6978021978021978
Train Accuracy: 0.7554945054945055


 59%|█████▉    | 59/100 [00:00<00:00, 97.95it/s]

Train Accuracy: 0.7554945054945055
Train Accuracy: 0.7005494505494505
Train Accuracy: 0.7967032967032966
Train Accuracy: 0.8076923076923077
Train Accuracy: 0.7939560439560439
Train Accuracy: 0.8214285714285714
Train Accuracy: 0.8159340659340659
Train Accuracy: 0.8076923076923077
Train Accuracy: 0.7884615384615384
Train Accuracy: 0.8186813186813187
Train Accuracy: 0.8434065934065934
Train Accuracy: 0.8324175824175825
Train Accuracy: 0.8214285714285714
Train Accuracy: 0.8269230769230769
Train Accuracy: 0.8379120879120879
Train Accuracy: 0.8159340659340659
Train Accuracy: 0.8379120879120879
Train Accuracy: 0.8571428571428571
Train Accuracy: 0.8543956043956044
Train Accuracy: 0.8598901098901099


 69%|██████▉   | 69/100 [00:00<00:00, 96.68it/s]

Train Accuracy: 0.8434065934065934
Train Accuracy: 0.8351648351648352
Train Accuracy: 0.8571428571428571
Train Accuracy: 0.8489010989010989
Train Accuracy: 0.8489010989010989
Train Accuracy: 0.8571428571428571
Train Accuracy: 0.8571428571428571
Train Accuracy: 0.8571428571428571
Train Accuracy: 0.8571428571428571
Train Accuracy: 0.8626373626373627
Train Accuracy: 0.8598901098901099
Train Accuracy: 0.8626373626373627
Train Accuracy: 0.8653846153846154
Train Accuracy: 0.8626373626373627
Train Accuracy: 0.8653846153846154
Train Accuracy: 0.8653846153846154
Train Accuracy: 0.8708791208791209
Train Accuracy: 0.8681318681318682


 89%|████████▉ | 89/100 [00:00<00:00, 92.62it/s]

Train Accuracy: 0.8653846153846154
Train Accuracy: 0.8653846153846154
Train Accuracy: 0.8791208791208791
Train Accuracy: 0.8598901098901099
Train Accuracy: 0.8681318681318682
Train Accuracy: 0.8653846153846154
Train Accuracy: 0.8818681318681318
Train Accuracy: 0.8791208791208791
Train Accuracy: 0.8736263736263736
Train Accuracy: 0.8791208791208791
Train Accuracy: 0.8818681318681318
Train Accuracy: 0.8846153846153846
Train Accuracy: 0.8818681318681318
Train Accuracy: 0.8846153846153846
Train Accuracy: 0.8818681318681318
Train Accuracy: 0.8763736263736264
Train Accuracy: 0.8791208791208791
Train Accuracy: 0.8873626373626373
Train Accuracy: 0.8818681318681318
Train Accuracy: 0.8791208791208791


100%|██████████| 100/100 [00:01<00:00, 94.48it/s]

Train Accuracy: 0.8818681318681318
Train Accuracy: 0.8873626373626373





In [24]:
t1 = torch.tensor([0.51])
print(t1.round())
op = nn.Sigmoid()
print(op(t1))

tensor([1.])
tensor([0.6248])
