In [None]:
# Directory holding the input CSV. The file name is appended by plain string
# concatenation further down, so the trailing "/" is required.
data_path = "../input/pima-indians-diabetes-database/"

# Library Imports

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

import torch
from torch import nn, optim
from torch.utils.data import Dataset
from torch.utils.data import DataLoader as DL
from torch.nn.utils import weight_norm as WN
import torch.nn.functional as F

from sklearn.model_selection import KFold
from sklearn.metrics import accuracy_score, precision_recall_curve
from sklearn.preprocessing import StandardScaler, MinMaxScaler
from sklearn.impute import SimpleImputer

from time import time
import random as r

# Helper Functions

In [None]:
def breaker():
    """Print a rule of 30 dashes with an empty line before and after it."""
    rule = "-" * 30
    print("\n" + rule + "\n")
    
def head(x, no_of_ele=5):
    """Print the leading slice ``x[:no_of_ele]`` framed by two separator rules.

    Args:
        x: Any object that supports slicing with ``[:n]``.
        no_of_ele: Upper bound of the slice; defaults to 5.
    """
    breaker()
    preview = x[:no_of_ele]
    print(preview)
    breaker()
    
def getCol(x):
    """Return the column labels of ``x`` (anything exposing ``.columns``) as a list."""
    return list(x.columns)

def getObj(x):
    """Return the labels of the columns of ``x`` whose dtype compares equal to "object"."""
    is_object = x.dtypes == "object"
    return [label for label, flag in is_object.items() if flag]

# Feature scaler instance. The only statement that would use it
# (fit_transform on X) is commented out further down, so it is currently idle.
sc_X = StandardScaler()

# Data Handling

**Inputs**

In [None]:
# Read the diabetes CSV from the configured input directory.
data = pd.read_csv(data_path + "diabetes.csv")

# Report the (rows, columns) shape between separator rules.
breaker()
print("Dataset Shape :", repr(data.shape))
breaker()

# List every column label, one per line.
for column_name in getCol(data):
    print(column_name)
breaker()

In [None]:
# Preview the first 10 rows of the loaded frame.
data.head(10)

In [None]:
# Every column except the last is a feature; the last column is the target.
# Both are materialised as independent float NumPy arrays.
X = data.iloc[:, :-1].to_numpy(dtype=float, copy=True)
y = data.iloc[:, -1].to_numpy(dtype=float, copy=True)

# Feature scaling is intentionally left disabled:
#X = sc_X.fit_transform(X)

# Width of the network's input layer.
num_features = X.shape[1]

# The frame is not needed once the arrays exist.
del data

**Dataset Template**

In [None]:
class DS(Dataset):
    """Dataset wrapper around in-memory feature (and optionally label) arrays.

    In "train" mode each item is a ``(features, label)`` pair of float tensors;
    in any other mode only the feature tensor is returned and ``y`` is never
    stored on the instance.
    """

    def __init__(this, X=None, y=None, mode="train"):
        this.X, this.mode = X, mode
        # Labels are only kept when they will actually be served.
        if this.mode == "train":
            this.y = y

    def __len__(this):
        # Number of observations = length of the first axis of X.
        return this.X.shape[0]

    def __getitem__(this, idx):
        features = torch.FloatTensor(this.X[idx])
        if this.mode != "train":
            return features
        return features, torch.FloatTensor(this.y[idx])

# ANN

**Config**

In [None]:
class CFG:
    # Central place for the hyper-parameters and runtime settings used below.

    # Mini-batch sizes for the training, test and validation loaders.
    tr_batch_size = 128
    ts_batch_size = 128
    va_batch_size = 128

    # Epochs per fold and number of cross-validation folds.
    epochs  = 50
    n_folds = 5

    # Layer widths: input, the two hidden-layer variants, and output.
    IL = num_features
    HL_1 = [256]
    HL_2 = [256, 128]
    OL = 1

    # Use the first CUDA device when one is available, otherwise the CPU.
    device = torch.device("cuda:0") if torch.cuda.is_available() else torch.device("cpu")

# Shared settings object read by the code that follows.
cfg = CFG()

# Label-free view over the full feature matrix, served in its original order.
sim_ts_data_setup = DS(X=X, y=None, mode="test")
sim_ts_data = DL(
    sim_ts_data_setup,
    batch_size=cfg.ts_batch_size,
    shuffle=False,
)

**Setup**

In [None]:
class ANN(nn.Module):
    """Fully connected network with one or two hidden layers and a sigmoid output.

    Every linear layer ``FCk`` is wrapped in weight normalisation and preceded
    by a batch-norm layer ``BNk``. Hidden layers use ReLU. Two dropout modules
    (``DP1``, ``DP2``) are registered but are not applied in ``forward``.

    Args:
        IL: Width of the input layer.
        HL: List of hidden-layer widths; must have length 1 or 2.
        OL: Width of the output layer.

    Raises:
        NotImplementedError: If ``HL`` does not have length 1 or 2.
    """

    def __init__(this, IL=None, HL=None, OL=None):
        super().__init__()

        this.HL = HL
        this.DP1 = nn.Dropout(p=0.2)
        this.DP2 = nn.Dropout(p=0.5)

        if len(HL) not in (1, 2):
            raise NotImplementedError("Only Supports Networks of Depth 1 and 2")

        # Consecutive (fan_in, fan_out) pairs: IL -> HL[0] [-> HL[1]] -> OL.
        widths = [IL, *HL, OL]
        for k, (fan_in, fan_out) in enumerate(zip(widths[:-1], widths[1:]), start=1):
            setattr(this, "BN{}".format(k), nn.BatchNorm1d(fan_in))
            setattr(this, "FC{}".format(k), WN(nn.Linear(fan_in, fan_out)))

    def getOptimizer(this, lr=1e-3, wd=0):
        """Return an Adam optimizer over all parameters of this network."""
        return optim.Adam(this.parameters(), lr=lr, weight_decay=wd)

    def forward(this, x):
        # One hidden layer -> 2 linear layers; anything else takes the 3-layer path.
        last = 2 if len(this.HL) == 1 else 3

        # Hidden stages: batch-norm -> linear -> ReLU.
        for k in range(1, last):
            x = getattr(this, "BN{}".format(k))(x)
            x = F.relu(getattr(this, "FC{}".format(k))(x))

        # Output stage: batch-norm -> linear -> sigmoid.
        x = getattr(this, "BN{}".format(last))(x)
        return torch.sigmoid(getattr(this, "FC{}".format(last))(x))

In [None]:
def _run_phase(model, loader, criterion, optimizer, phase):
    """Run one pass over `loader` and return the mean per-sample loss.

    In the "train" phase the model is put in training mode and the optimizer
    is stepped after every batch; in any other phase the model is put in eval
    mode and gradients are disabled.
    """
    is_train = phase == "train"
    if is_train:
        model.train()
    else:
        model.eval()

    loss_sum, n_seen = 0.0, 0
    for feat, label in loader:
        feat, label = feat.to(cfg.device), label.to(cfg.device)

        if is_train:
            optimizer.zero_grad()
        with torch.set_grad_enabled(is_train):
            loss = criterion(model(feat), label)
            if is_train:
                loss.backward()
                optimizer.step()

        # The criterion already averages over the batch, so weight it by the
        # batch size to get an exact per-sample mean even when the last batch
        # is shorter than the others.
        batch_n = label.shape[0]
        loss_sum += loss.item() * batch_n
        n_seen += batch_n

    return loss_sum / n_seen if n_seen else 0.0


def train_fn(X=None, y=None, n_folds=None, HL_Used=None):
    """Train one ANN per cross-validation fold and checkpoint each fold's best epoch.

    For every fold a fresh, identically seeded network is trained for
    ``cfg.epochs`` epochs. Whenever the fold's validation loss improves, the
    weights are written to ``./Model_Fold_{fold}.pt`` (0-based fold index), so
    each fold ends up with its own best checkpoint.

    Args:
        X: Feature array; indexed with the integer index arrays produced by
            KFold (assumed to be a NumPy array - it is used as ``X[idx]``).
        y: Label array; indexed the same way and reshaped to a column.
        n_folds: Number of KFold splits.
        HL_Used: Hidden-layer widths forwarded to ``ANN``.

    Returns:
        Tuple ``(LP, names, model)``:
            LP: one ``{"train": loss, "valid": loss}`` dict per epoch, all
                folds concatenated in order (``n_folds * cfg.epochs`` entries).
                Losses are mean per-sample BCE values.
            names: checkpoint paths that were written, one per fold.
            model: the network of the last fold in its final-epoch state
                (callers load checkpoints into it).
    """
    breaker()
    print("Training ...")
    breaker()

    LP = []
    names = []
    # Best epoch over all folds; kept only for the summary printed at the end.
    bestLoss = {"train" : np.inf, "valid" : np.inf}
    criterion = nn.BCELoss()

    start_time = time()
    splitter = KFold(n_splits=n_folds, shuffle=True, random_state=0)
    for fold, (tr_idx, va_idx) in enumerate(splitter.split(X, y)):
        print("Processing Fold {fold} ...".format(fold=fold+1))

        X_train, X_valid, y_train, y_valid = X[tr_idx], X[va_idx], y[tr_idx], y[va_idx]

        # Labels become (n, 1) columns so they match the network's output shape.
        tr_data_setup = DS(X_train, y_train.reshape(-1, 1))
        va_data_setup = DS(X_valid, y_valid.reshape(-1, 1))

        DLS = {"train" : DL(tr_data_setup, batch_size=cfg.tr_batch_size, shuffle=True, generator=torch.manual_seed(0)),
               "valid" : DL(va_data_setup, batch_size=cfg.va_batch_size, shuffle=False)}

        # Re-seed so every fold starts from the same initial weights.
        torch.manual_seed(0)
        model = ANN(cfg.IL, HL_Used, cfg.OL)
        model.to(cfg.device)

        optimizer = model.getOptimizer()
        scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, patience=4, eps=1e-6, verbose=True)

        name = "./Model_Fold_{fold}.pt".format(fold=fold)
        # Tracked per fold: comparing against a best value shared by all folds
        # would leave later folds without a checkpoint unless they beat every
        # earlier fold.
        foldBestValid = np.inf

        for e in range(cfg.epochs):
            epochLoss = {phase : _run_phase(model, DLS[phase], criterion, optimizer, phase)
                         for phase in ("train", "valid")}
            LP.append(epochLoss)
            scheduler.step(epochLoss["valid"])

            if epochLoss["valid"] < foldBestValid:
                foldBestValid = epochLoss["valid"]
                torch.save(model.state_dict(), name)
                if name not in names:
                    names.append(name)

            if epochLoss["valid"] < bestLoss["valid"]:
                bestLoss = epochLoss

    breaker()
    print("Time Taken to Train {f} folds for {e} epochs : {:.2f} minutes".format((time() - start_time)/60, f=n_folds, e=cfg.epochs))
    breaker()
    print("Best Loss :", repr(bestLoss))
    breaker()

    return LP, names, model

def eval_fn(model=None, names=None, dataloader=None, num_obs_test=None):
    """Average the predictions of several checkpoints and threshold them at 0.5.

    Args:
        model: Network into which each checkpoint's state dict is loaded.
            Inference runs on the device its parameters live on.
        names: Iterable of checkpoint paths (state dicts saved with torch.save).
        dataloader: Iterable yielding feature batches only (no labels).
        num_obs_test: Total number of observations the dataloader yields.

    Returns:
        Float NumPy array of shape ``(num_obs_test, 1)`` containing 1.0 where
        the mean prediction is > 0.5 and 0.0 otherwise.

    Raises:
        ValueError: If ``names`` is empty, since there is nothing to average.
    """
    names = list(names) if names is not None else []
    if not names:
        raise ValueError("eval_fn needs at least one checkpoint path in `names`")

    # Use the device the model already sits on so inputs and weights agree.
    first_param = next(model.parameters(), None)
    device = first_param.device if first_param is not None else torch.device("cpu")

    y_pred = np.zeros((num_obs_test, 1))
    for name in names:
        # map_location keeps loading working when the checkpoint was written
        # from a different device than the one used now.
        model.load_state_dict(torch.load(name, map_location=device))
        model.eval()

        outputs = []
        with torch.no_grad():
            for feat in dataloader:
                outputs.append(model(feat.to(device)))
        y_pred += torch.cat(outputs, dim=0).cpu().numpy()
    y_pred /= len(names)

    # Threshold with a boolean mask. Indexing with np.argwhere on an (n, 1)
    # array yields [row, 0] pairs, which would also overwrite row 0 on every
    # assignment and corrupt the first prediction.
    return (y_pred > 0.5).astype(y_pred.dtype)

# Configuration 1

In [None]:
# Train the single-hidden-layer configuration across all folds.
LP_1, Names_1, Network_1 = train_fn(X=X, y=y, n_folds=cfg.n_folds, HL_Used=cfg.HL_1)

# set() guards against a checkpoint path being listed more than once.
y_pred = eval_fn(Network_1, set(Names_1), sim_ts_data, len(sim_ts_data_setup))

# NOTE: sim_ts_data is built from the same X used for training, so this is an
# in-sample accuracy of the fold ensemble rather than a held-out score.
print("Configuration 1 Accuracy : {:.5f} %".format(accuracy_score(y, y_pred) * 100))
breaker()

# Split the per-epoch loss records into flat train / valid series.
LPT = [epoch_loss["train"] for epoch_loss in LP_1]
LPV = [epoch_loss["valid"] for epoch_loss in LP_1]

# One subplot per fold; each fold owns a contiguous run of cfg.epochs entries.
xAxis = [i+1 for i in range(cfg.epochs)]
plt.figure(figsize=(15, 30))
for fold in range(cfg.n_folds):
    plt.subplot(cfg.n_folds, 1, fold+1)
    plt.plot(xAxis, LPT[fold*cfg.epochs:(fold+1)*cfg.epochs], "b", label="Training Loss")
    plt.plot(xAxis, LPV[fold*cfg.epochs:(fold+1)*cfg.epochs], "r--", label="Validation Loss")
    plt.legend()
    plt.xlabel("Epochs")
    plt.ylabel("Loss")
    plt.title("Fold {fold}".format(fold=fold+1))
plt.show()

# Configuration 2

In [None]:
# Train the two-hidden-layer configuration across all folds.
LP_2, Names_2, Network_2 = train_fn(X=X, y=y, n_folds=cfg.n_folds, HL_Used=cfg.HL_2)

# Ensemble the distinct saved checkpoints over the full feature matrix.
y_pred = eval_fn(Network_2, set(Names_2), sim_ts_data, len(sim_ts_data_setup))

print("Configuration 2 Accuracy : {:.5f} %".format(accuracy_score(y, y_pred) * 100))
breaker()

# Flatten the per-epoch loss records into separate train / valid series.
LPT = [record["train"] for record in LP_2]
LPV = [record["valid"] for record in LP_2]

# Draw one stacked subplot per fold, slicing out that fold's run of epochs.
xAxis = list(range(1, cfg.epochs + 1))
plt.figure(figsize=(15, 30))
for fold in range(cfg.n_folds):
    lo, hi = fold * cfg.epochs, (fold + 1) * cfg.epochs
    plt.subplot(cfg.n_folds, 1, fold+1)
    plt.plot(xAxis, LPT[lo:hi], "b", label="Training Loss")
    plt.plot(xAxis, LPV[lo:hi], "r--", label="Validation Loss")
    plt.legend()
    plt.xlabel("Epochs")
    plt.ylabel("Loss")
    plt.title("Fold {fold}".format(fold=fold+1))
plt.show()