In [1]:
import numpy as np
import torch as T
import pandas as pd
import tqdm
import copy
from sklearn.model_selection import StratifiedKFold, train_test_split

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
from sklearn.model_selection import train_test_split
import os

os.chdir("..")
data = np.loadtxt(('data/preprocessed-dataset.csv'), delimiter=',', skiprows=1)
os.chdir("methods")
x = data[0:, 1:35]
y = data[:, 35]

x = T.tensor(x, dtype=T.float32)
y = T.tensor(y, dtype=T.float32).reshape(-1, 1)

x_train, x_test, y_train, y_test = train_test_split(x, y, random_state=1, train_size=0.8)

In [3]:
class Deep(T.nn.Module):
    def __init__(self):
        super().__init__()
        
        self.layer1 = T.nn.Linear(34, 34)
        self.act1 = T.nn.ReLU()
        
        self.layer2 = T.nn.Linear(34, 34)
        self.act2 = T.nn.ReLU()
        
        self.layer3 = T.nn.Linear(34, 34)
        self.act3 = T.nn.ReLU()
        
        self.layer4 = T.nn.Linear(34, 34)
        self.act4 = T.nn.ReLU()
        
        self.output = T.nn.Linear(34, 1)
        self.sigmoid = T.nn.Sigmoid()
 
    def forward(self, x):
        x = self.act1(self.layer1(x))
        x = self.act2(self.layer2(x))
        x = self.act3(self.layer3(x))
        x = self.act4(self.layer4(x))
        
        x = self.sigmoid(self.output(x))
        return x

In [4]:
def model_train(model, X_train, y_train, X_val, y_val):
    # loss function and optimizer
    loss_fn = T.nn.BCELoss()  # binary cross entropy
    optimizer = T.optim.Adam(model.parameters(), lr=0.0001)
 
    n_epochs = 250   # number of epochs to run
    batch_size = 10  # size of each batch
    batch_start = T.arange(0, len(X_train), batch_size)
 
    # Hold the best model
    best_acc = - np.inf   # init to t infinity
    best_weights = None
 
    for epoch in range(n_epochs):
        model.train()
        with tqdm.tqdm(batch_start, unit="batch", mininterval=0, disable=True) as bar:
            bar.set_description(f"Epoch {epoch}")
            for start in bar:
                # take a batch
                X_batch = X_train[start:start+batch_size]
                y_batch = y_train[start:start+batch_size]
                # forward pass
                y_pred = model(X_batch)
                loss = loss_fn(y_pred, y_batch)
                # backward pass
                optimizer.zero_grad()
                loss.backward()
                # update weights
                optimizer.step()
                # print progress
                acc = (y_pred.round() == y_batch).float().mean()
                bar.set_postfix(
                    loss=float(loss),
                    acc=float(acc)
                )
        # evaluate accuracy at end of each epoch
        model.eval()
        y_pred = model(X_val)
        acc = (y_pred.round() == y_val).float().mean()
        acc = float(acc)
        if acc > best_acc:
            best_acc = acc
            best_weights = copy.deepcopy(model.state_dict())
    # restore model and return best accuracy
    model.load_state_dict(best_weights)
    return best_acc

In [5]:
kfold = StratifiedKFold(n_splits=5, shuffle=True)
cv_scores_wide = []
for train, test in kfold.split(x_train, y_train):
    # create model, train, and get accuracy
    model = Deep()
    accuracy = model_train(model, x_train[train], y_train[train], x_train[test], y_train[test])
    print("Accuracy", accuracy)
    cv_scores_wide.append(accuracy)


KeyboardInterrupt: 

In [7]:
x_train, x_test, y_train, y_test = train_test_split(x, y, random_state=1, train_size=0.8)
model = Deep()
accuracy = model_train(model, x_train, y_train, x_test, y_test)
print(accuracy)

0.8980667591094971
