In [7]:
import torch as t
import torch.nn as nn
import numpy as np
import torch.optim as optim
import tqdm
import copy
from sklearn.model_selection import StratifiedKFold, train_test_split

In [8]:
import os

os.chdir("..")
data = np.loadtxt(('data/preprocessed-dataset.csv'), delimiter=',', skiprows=1)
os.chdir("methods")
x = data[:,1:35]
y = data[:,35]

#converting the data to pytorch tensors

x = t.tensor(x, dtype=t.float32)
y = t.tensor(y, dtype=t.float32).reshape(-1, 1)

x_train, x_test, y_train, y_test = train_test_split(x, y, random_state=1, train_size=0.8)

In [9]:
class NeuralNet(nn.Module):
    def __init__(self):    
        super().__init__()
        self.hidden = nn.Linear(34, 102)
        self.relu = nn.ReLU()
        self.output = nn.Linear(102, 1)
        self.sigmoid = nn.Sigmoid()
    
    def forward(self, x):
        x = self.relu(self.hidden(x))
        x = self.sigmoid(self.output(x))
        return x

In [10]:
def model_train(model, X_train, y_train, X_val, y_val):
    # loss function and optimizer
    loss_fn = nn.BCELoss()  # binary cross entropy
    optimizer = optim.Adam(model.parameters(), lr=0.0001)
 
    n_epochs = 250   # number of epochs to run
    batch_size = 10  # size of each batch
    batch_start = t.arange(0, len(X_train), batch_size)
 
    # Hold the best model
    best_acc = - np.inf   # init to t infinity
    best_weights = None
 
    for epoch in range(n_epochs):
        model.train()
        with tqdm.tqdm(batch_start, unit="batch", mininterval=0, disable=True) as bar:
            bar.set_description(f"Epoch {epoch}")
            for start in bar:
                # take a batch
                X_batch = X_train[start:start+batch_size]
                y_batch = y_train[start:start+batch_size]
                # forward pass
                y_pred = model(X_batch)
                loss = loss_fn(y_pred, y_batch)
                # backward pass
                optimizer.zero_grad()
                loss.backward()
                # update weights
                optimizer.step()
                # print progress
                acc = (y_pred.round() == y_batch).float().mean()
                bar.set_postfix(
                    loss=float(loss),
                    acc=float(acc)
                )
        # evaluate accuracy at end of each epoch
        model.eval()
        y_pred = model(X_val)
        acc = (y_pred.round() == y_val).float().mean()
        acc = float(acc)
        if acc > best_acc:
            best_acc = acc
            best_weights = copy.deepcopy(model.state_dict())
    # restore model and return best accuracy
    model.load_state_dict(best_weights)
    return best_acc

In [11]:
kfold = StratifiedKFold(n_splits=5, shuffle=True)
cv_scores_wide = []
accuracies=[]
for train, test in kfold.split(x_train, y_train):
    # create model, train, and get accuracy
    model = NeuralNet()
    accuracy = model_train(model, x_train[train], y_train[train], x_train[test], y_train[test])
    print("Accuracy", accuracy)
    cv_scores_wide.append(accuracy)


KeyboardInterrupt: 

In [None]:
'''
TODO: CLEAN UP

'''

In [13]:
x_train, x_test, y_train, y_test = train_test_split(x, y, random_state=1, train_size=0.8)
model = NeuralNet()
accuracy = model_train(model, x_train, y_train, x_test, y_test)
print(accuracy)

0.9050966501235962
