# Code for training with validation
TODO: Implement early stopping with patience

In [15]:
import torch
import copy


def train_model(model, train_data, train_labels, val_data, val_labels, epochs, batch_size, loss_func, optimizer):
    """Train ``model`` in mini-batches and keep the weights with the best validation accuracy.

    After every epoch the model is evaluated on the validation set. Whenever
    the validation accuracy strictly improves, a copy of the weights is stored
    and a progress line is printed. When training ends, the best stored
    weights are loaded back into ``model``.

    Args:
        model: ``torch.nn.Module`` to train; modified in place.
        train_data: Training inputs, sliceable along the first dimension.
        train_labels: Training targets aligned with ``train_data``.
        val_data: Validation inputs, passed to the model in a single call.
        val_labels: Validation targets aligned with ``val_data``.
        epochs: Number of passes over the training data.
        batch_size: Samples per optimisation step. Only full batches are
            used, so trailing samples are skipped when ``batch_size`` does
            not evenly divide ``len(train_data)``.
        loss_func: Callable ``loss_func(outputs, targets)`` returning a scalar
            tensor that supports ``backward()``.
        optimizer: Optimizer that updates the parameters of ``model``.

    Returns:
        None. The result of training is the state loaded into ``model``.
    """
    # The third column is the validation accuracy (the model-selection metric).
    print("Epoch\t train loss\t validation accuracy")
    best_model = copy.deepcopy(model.state_dict())
    best_model_epoch = 0
    best_val_accuracy = 0

    # Integer division: a trailing partial batch is never trained on.
    num_batches = len(train_data) // batch_size

    for epoch in range(epochs):
        epoch_train_loss = 0.0
        model.train()  # Set model to train mode

        for i in range(num_batches):
            start = i * batch_size
            end = start + batch_size

            # Clear gradients first so nothing left over from earlier
            # backward passes leaks into this update.
            optimizer.zero_grad()
            train_outputs = model(train_data[start:end])
            loss = loss_func(train_outputs, train_labels[start:end])
            loss.backward()
            optimizer.step()

            # Accumulate a plain float; summing the loss tensors themselves
            # would keep tensor objects (and device memory) alive needlessly.
            epoch_train_loss += loss.item()

        model.eval()  # Set model to evaluation mode

        # Validation needs no gradients. Accuracy is computed the same way as
        # in accuracy_check (arg-max over dimension 1), reusing one forward pass.
        with torch.no_grad():
            val_outputs = model(val_data)
        predictions = torch.max(val_outputs, 1)[1]
        val_accuracy = torch.sum(predictions == val_labels).item() / len(predictions)

        # Check for new best model on the validation set.
        if val_accuracy > best_val_accuracy:
            best_val_accuracy = val_accuracy
            best_model_epoch = epoch
            best_model = copy.deepcopy(model.state_dict())
            # With no full batch there is no training loss to report.
            mean_train_loss = epoch_train_loss / num_batches if num_batches else float("nan")
            print(epoch, "\t ", mean_train_loss, "\t", val_accuracy)

    print("Best model found on epoch: ", best_model_epoch)
    model.load_state_dict(best_model)  # Set model to best performing one.
    

# TODO: Implement patience.
# TODO: Graphing of train and val loss, save train and val loss & perhaps accuracy to plot later.


def accuracy_check(network, data, labels):
    """Return the fraction of samples in ``data`` that ``network`` classifies correctly.

    The network is switched to evaluation mode (and left in it). The predicted
    class of each sample is the index of the largest output along dimension 1.

    Args:
        network: ``torch.nn.Module`` whose output has one row per sample.
        data: Inputs, passed to the network in a single call.
        labels: Tensor of true class indices, one per sample.

    Returns:
        Accuracy as a Python float (correct predictions / number of predictions).
    """
    network.eval()

    # Pure inference: skip autograd bookkeeping so no graph is built for
    # the whole dataset.
    with torch.no_grad():
        outputs = network(data)

    predictions = torch.max(outputs, 1)[1]
    correct = torch.sum(predictions == labels)
    return correct.item() / len(predictions)
    

In [18]:
import numpy as np
from sklearn.model_selection import StratifiedKFold, train_test_split
import torch
import torch.nn as nn
import torch.optim as optim

#%run deepCNN.ipynb
#%run shallowCNN.ipynb
%run EEGNet.ipynb
%run data_handler.ipynb


# Shared splitter for the cross-validation loop: 5 stratified, shuffled folds.
# No random_state is given, so the fold assignment differs between runs.
kf = StratifiedKFold(n_splits=5, shuffle=True)

# Use the first CUDA device when one is available, otherwise the CPU.
if torch.cuda.is_available():
    device = torch.device('cuda:0')
else:
    device = torch.device('cpu')

# TODO: I must implement patience and run on more epochs. Best model was found on epoch 19 very often so there is potential for improvement.
#This is my 5-fold validation loop :D
def _write_subject_results(filename, subject_nr, train_accuracies, val_accuracies, test_accuracies):
    """Append one subject's per-fold and average accuracies to the results file."""

    def _joined(accuracies):
        # Same "a, b, c, " layout (with trailing separator) as before.
        return "".join("{0}, ".format(acc) for acc in accuracies)

    def _mean(accuracies):
        # Average over the folds that were actually run.
        return sum(accuracies) / len(accuracies)

    with open(filename, "a") as fil:
        fil.write("\n\nResults for subject {0}.\n".format(subject_nr))
        fil.write("Train accuracies from 5 folds:\n")
        fil.write(_joined(train_accuracies))
        fil.write("\n")
        fil.write("Validation accuracies from 5 folds:\n")
        fil.write(_joined(val_accuracies))
        fil.write("\n")
        fil.write("Test accuracies from 5 folds:\n")
        fil.write(_joined(test_accuracies))
        fil.write("\n")
        fil.write("Avg train acc: {0}, Avg validation acc: {1}, Avg test acc: {2}, ".format(
            _mean(train_accuracies), _mean(val_accuracies), _mean(test_accuracies)))
        fil.write("\n\n#####################################\n\n")


def cross_validation_loop(LR = 0.001, epochs = 50, batch_size = 10):
    """Run stratified k-fold cross-validation of EEGNet for subjects 1-10.

    For every subject the data is split with the module-level ``kf`` splitter
    into train+validation and test parts. The train+validation part is split
    again (75 % / 25 %, stratified) and a fresh EEGNet is trained on it with
    ``train_model``. Train, validation and test accuracies of every fold, plus
    their averages, are appended to ``results/results_eegnet_experiment.txt``.

    Args:
        LR: Learning rate for the Adam optimizer.
        epochs: Number of training epochs per fold.
        batch_size: Mini-batch size passed to ``train_model``.
    """
    import os

    filename = "results/results_eegnet_experiment.txt"

    # Create the output directory up front so the run cannot fail on a
    # missing folder.
    results_dir = os.path.dirname(filename)
    if results_dir:
        os.makedirs(results_dir, exist_ok=True)

    # Experiment header; the context manager closes the file even if the
    # write fails.
    with open(filename, "a") as fil:
        fil.write("Experiment done on model {0}. \n Learning rate: {1} \n Optimizer: {2} \n Loss function: {3} \n Epochs: {6}, Batch size: {7} \n Experiments run on {4} interval with frequency {5} Hz on the data."
        .format("EEGNet", LR, "Adam", "NLLLoss", "full", 254, epochs, batch_size))

    for subject_nr in range(1,11):
        print("Loading subject ", subject_nr)
        data, description = load_subject(subject_nr)
        # Presumably keeps only the inner-speech trials; verify in data_handler.
        data, description = get_innerspeech(data, description)
        # data_interval = extract_action_interval(data, hz = 254)
        labels = extract_labels(description)

        train_accuracies = []
        val_accuracies = []
        test_accuracies = []

        # StratifiedKFold ignores the `groups` argument, so only data and
        # labels are passed.
        for train_index, test_index in kf.split(data, labels):
            # New model, loss and optimizer for every fold
            model = EEGNet(hz = 254, interval = "full").float().to(device)
            loss_func = nn.NLLLoss()
            op = optim.Adam(params = model.parameters(), lr = LR)

            # Choose train+val data/labels
            train_da = data[train_index]
            train_la = labels[train_index]
            # Choose test data/labels
            test_data = data[test_index]
            test_labels = labels[test_index]

            # Split train into train and val
            train_data, val_data, train_labels, val_labels = train_test_split(train_da, train_la, test_size = 0.25, random_state = None, shuffle = True, stratify = train_la)
            train_data, val_data, test_data, train_labels, val_labels, test_labels = to_device(train_data, val_data, test_data, train_labels, val_labels, test_labels, device)
            # Train loop
            train_model(model, train_data = train_data, train_labels = train_labels, val_data = val_data, val_labels = val_labels,
                        epochs = epochs, batch_size = batch_size, loss_func = loss_func, optimizer = op)
            # Save accuracies of the best model restored by train_model
            train_accuracies.append(accuracy_check(model, train_data, train_labels))
            val_accuracies.append(accuracy_check(model, val_data, val_labels))
            test_accuracies.append(accuracy_check(model, test_data, test_labels))
            print(test_accuracies)

        # Write results for subject to file
        _write_subject_results(filename, subject_nr, train_accuracies, val_accuracies, test_accuracies)
    


# Run the full experiment with 50 training epochs per fold; learning rate and
# batch size keep their defaults.
cross_validation_loop(epochs = 50)



Loading subject  1
Epoch	 train loss	 validation loss
0 	  1.4202756881713867 	 0.25
15 	  0.020686404158671696 	 0.325
19 	  0.009589392070968946 	 0.35
Best model found on epoch:  19
[0.2]
Epoch	 train loss	 validation loss
0 	  1.45609712600708 	 0.25
Best model found on epoch:  0
[0.2, 0.25]
Epoch	 train loss	 validation loss
0 	  1.453019142150879 	 0.25
10 	  0.07708259920279185 	 0.275
13 	  0.031407430768013 	 0.325
29 	  0.0035668639466166496 	 0.35
Best model found on epoch:  29
[0.2, 0.25, 0.25]
Epoch	 train loss	 validation loss
0 	  1.4466896057128906 	 0.25
11 	  0.08088800311088562 	 0.3
16 	  0.012224850555260977 	 0.325
Best model found on epoch:  16
[0.2, 0.25, 0.25, 0.275]
Epoch	 train loss	 validation loss
0 	  1.4600060780843098 	 0.25
10 	  0.05717926720778147 	 0.275
11 	  0.03449664761622747 	 0.325
Best model found on epoch:  11
[0.2, 0.25, 0.25, 0.275, 0.3]
Loading subject  2
Epoch	 train loss	 validation loss
0 	  1.4450299399239677 	 0.25
8 	  0.042046040296