In [1]:
import torch
import torchvision.transforms as transforms
from torch.utils.data import DataLoader
import matplotlib.pyplot as plt
from myDataSet import *
from myNetworks import *
from myUtil import *
from tqdm import tqdm

In [2]:
# Fixed label -> class-index mapping, shared by the train and test datasets
# so that both are encoded identically.
my_label_map = {str(class_index): class_index for class_index in range(5)}
my_label_map

{'0': 0, '1': 1, '2': 2, '3': 3, '4': 4}

In [3]:
# Load the full labelled training set; it is split into training and
# validation subsets below. (Bound to a descriptive name rather than `_`,
# which IPython reserves for the previous cell output.)
train_data_fname = "trainData.txt"
full_training_data = MyCustomDataset(filename=train_data_fname,
                                     skip_rows=0,
                                     label_col=0,
                                     feature_cols=(1,2),
                                     label_map=my_label_map)


# Split into training and validation sets. The validation size is rounded
# down and the remainder goes to training, so the two lengths always sum to
# the dataset size (required by random_split).
validation_fraction = 0.1
nr_samples = len(full_training_data)
nr_validation_samples = int(validation_fraction*nr_samples)
my_lengths = (nr_samples-nr_validation_samples, nr_validation_samples)

# A dedicated, seeded generator makes the train/validation membership
# identical on every "Restart & Run All".
split_seed = 0
training_data, validation_data = torch.utils.data.random_split(
    dataset=full_training_data,
    lengths=my_lengths,
    generator=torch.Generator().manual_seed(split_seed))
print("## Setting aside:", validation_fraction*100, "% for validation. ##")

# Load the held-out test data with the same column layout and label map.
test_data_fname = "testData.txt"
test_data = MyCustomDataset(filename=test_data_fname,
                            skip_rows=0,
                            label_col=0,
                            feature_cols=(1,2),
                            label_map=my_label_map)

# Wrap the datasets in torch DataLoaders. Only the training loader shuffles;
# the validation and test loaders keep a fixed order so evaluation runs are
# repeatable.
my_batch_size = 4
train_dataloader = DataLoader(training_data, batch_size=my_batch_size, shuffle=True)
test_dataloader = DataLoader(test_data, batch_size=my_batch_size, shuffle=False)
validation_dataloader = DataLoader(validation_data, batch_size=my_batch_size, shuffle=False)
print("  - Using batch size: ", my_batch_size)
print("  - Nr. training batches: ", len(train_dataloader))
print("  - Nr. validation batches: ", len(validation_dataloader))
print("  - Nr. test batches: ", len(test_dataloader))

## Setting aside: 10.0 % for validation. ##
  - Using batch size:  4
  - Nr. training batches:  270
  - Nr. validation batches:  30
  - Nr. test batches:  75


In [4]:
# Build the classifier: one output per class, one input per feature column
# (two feature columns are read from the data files).
my_number_of_classes = 5
my_number_of_features = 2
my_net = NeuralNet(nr_features=my_number_of_features, nr_classes=my_number_of_classes)

In [None]:
# Train on the CPU for a fixed number of epochs, evaluating against the
# validation loader as configured below.
my_number_of_epochs = 200
training_options = dict(
    train_dataloader=train_dataloader,
    validation_dataloader=validation_dataloader,
    epochs=my_number_of_epochs,
    device_name='cpu',
)
result = my_net.train_network(**training_options)

# train_network returns four sequences, in this order; they are plotted
# against the epoch index in the next cell.
(train_accuracies,
 train_losses,
 validation_accuracies,
 validation_losses) = result

 26%|██▌       | 52/200 [00:05<00:15,  9.49it/s]

In [None]:
# Plot the accuracy and loss histories of the training and validation sets
# against the epoch index.
fig, ax = plt.subplots(1,1,figsize=(7,4))
epochs = list(range(len(train_accuracies)))
ax.plot(epochs,train_accuracies,label="train acc")
ax.plot(epochs,train_losses,label="train loss")
ax.plot(epochs,validation_accuracies,ls='--',label="validation acc")
# This curve is the validation loss returned by train_network, not a test-set
# loss, so it is labelled accordingly.
ax.plot(epochs,validation_losses,label="validation loss")

# Fixed y-range: values above 1.2 are clipped from view.
ax.set_ylim(-0.1,1.2)
x_min, x_max = -0.2, my_number_of_epochs+.2
ax.set_xlim(x_min,x_max)
# Thick black baseline at y = 0 spanning the full x-range.
ax.hlines(0,x_min,x_max,lw=4,color="k")
ax.set_xlabel("Epoch nr.")
ax.set_ylabel("Accuracy / loss")
ax.legend(loc=0)
plt.show()

In [None]:
# Evaluate the trained network on the held-out test loader (non-verbose).
test_model(trained_model=my_net, data=test_dataloader, verbose=False)