# Load dataset:

In [6]:
from components.models.utils import train_test_split
from components.datasets.dataset_factory import dataset_factory
from components.datasets.dataset_enum import Dataset
from components.text_handler.embedding.embedding_enum import EmbeddingType
from components.text_handler.embedding.embedding_factory import embedding_factory

# Build the GloVe embedding and the GoEmotions dataset on top of it, then
# split the dataset into train/test data and labels.
embedding = embedding_factory(EmbeddingType.GLOVE)
dataset = dataset_factory(Dataset.GO_EMOTIONS, embedding)

# NOTE(review): 0.85 is presumably the fraction of samples assigned to the
# train split -- confirm against components.models.utils.train_test_split.
(
    train_dataset_data,
    train_dataset_labels,
    test_dataset_data,
    test_dataset_labels,
) = train_test_split(dataset, 0.85)

# Lengths of both splits; they are handed to feed_model in the training cell.
train_size, test_size = len(train_dataset_data), len(test_dataset_data)


# Create model:
* Only GRU for now

In [7]:
import torch
from torch import nn
from components.models.algorithms.gru import GRU

# Hyperparameters.
# NOTE(review): input_size presumably has to match the width of the GloVe
# vectors produced in the dataset cell -- confirm.
input_size = 100
hidden_size = 64
output_size = 28  # we have 28 labels
batch_size = 32
num_epochs = 10
learning_rate = 4e-4

# Model, optimizer and loss.
# NOTE(review): CrossEntropyLoss scores exactly one target class per sample;
# confirm this matches how dataset_factory encodes the GoEmotions labels.
model = GRU(input_size, output_size, hidden_size)
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)
criterion = nn.CrossEntropyLoss()

# Define metric objects:

In [8]:
import torchmetrics

# One metric object per reported score, each configured with the number of
# output classes of the model.
# NOTE(review): newer torchmetrics releases expect an explicit `task=`
# argument on these constructors -- confirm against the pinned version.
accuracy, roc_accuracy, F1_accuracy = (
    torchmetrics.Accuracy(num_classes=output_size),
    torchmetrics.AUROC(num_classes=output_size),
    torchmetrics.F1Score(num_classes=output_size),
)



# Train model:

In [None]:
from components.models.algorithms.feed_nn_model import feed_model
from components.models.utils import shuffle
from tqdm import tqdm

# Phase keys (inner keys of `measurements`).
TRAIN = "train"
TEST = "test"

# Metric keys (outer keys of `measurements`).
LOSS = "loss"
ACC = "accuracy"
ROC = "roc accuracy"
F1 = "F1 accuracy"

# measurements[metric][phase] is a list that starts out empty. A fresh list is
# created for every metric/phase pair, so no two entries share storage.
measurements = {
    metric_key: {TRAIN: [], TEST: []}
    for metric_key in (LOSS, ACC, ROC, F1)
}

# Trailing arguments that are identical for the train and the test call of
# feed_model; only the data, labels, size and phase key differ between them.
shared_feed_args = (
    batch_size,
    criterion,
    optimizer,
    accuracy,
    roc_accuracy,
    F1_accuracy,
    measurements,
)

for epoch in tqdm(range(num_epochs)):
    # Feed the train split, tagged with the TRAIN key.
    feed_model(
        model, hidden_size,
        train_dataset_data, train_dataset_labels, train_size,
        *shared_feed_args, TRAIN,
    )

    # Feed the test split, tagged with the TEST key.
    # NOTE(review): the optimizer is passed here as well; presumably
    # feed_model uses the phase key to skip weight updates -- confirm.
    feed_model(
        model, hidden_size,
        test_dataset_data, test_dataset_labels, test_size,
        *shared_feed_args, TEST,
    )

    # Re-shuffle data and labels together before the next epoch.
    train_dataset_data, train_dataset_labels = shuffle(train_dataset_data, train_dataset_labels)




# Plot results:

In [None]:
from matplotlib import pyplot as plt

# An empty train-loss list is treated as "the training cell has not been run".
if not measurements[LOSS][TRAIN]:
    print("You didn't run the training step!")
else:
    print("Plotting results:\n")
    # The title does not depend on the metric, so build it once.
    figure_title = f'{"gru"}, hidden_size={hidden_size}'

    for measurement_key, measurement in measurements.items():
        train_curve = measurement[TRAIN]
        test_curve = measurement[TEST]

        # Report the most recent value of each curve before drawing it.
        print(f"{measurement_key}: train={train_curve[-1]}, test={test_curve[-1]}")

        # One figure per metric, with the train and test curves overlaid.
        plt.title(figure_title)
        plt.xlabel('Epoch')
        plt.ylabel(measurement_key)
        plt.plot(train_curve, label=f'train {measurement_key}')
        plt.plot(test_curve, label=f'test {measurement_key}')
        plt.legend()
        plt.show()