### Load necessary libraries and some chart configuration

In [1]:
import sys
sys.path.append("../")

from sklearn.metrics import classification_report
import joblib as jbl
import torch

from utils.utils import Classifier
from utils.utils import predict

In [2]:
# Model settings: both values are passed to Classifier(...) in the
# evaluation loop further down.
PRE_TRAINED_MODEL_NAME = "bert-base-uncased"
DROPOUT = 0.5

# Data settings: not referenced by the visible cells (the test data loaders
# are loaded pre-built from disk); presumably kept to mirror the training
# configuration -- TODO confirm.
MAX_LEN = 128
BATCH_SIZE = 16

In [3]:
# Task names; each one selects the dictionary, model checkpoint and test
# data loader paths used by the evaluation loop.
classes = [
    "gender",
    "age",
    "sign",
    "topic",
]

In [4]:
# For every task: load its class dictionary, rebuild the classifier from the
# saved checkpoint, run it over the stored test data loader and print a
# per-class precision / recall / F1 report.
for class_name in classes:
    print(f'Preparing prediction on test set for task: <{class_name}>\n')

    print("Loading class names dictionary ...", end="")
    class2idx_dict = jbl.load(f'../data/dictionaries/class2idx_{class_name}.jbl')
    print("done!")

    print("Loading model ...", end="")
    # The number of entries in the class dictionary sets the classifier size.
    model = Classifier(len(class2idx_dict), DROPOUT, PRE_TRAINED_MODEL_NAME)
    # map_location remaps the stored tensors to the CPU, so a checkpoint saved
    # on a GPU can still be loaded on a CPU-only machine.
    # NOTE(review): torch.load unpickles the file -- only load checkpoints
    # from a trusted source.
    model.load_state_dict(torch.load(f'../models/{class_name}/best_model_state.bin', map_location=torch.device('cpu')))
    # A freshly constructed module is in training mode; switch to evaluation
    # mode so dropout is disabled and test-set predictions are deterministic.
    # Harmless (idempotent) if predict() also does this internally.
    model.eval()
    print("done!")

    print("Loading data ...", end="")
    test_data_loader = jbl.load(f'../data/dataloaders/{class_name}/test_data_loader.jbl')
    print("done!")

    print("Calculating predictions ...", end="")
    y_review_texts, y_pred, y_pred_probs, y_actual = predict(
        model,
        test_data_loader,
    )
    print("done!")

    # Prepare classification report with precision, recall and f1-score.
    # NOTE(review): target_names are taken in the dictionary's iteration
    # order, while classification_report pairs them with the sorted label
    # values -- this presumes the dictionary was built in ascending index
    # order; confirm against the code that creates class2idx_*.jbl.
    print(classification_report(y_actual, y_pred, target_names=list(class2idx_dict.keys())))

Preparing prediction on test set for task: <gender>

Loading class names dictionary ...done!
Loading model ...done!
Loading data ...done!
Calculating predictions ...done!
              precision    recall  f1-score   support

        male       0.87      0.81      0.84        16
      female       0.70      0.78      0.74         9

    accuracy                           0.80        25
   macro avg       0.78      0.80      0.79        25
weighted avg       0.81      0.80      0.80        25

Preparing prediction on test set for task: <age>

Loading class names dictionary ...done!
Loading model ...done!
Loading data ...done!
Calculating predictions ...done!
              precision    recall  f1-score   support

         <20       0.62      0.71      0.67        14
         >25       0.90      0.47      0.62        19
       20-25       0.25      0.60      0.35         5

    accuracy                           0.58        38
   macro avg       0.59      0.60      0.55        38
weighted

## Summary

Results on the test data look quite good given such short training and no hyperparameter tuning.

Despite overfitting, results on the test set are similar to those observed on the validation set.

Tasks with more classes, ``topic`` and ``sign``, still need real work: we were able to correctly predict only some of the actual values. This is due to the short training and the small dataset; the model was not able to learn the data under such conditions.