In [1]:
import fasttext
import torch
from torch.utils.data import DataLoader
from sentence_transformers import SentenceTransformer

from config import PROBLEM_TEST, CHECKPOINTS_DIR, ENGLISH_TRAIN, ENGLISH_TEST
from data import TextDataset, HatefulTweets
from functools import partial
from nn import train_model, BinaryMLP

from experiment import (
    run_repeated_labse_single,
    run_repeated_labse_multi,
    test_inference_time,
    calculate_memory_usage,
)

In [2]:
# Repeated LaBSE single-dataset experiment. Only `name` is passed, so the
# train/test paths fall back to the helper's defaults (presumably the Polish
# data, judging by the run name -- confirm in experiment.py).
polish_metrics = run_repeated_labse_single(
    name="labse_single_polish",
)
polish_metrics

Global seed set to 1
Global seed set to 2
Global seed set to 3
Global seed set to 4
Global seed set to 5
Global seed set to 6
Global seed set to 7
Global seed set to 8
Global seed set to 9
Global seed set to 10


{'test/loss': '0.3486 ± 0.0164',
 'test/f1': '0.4444 ± 0.0283',
 'test/acc': '0.8822 ± 0.0066',
 'test/precision': '0.6048 ± 0.0451',
 'test/recall': '0.3515 ± 0.0218',
 'train/loss': '0.1519 ± 0.0421',
 'train/f1': '0.8967 ± 0.0310',
 'train/acc': '0.9816 ± 0.0062',
 'train/precision': '0.8674 ± 0.0537',
 'train/recall': '0.9296 ± 0.0068',
 'train_time': '20.5316 ± 2.2389'}

In [None]:
# Same repeated single-dataset experiment, but pointed explicitly at the
# English train/test paths imported from config.
english_metrics = run_repeated_labse_single(
    name="labse_single_english", train_path=ENGLISH_TRAIN, test_path=ENGLISH_TEST
)
english_metrics

In [2]:
# Repeated LaBSE "multi" experiment; only the run name is supplied, every
# other setting comes from the helper's defaults.
multi_metrics = run_repeated_labse_multi(
    name="labse_multi",
)
multi_metrics

Global seed set to 1
Global seed set to 2
Global seed set to 3
Global seed set to 4


In [6]:
# One batch size shared by the sentence encoder and the DataLoader
# (previously the literal 128 was repeated in both places).
BATCH_SIZE = 128

embeddings_model = SentenceTransformer("sentence-transformers/LaBSE")


def get_embeddings(x):
    """Encode ``x`` with the LaBSE sentence encoder.

    The encoder is asked for a tensor (``convert_to_tensor=True``) and the
    result is moved to the CPU before being returned.
    """
    return embeddings_model.encode(
        x,
        convert_to_numpy=False,
        convert_to_tensor=True,
        batch_size=BATCH_SIZE,
    ).cpu()


dataset = TextDataset(PROBLEM_TEST, get_embeddings)
loader = DataLoader(
    dataset,
    batch_size=BATCH_SIZE,
    pin_memory=True,
    shuffle=False,
    num_workers=0,
    # NOTE: the trailing partial batch is dropped, so every batch seen by the
    # benchmark has exactly BATCH_SIZE items (up to BATCH_SIZE - 1 samples are
    # skipped).
    drop_last=True,
)

# Checkpoint saved by the "labse_single_polish" run (file suffix "_1").
checkpoint_file = CHECKPOINTS_DIR / "labse_single_polish_1.ckpt"
model = BinaryMLP.load_from_checkpoint(
    checkpoint_file,
    emb_dim=768,  # presumably LaBSE's embedding width -- must match the checkpoint
    hidden_dims=[256, 128],
    learning_rate=1e-4,
)
# Move to the GPU and switch to inference mode: a freshly loaded module is
# not guaranteed to be in eval mode, and the cells below only benchmark
# inference. `.eval()` returns the module, so the assignment keeps this cell
# output-free.
model = model.cuda().eval()

In [8]:
# Benchmark inference of the loaded checkpoint (`model`) over the batches
# produced by `loader`; the helper's return value is shown as the cell output.
inference_time = test_inference_time(
    model,
    loader,
)
inference_time

'0.0002 ± 0.0001'

In [9]:
# Report the memory footprint the helper computes for the loaded `model`.
memory_usage = calculate_memory_usage(
    model,
)
memory_usage

'0.883 MB'