In [1]:
import time
from statistics import mean, stdev

import fasttext
import torch
from torch.utils.data import DataLoader

from config import SG_CORPUS, SG_FULL, CHECKPOINTS_DIR, PROBLEM_TEST
from data import HatefulTweets, WordDataset
from experiment import run_repeated_cnn, test_inference_time, calculate_memory_usage
from nn import CNNModel

In [2]:
# Run the repeated CNN experiment using the SG_CORPUS embeddings.
# The recorded output logs global seeds 1-10 and reports every metric as
# "value ± spread" (presumably mean ± standard deviation over the runs —
# confirm in experiment.run_repeated_cnn).
# NOTE(review): `name` appears to prefix the saved checkpoints, since
# "cnn_corpus_1.ckpt" is loaded later in this notebook — confirm.
run_repeated_cnn(SG_CORPUS, name="cnn_corpus")

Global seed set to 1
Global seed set to 2
Global seed set to 3
Global seed set to 4
Global seed set to 5
Global seed set to 6
Global seed set to 7
Global seed set to 8
Global seed set to 9
Global seed set to 10


{'test/loss': '0.3227 ± 0.0132',
 'test/f1': '0.5141 ± 0.0405',
 'test/acc': '0.8961 ± 0.0068',
 'test/precision': '0.6896 ± 0.0468',
 'test/recall': '0.4119 ± 0.0470',
 'train/loss': '0.1018 ± 0.0305',
 'train/f1': '0.9644 ± 0.0163',
 'train/acc': '0.9937 ± 0.0029',
 'train/precision': '0.9378 ± 0.0287',
 'train/recall': '0.9928 ± 0.0043',
 'train_time': '86.6700 ± 9.5976'}

In [3]:
# Same repeated CNN experiment as the "cnn_corpus" run, but with the SG_FULL
# embeddings and the run name "cnn_full".
# The recorded output again logs global seeds 1-10 and reports every metric
# as "value ± spread" (presumably mean ± standard deviation — confirm in
# experiment.run_repeated_cnn).
run_repeated_cnn(SG_FULL, name="cnn_full")

Global seed set to 1
Global seed set to 2
Global seed set to 3
Global seed set to 4
Global seed set to 5
Global seed set to 6
Global seed set to 7
Global seed set to 8
Global seed set to 9
Global seed set to 10


{'test/loss': '0.3381 ± 0.0126',
 'test/f1': '0.4818 ± 0.0283',
 'test/acc': '0.8859 ± 0.0056',
 'test/precision': '0.6157 ± 0.0349',
 'test/recall': '0.3963 ± 0.0278',
 'train/loss': '0.0952 ± 0.0226',
 'train/f1': '0.9695 ± 0.0100',
 'train/acc': '0.9947 ± 0.0018',
 'train/precision': '0.9463 ± 0.0189',
 'train/recall': '0.9940 ± 0.0016',
 'train_time': '86.6509 ± 10.9041'}

In [2]:
# Build everything the inference benchmark cells need: the fastText
# embeddings, a DataLoader over PROBLEM_TEST, and the trained CNN on the GPU.

# NOTE(review): the original passed the literal 32 both to WordDataset (third
# positional argument) and to CNNModel (sentence_length). They are assumed to
# be the same quantity — confirm against WordDataset's signature.
SENTENCE_LENGTH = 32
BATCH_SIZE = 128

embeddings_model = fasttext.load_model(str(SG_CORPUS))

# Each word is embedded through the fastText model's get_word_vector callable.
dataset = WordDataset(
    PROBLEM_TEST,
    embeddings_model.get_word_vector,
    SENTENCE_LENGTH,
)

# Fixed order, main-process loading, and the trailing partial batch dropped.
loader = DataLoader(
    dataset, batch_size=BATCH_SIZE, shuffle=False,
    num_workers=0, pin_memory=True, drop_last=True,
)

# Checkpoint from the "cnn_corpus" experiment; the hyperparameters are passed
# explicitly alongside it when restoring the model.
checkpoint_file = CHECKPOINTS_DIR / "cnn_corpus_1.ckpt"
model_hparams = {
    "conv_kernels": [3, 4, 5],
    "conv_filter": 100,
    "head_dim": 300,
    "sentence_length": SENTENCE_LENGTH,
    "learning_rate": 1e-5,
}
model = CNNModel.load_from_checkpoint(checkpoint_file, **model_hparams).cuda()



In [3]:
# Time inference of the restored CNN over the test loader; the helper returns
# a "value ± spread" string (units are not visible from this notebook).
# NOTE(review): in the recorded output the spread (0.0192) is far larger than
# the value (0.0014), which may point to a warm-up outlier such as the first
# CUDA batch — confirm whether test_inference_time warms up / synchronizes.
test_inference_time(model, loader)

'0.0014 ± 0.0192'

In [4]:
# Report the model's memory footprint as a formatted string (e.g. "1.725 MB").
# What exactly is counted (parameters, buffers, ...) is defined in
# experiment.calculate_memory_usage and is not visible from this notebook.
calculate_memory_usage(model)

'1.725 MB'