In [1]:
import time
from statistics import mean, stdev

import fasttext
import torch
from torch.utils.data import DataLoader

from config import SG_CORPUS, SG_FULL, CHECKPOINTS_DIR, PROBLEM_TEST
from data import HatefulTweets, WordDataset
from experiment import (
    run_repeated_cnn,
    test_inference_time,
    calculate_memory_usage,
    check_errors,
)
from nn import CNNModel

In [2]:
# Repeated CNN experiment using the fastText embeddings stored at SG_CORPUS.
# The log below shows the global seed being set to 1..10, and the returned dict
# reports every metric as "mean ± std" (presumably aggregated over those seeded
# runs -- confirm in experiment.run_repeated_cnn).
# NOTE(review): `name` presumably prefixes the saved checkpoints (a later cell
# loads "cnn_corpus_1.ckpt") -- TODO confirm.
run_repeated_cnn(SG_CORPUS, name="cnn_corpus")

Global seed set to 1
Global seed set to 2
Global seed set to 3
Global seed set to 4
Global seed set to 5
Global seed set to 6
Global seed set to 7
Global seed set to 8
Global seed set to 9
Global seed set to 10


{'test/loss': '0.3190 ± 0.0112',
 'test/f1': '0.5002 ± 0.0287',
 'test/acc': '0.8954 ± 0.0062',
 'test/precision': '0.6982 ± 0.0521',
 'test/recall': '0.3910 ± 0.0295',
 'train/loss': '0.0782 ± 0.0295',
 'train/f1': '0.9741 ± 0.0179',
 'train/acc': '0.9955 ± 0.0032',
 'train/precision': '0.9560 ± 0.0325',
 'train/recall': '0.9934 ± 0.0037',
 'train_time': '95.4654 ± 10.1520'}

In [3]:
# Same repeated CNN experiment, this time with the embeddings referenced by
# SG_FULL (presumably a fastText model like SG_CORPUS -- TODO confirm in config).
# The log below again shows seeds 1..10 and the result dict reports
# "mean ± std" per metric, so the numbers are directly comparable with the
# "cnn_corpus" run.
run_repeated_cnn(SG_FULL, name="cnn_full")

Global seed set to 1
Global seed set to 2
Global seed set to 3
Global seed set to 4
Global seed set to 5
Global seed set to 6
Global seed set to 7
Global seed set to 8
Global seed set to 9
Global seed set to 10


{'test/loss': '0.3371 ± 0.0126',
 'test/f1': '0.4822 ± 0.0288',
 'test/acc': '0.8867 ± 0.0057',
 'test/precision': '0.6225 ± 0.0372',
 'test/recall': '0.3940 ± 0.0288',
 'train/loss': '0.0883 ± 0.0235',
 'train/f1': '0.9723 ± 0.0135',
 'train/acc': '0.9952 ± 0.0024',
 'train/precision': '0.9517 ± 0.0240',
 'train/recall': '0.9940 ± 0.0028',
 'train_time': '89.5961 ± 5.8525'}

In [2]:
# Load the fastText embeddings that turn words into vectors for the dataset.
embeddings_model = fasttext.load_model(str(SG_CORPUS))

# Test split of the task; each word is embedded with `get_word_vector`.
# NOTE(review): the positional 32 presumably is the per-tweet token length and
# should match `sentence_length` passed to the model below -- TODO confirm.
dataset = WordDataset(PROBLEM_TEST, embeddings_model.get_word_vector, 32)

# Loader used for the inference-time benchmark: `drop_last=True` discards the
# trailing incomplete batch so every timed batch holds exactly 128 items, and
# `pin_memory=True` uses page-locked host memory to speed up copies to the GPU.
loader = DataLoader(
    dataset,
    batch_size=128,
    pin_memory=True,
    drop_last=True,
)

# Restore the first trained "cnn_corpus" model. The hyperparameters are passed
# explicitly to `load_from_checkpoint` alongside the checkpoint path.
checkpoint_file = CHECKPOINTS_DIR / "cnn_corpus_1.ckpt"
model = (
    CNNModel.load_from_checkpoint(
        checkpoint_file,
        conv_kernels=[3, 4, 5],
        conv_filter=100,
        head_dim=300,
        sentence_length=32,
        learning_rate=1e-5,
    )
    .cuda()  # requires a CUDA-capable device
    # Modules are in training mode by default; switch to evaluation mode so
    # dropout / normalisation layers behave deterministically during the
    # timing and error-analysis cells. `eval()` returns the module itself.
    .eval()
)



In [6]:
# Benchmark inference with the currently bound `loader`; the result is shown as
# a "mean ± std" string (units are not visible here -- see
# experiment.test_inference_time).
# NOTE(review): this cell's recorded execution count (6) is later than the cell
# below that rebinds `loader` without `drop_last` (count 5), so the stored
# output may have been measured with that loader. Restart the kernel and run
# all cells in order to reproduce the number.
test_inference_time(model, loader)

'0.0008 ± 0.0010'

In [4]:
# Report the memory footprint of the loaded model; the helper returns a
# formatted string in MB (what exactly is counted is defined in
# experiment.calculate_memory_usage).
calculate_memory_usage(model)

'1.725 MB'

In [5]:
# Error analysis must see every test example, so build the loader without
# `drop_last` (the DataLoader default keeps the trailing partial batch).
# NOTE: this rebinds the global `loader`; any cell executed afterwards --
# including a re-run of the timing cell -- will pick up this loader.
loader = DataLoader(dataset, batch_size=128, pin_memory=True)

# Prints correct/incorrect counts and the most confidently misclassified tweets.
check_errors(model, PROBLEM_TEST, loader)

Predicted correctly: 894
Predicted incorrectly: 106

Non-hate tweets predicted as hate: 30
Most misclassified examples:
	Prob: 1.000 	Text: 'celny snajperski strzał w lewacką chołotę'
	Prob: 1.000 	Text: 'ten to już zupełnie odwiesił mózg na kołek chory mózg'
	Prob: 0.998 	Text: 'jestem ukrainskim żydem z polskim obywatelstwem tnij może jedna ci starczy'
	Prob: 0.994 	Text: 'trzeba być patriotą swojego miasta swojego regionu swojej ziemi prezes j w'
	Prob: 0.994 	Text: 'rt trzeba być patriotą swojego miasta swojego regionu swojej ziemi prezes j w'
	Prob: 0.967 	Text: 'polacy ratujmy polskę od zlodzieji po i lisa woljsdojcza'
	Prob: 0.942 	Text: 'pis już się zbliża już puka do twych drzwipobiegnij go przywitać z radości serce drży'
	Prob: 0.936 	Text: 'rt polacy ratujmy polskę od zlodzieji po i lisa woljsdojcza'
	Prob: 0.907 	Text: '“ brzydka z makijażem brzydka bez makijażu brzydka rano brzydka wieczorem brzydka'
	Prob: 0.899 	Text: 'droga pkamilko leczyć się leczyć póki czas'

Hate tw