In [1]:
import pickle
import tools.task3_scorer_onefile
from utils import *
import numpy as np
import logging

import pandas as pd
import torch
from pytorch_pretrained_bert import (BasicTokenizer, BertConfig,
                                     BertForTokenClassification, BertTokenizer)
from pytorch_pretrained_bert.optimization import BertAdam, warmup_linear
from sklearn.metrics import f1_score
from sklearn.metrics import precision_recall_fscore_support as f1
from sklearn.model_selection import train_test_split
from torch.optim import Adam
from torch.utils.data import DataLoader, RandomSampler, SequentialSampler, WeightedRandomSampler, TensorDataset
from tqdm import tqdm_notebook as tqdm
from tqdm import trange
import os 
import itertools

import matplotlib.pyplot as plt 

Better speed can be achieved with apex installed from https://www.github.com/nvidia/apex.


In [2]:
def make_logger() -> None:
    if not os.path.exists("./exp/{}/{}".format(opt["classType"], opt["expID"])):
            try:
                os.mkdir("./exp/{}/{}".format(opt["classType"], opt["expID"]))
            except FileNotFoundError:
                os.mkdir("./exp/{}".format(opt["classType"]))
                os.mkdir("./exp/{}/{}".format(opt["classType"], opt["expID"]))
    
    logging.basicConfig(
    filename= ("./exp/{}/{}/log.txt".format(opt["classType"], opt["expID"])),
    filemode='a',
    level=logging.INFO,
    format='%(asctime)s, %(message)s')

    logging.getLogger().addHandler(logging.StreamHandler())

In [2]:

class EarlyStopping:
    """Early stops the training if validation loss doesn't improve after a given patience."""
    def __init__(self, patience=7, verbose=False):
        """
        Args:
            patience (int): How long to wait after last time validation loss improved.
                            Default: 7
            verbose (bool): If True, prints a message for each validation loss improvement. 
                            Default: False
        """
        self.patience = patience
        self.verbose = verbose
        self.counter = 0
        self.best_score = None
        self.early_stop = False
        self.val_loss_min = np.Inf

    def __call__(self, val_loss, model):

        score = -val_loss

        if self.best_score is None:
            self.best_score = score
            self.save_checkpoint(val_loss, model)
        elif score < self.best_score:
            self.counter += 1
            print(f'EarlyStopping counter: {self.counter} out of {self.patience}')
            if self.counter >= self.patience:
                self.early_stop = True
        else:
            self.best_score = score
            self.save_checkpoint(val_loss, model)
            self.counter = 0

    def save_checkpoint(self, val_loss, model):
        '''Saves model when validation loss decrease.'''
        if self.verbose:
            print(f'Validation loss decreased ({self.val_loss_min:.6f} --> {val_loss:.6f}).  Saving model ...')
        torch.save(
            model.state_dict(), './exp/{}/{}/best_model.pth'.format(opt["classType"], opt["expID"]))
        #torch.save(model.state_dict(), 'checkpoint.pt')
        self.val_loss_min = val_loss


## Preprocess

In [3]:
import pandas as pd
import pickle
import ast
from utils import *


def pad_sequences(sequences: list, batch_first: bool = True, padding_value: int = 0, max_len: int = 0):
    tmp = torch.Tensor(sequences[0])
    max_size = tmp.size()
    trailing_dims = max_size[1:]
    
    if batch_first:
        out_dims = (len(sequences), max_len) + trailing_dims
    else:
        out_dims = (max_len, len(sequences)) + trailing_dims

    out_tensor = tmp.data.new(*out_dims).fill_(padding_value)
    for i, list in enumerate(sequences):
        tensor = torch.Tensor(list)
        length = tensor.size(0)
        if batch_first:
            out_tensor[i, :length, ...] = tensor
        else:
            out_tensor[:length, i, ...] = tensor

    return out_tensor.long().numpy()

# def set_global_label(bio: bool = False) -> None:
#     global hash_token
#     global end_token 
#     if bio:
#         hash_token = 3
#         end_token = 4
#     else:
#         hash_token = 2
#         end_token = 3

def reg_encoding(cleaned: list, labels: list, hash_token, end_token) -> list:
    label_l = []
    for oindex, x in enumerate(cleaned):
        tlist = []
        for index, j in enumerate(x):
            for s in j:
                if s[0]=='#':
                    tlist.append(hash_token)
                else:
                    tlist.append(labels[oindex][index])
        label_l.append(tlist)
    return label_l

def bio_encoding(cleaned: list, labels: list) -> list:
    offset = 1
    
    label_l = []
    for oindex, x in enumerate(cleaned):
        tlist = []
        prev=labels[oindex][0]
        for index, j in enumerate(x):
            #if index==30:
            #ipdb.set_trace()
            for s in j:
                if s[0]=='#':
                    tlist.append(hash_token)
                else:
                    if (index==0 and labels[oindex][index]!=0):
                        tlist.append(labels[oindex][index]+offset)
                        prev = labels[oindex][index]
                    if (prev!=labels[oindex][index] and labels[oindex][index]!= 0):
                        tlist.append(labels[oindex][index]+offset)
                        prev = labels[oindex][index]
                    else:
                        tlist.append(labels[oindex][index])
                        prev = labels[oindex][index]
        label_l.append(tlist)
    return label_l

def concatenate_list_data(cleaned: list) -> list:
    result= []
    for element in cleaned:
        result += element
    return result

def make_set(p2id, data_dir: str, tokenizer, single_class: str, 
             hash_token, end_token, bio: bool = False) -> list: 
    #dataset = pd.read_csv(data_dir, sep='\t', header=None, converters={1:ast.literal_eval, 2:ast.literal_eval})
    data_dict = pickle.load(open(data_dir, "rb"))
    
    dataset = corpus2list(p2id, data_dict["ID"], data_dict["Text"],
                              data_dict["Label"], single_class, bio)
    # Shuffle samples
    #dataset = dataset.sample(frac=1)
    terms = list(dataset[1])
    labels = list(dataset[2])
    
    cleaned = [[tokenizer.tokenize(words) for words in sent] for sent in terms]
    tokenized_texts = [concatenate_list_data(sent) for sent in cleaned]
    if bio:
        label_l = bio_encoding(cleaned, labels)
    else:
        label_l = reg_encoding(cleaned, labels, hash_token, end_token)

    input_ids = pad_sequences([tokenizer.convert_tokens_to_ids(txt) for txt in tokenized_texts],
                          padding_value=0.0, max_len=opt["maxLen"])
    
    tags = pad_sequences(label_l, padding_value=end_token, max_len=opt["maxLen"])
    attention_masks = [[float(i>0) for i in ii] for ii in input_ids]
    
    
    return input_ids, tags, attention_masks, label_l


def make_val_set(p2id, data_dir: str, tokenizer, single_class: str, 
             hash_token, end_token, bio: bool = False) -> list: 
    #dataset = pd.read_csv(data_dir, sep='\t', header=None, converters={1:ast.literal_eval, 2:ast.literal_eval})
    data_dict = pickle.load(open(data_dir, "rb"))
    if not bio:
        dataset = corpus2list(p2id, data_dict["ID"], data_dict["Text"],
                              data_dict["Label"], single_class, bio)
    # Shuffle samples
    #dataset = dataset.sample(frac=1)
    ids = (dataset[0])
    terms = (dataset[1])
    labels = (dataset[2])
    spacy = (dataset[3])
    cleaned = [[tokenizer.tokenize(words) for words in sent] for sent in terms]
    tokenized_texts = [concatenate_list_data(sent) for sent in cleaned]

    if bio:
        label_l = bio_encoding(cleaned, labels)
    else:
        label_l = reg_encoding(cleaned, labels, hash_token, end_token)

    input_ids = pad_sequences([tokenizer.convert_tokens_to_ids(txt) for txt in tokenized_texts],
                          padding_value=0.0, max_len=opt["maxLen"])
    
    tags = pad_sequences(label_l, padding_value=end_token, max_len=opt["maxLen"])
    attention_masks = [[float(i>0) for i in ii] for ii in input_ids]
    
    
    return input_ids, tags, attention_masks, cleaned, ids, terms, spacy, label_l


In [13]:
opt = {
    "techniques" : "tools/data/propaganda-techniques-names.txt",
    "binaryLabel" : False,
    "nLabels" : 21,
    "lowerCase" : False,
    "bio" : False,
    "model" : "bert-base-cased",
    "trainDataset" : "train.p",
    "evalDataset" : "test.p",
    "trainBatch" :16,
    "nEpochs" :5,
    "LR" :3e-5,
    "loadModel" : "exp/all_class/all_class_proper/4/model_4.pth",
    "expID" :"spacy_viz",
    "classType" : "all_class",
    "testData" : "datasets-v5",
    "train" : True,
    "maxLen" : 210,
    "patience" : 2,
    "testDataset" : "datasets-v5/tasks-2-3/test",
    "snapshot" : 2
}

In [9]:
ls exp/all_class/all_class_proper/4

model_4.pth  optimizer.pth  option.pth  pred.tasks-2-3  score.tasks-2-3


In [5]:
def get_task2(predictions):
    preddi = []
    found = False
    for x in predictions:
        for j in x:
            if j==1:
                preddi.append(1)
                found = True
                break
        if not found:
            preddi.append(0)
        found = False
    return preddi

In [31]:
#os.environ['CUDA_VISIBLE_DEVICES']='0,1,2,3,4'
prop_tech_e, prop_tech, hash_token, end_token, p2id = settings(opt["techniques"], opt["binaryLabel"], opt["bio"])
logging.info("Training for class %s" % (opt["binaryLabel"]))
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
n_gpu = torch.cuda.device_count(); 
logging.info("GPUs Detected: %s" % (n_gpu))
scorred_labels = list(range(1,(opt["nLabels"]-2)))

tokenizer = BertTokenizer.from_pretrained(opt["model"], do_lower_case=opt["lowerCase"]);
print (hash_token, end_token)
# Load Tokenized train and validation datasets
tr_inputs, tr_tags, tr_masks, _ = make_set(p2id, opt["trainDataset"], tokenizer, opt["binaryLabel"], hash_token, end_token)
val_inputs, val_tags, val_masks, cleaned, flat_list_i, flat_list, flat_list_s,_ = make_val_set(p2id, opt["evalDataset"],
                                                                                         tokenizer, opt["binaryLabel"], hash_token, end_token)
printable = tr_tags
# ids, texts, _ = read_data(opt["testDataset"], isLabels = False)
# flat_list_i, flat_list, flat_list_s = test2list(ids, texts)
truth_task2 = get_task2(val_tags)

logging.info("Dataset loaded")
logging.info("Labels detected in train dataset: %s" % (np.unique(tr_tags)))
logging.info("Labels detected in val dataset: %s" % (np.unique(val_tags)))

# Balanced Sampling
total_tags = np.zeros((opt["nLabels"],))
for x in tr_tags:
     total_tags = total_tags+np.bincount(x)

probs = 1./total_tags
train_tokenweights = probs[tr_tags]
weightage = np.sum(train_tokenweights, axis=1)
   # Alternate method for weighting
ws = np.ones((opt["nLabels"],))
ws[0] = 0

ws[hash_token] = 0
ws[end_token] = 0
ws = ws+0.3
prob = [max(x) for x in ws[tr_tags]]
weightage = [x + y for x, y in zip(prob, (len(prob)*[0.1]))]    

# Convert to pyTorch tensors
tr_inputs = torch.tensor(tr_inputs)
val_inputs = torch.tensor(val_inputs)
tr_tags = torch.tensor(tr_tags)
val_tags = torch.tensor(val_tags)
tr_masks = torch.tensor(tr_masks)
val_masks = torch.tensor(val_masks)

# Create Dataloaders
train_data = TensorDataset(tr_inputs, tr_masks, tr_tags)
#train_sampler = WeightedRandomSampler(weights=weightage, num_samples=len(tr_tags),replacement=True)
train_sampler = RandomSampler(train_data)
train_dataloader = DataLoader(train_data, sampler=train_sampler, batch_size=opt["trainBatch"])

valid_data = TensorDataset(val_inputs, val_masks, val_tags)
valid_sampler = SequentialSampler(valid_data)
valid_dataloader = DataLoader(valid_data, sampler=valid_sampler, batch_size=opt["trainBatch"])

# Model Initialize
model = BertForTokenClassification.from_pretrained(opt["model"], num_labels=opt["nLabels"]);

loss_scale = 0
warmup_proportion = 0.1
num_train_optimization_steps = int(len(train_data) / opt["trainBatch"] ) * opt["nEpochs"]

# Prepare optimizer
param_optimizer = list(model.named_parameters())

# hack to remove pooler, which is not usedpython train.py --expID test --trainDataset dataset_train.csv --evalDataset dataset_dev.csv --model bert-base-cased --LR 3e-5 --trainBatch 12 --nEpochs 1
# thus it produce None grad that break apex
param_optimizer = [n for n in param_optimizer if 'pooler' not in n[0]]

no_decay = ['bias', 'LayerNorm.bias', 'LayerNorm.weight']
optimizer_grouped_parameters = [
    {'params': [p for n, p in param_optimizer if not any(nd in n for nd in no_decay)], 'weight_decay': 0.01},
    {'params': [p for n, p in param_optimizer if any(nd in n for nd in no_decay)], 'weight_decay': 0.0}
]
# t_total matters
optimizer = BertAdam(optimizer_grouped_parameters,
                     lr=opt["LR"],
                     warmup=warmup_proportion,
                     t_total=num_train_optimization_steps) 

model.to(device)

if n_gpu > 1:
    model = torch.nn.DataParallel(model)
    logging.info("Training beginning on: %s" % n_gpu)

if opt["loadModel"]:
    print('Loading Model from {}'.format(opt["loadModel"]))
    model.load_state_dict(torch.load(opt["loadModel"]))
    if not os.path.exists("./exp/{}/{}".format(opt["classType"], opt["expID"])):
        try:
            os.mkdir("./exp/{}/{}".format(opt["classType"], opt["expID"]))
        except FileNotFoundError:
            os.mkdir("./exp/{}".format(opt["classType"]))
            os.mkdir("./exp/{}/{}".format(opt["classType"], opt["expID"]))
else:
    print('Create new model')
    if not os.path.exists("./exp/{}/{}".format(opt["classType"], opt["expID"])):
        try:
            os.mkdir("./exp/{}/{}".format(opt["classType"], opt["expID"]))
        except FileNotFoundError:
            os.mkdir("./exp/{}".format(opt["classType"]))
            os.mkdir("./exp/{}/{}".format(opt["classType"], opt["expID"]))

# F1 score shouldn't consider no-propaganda
# and other auxiliary labels

global_step = 0
nb_tr_steps = 0
tr_loss = 0
max_grad_norm = 1.0
best = 0
early_stopping = EarlyStopping(patience=opt["patience"], verbose=True)
train_losses = []
valid_losses = []
f1_scores = []
f1_scores_word = []
task2_scores = []
for i in trange(opt["nEpochs"], desc="Epoch"):
    # TRAIN loop
    # Start only if train flag was passed
    if (opt["train"]):
        model.train()
        tr_loss = 0
        nb_tr_examples, nb_tr_steps = 0, 0
        for step, batch in enumerate(tqdm(train_dataloader, desc="Iteration")):
            if n_gpu == 1:
                batch = tuple(t.to(device) for t in batch)
            b_input_ids, b_input_mask, b_labels = batch

            # forward pass
            loss = model(b_input_ids, token_type_ids=None,
                        attention_mask=b_input_mask, labels=b_labels)
            if n_gpu > 1:
                loss = loss.mean()

            # backward pass
            loss.backward()

            tr_loss += loss.item()
            nb_tr_examples += b_input_ids.size(0)
            nb_tr_steps += 1

            optimizer.step()
            optimizer.zero_grad()
            global_step += 1
        logging.info(f'EPOCH {i} done: Train Loss {(tr_loss/nb_tr_steps)}')
        train_losses.append(tr_loss/nb_tr_steps)

    # Evaluation on validation set or test set
    model.eval()
    eval_loss, eval_accuracy = 0, 0
    nb_eval_steps, nb_eval_examples = 0, 0
    predictions , true_labels = [], []
    for batch in tqdm(valid_dataloader, desc="Evaluating"):
        batch = tuple(t.to(device) for t in batch)
        b_input_ids, b_input_mask, b_labels = batch

        with torch.no_grad():
            tmp_eval_loss = model(b_input_ids, token_type_ids=None,
                                attention_mask=b_input_mask, labels=b_labels)
            logits = model(b_input_ids, token_type_ids=None,
                        attention_mask=b_input_mask)
        logits = logits.detach().cpu().numpy()
        label_ids = b_labels.to('cpu').numpy()
        predictions.extend([list(p) for p in np.argmax(logits, axis=2)])
        true_labels.append(label_ids)

        #tmp_eval_accuracy = flat_accuracy(logits, label_ids)

        eval_loss += tmp_eval_loss.mean().item()
        #eval_accuracy += tmp_eval_accuracy

        nb_eval_examples += b_input_ids.size(0)
        nb_eval_steps += 1
    pred_task2 = get_task2(predictions)
    logging.info("Precision, Recall, F1-Score, Support Task2: {}".format(f1(pred_task2, truth_task2, average=None)))
    f1_macro = f1_score(pred_task2, truth_task2, labels=scorred_labels, average="macro")
    task2_scores.append(f1_macro)
    pickle.dump(printable, open( "output_.p", "wb"))
    eval_loss = eval_loss/nb_eval_steps
    logging.info("Validation loss: %s" % (eval_loss))    
    logging.info("Precision, Recall, F1-Score, Support: {}".format(f1(list(itertools.chain(*predictions)), list(itertools.chain(*val_tags)), average=None)))
    f1_macro = f1_score(list(itertools.chain(*predictions)), list(itertools.chain(*val_tags)), labels=scorred_labels, average="macro")
    logging.info("F1 Macro Dev Set: %s" % f1_macro)
    logging.info("Learning Rate: %s" % (optimizer.get_lr()[0]))
    valid_losses.append(eval_loss)
    f1_scores_word.append(f1_macro)

    df = get_char_level(flat_list_i, flat_list_s, predictions, cleaned, hash_token, end_token, prop_tech)
    postfix = opt["testDataset"].rsplit('/', 2)[-2]
    if opt["loadModel"]:
        out_dir = opt["loadModel"].rsplit('/', 1)[0] + "/pred." + postfix
    else:
        out_dir = ("exp/{}/{}/temp_pred.csv".format(opt["classType"], opt["expID"]))
    df.to_csv(out_dir, sep='\t', index=False, header=False) 
    logging.info("Predictions written to: %s" % (out_dir))

    if opt["loadModel"]:
        out_file = opt["loadModel"].rsplit('/', 1)[0] + "/score." + postfix
    else:
        out_file = ("exp/{}/{}/temp_score.csv".format(opt["classType"], opt["expID"]))

    if opt["classType"] != "binary":
        char_predict = tools.task3_scorer_onefile.main(["-s", out_dir, "-r", opt["testDataset"], "-t", opt["techniques"], "-l", out_file])
    else:
        char_predict = tools.task3_scorer_onefile.main(["-s", out_dir, "-r", opt["testDataset"], "-t", opt["techniques"], "-f", "-l", out_file])
    f1_scores.append(char_predict) 
    print (char_predict)

    # early_stopping needs the validation loss to check if it has decresed, 
    # and if it has, it will make a checkpoint of the current model
    if not opt["train"]:
        break
    early_stopping(char_predict*(-1), model)

    if early_stopping.early_stop:
        logging.info("Early stopping")
        break
    # Save checkpoints
    if i % opt["snapshot"] == 0:
        if not os.path.exists("./exp/{}/{}/{}".format(opt["classType"], opt["expID"], i)):
            try:
                os.mkdir("./exp/{}/{}/{}".format(opt["classType"], opt["expID"], i))
            except FileNotFoundError:
                os.mkdir("./exp/{}/{}/{}".format(opt["classType"], opt["expID"], i))
        torch.save(
            model.state_dict(), './exp/{}/{}/{}/model_{}.pth'.format(opt["classType"], opt["expID"], i, i))
        torch.save(
            opt, './exp/{}/{}/{}/option.pth'.format(opt["classType"], opt["expID"], i))
        torch.save(
            optimizer, './exp/{}/{}/{}/optimizer.pth'.format(opt["classType"], opt["expID"], i))


    # Save model based on best F1 score and if epoch is greater than 3
    '''if f1_macro > best and i > 3:
    # Save a trained model and the associated configuration
        torch.save(
            model.state_dict(), './exp/{}/{}/best_model.pth'.format(opt["classType"], opt["expID"]))
        torch.save(
            opt, './exp/{}/{}/option.pth'.format(opt["classType"], opt["expID"]))
        torch.save(
            optimizer, './exp/{}/{}/optimizer.pth'.format(opt["classType"], opt["expID"]))
        #model_to_save = model.module if hasattr(model, 'module') else model  # Only save the model it-self
        #output_model_file = os.path.join("./exp/{}/{}".format(opt["classType"], opt["expID"]), "best_model.pth")
        #torch.save(model_to_save.state_dict(), output_model_file)
        best = f1_macro
        logging.info("New best model")
    '''
if opt["train"]:
    logging.info("Training Finished. Learning curves saved.")
    draw_curves(train_losses, valid_losses, f1_scores, f1_scores_word, task2_scores)
    #df = pd.DataFrame({'col':trainlosses})
    #df.to_csv("trainlosses.csv", sep='\t', index=False, header=False) 
    #df = pd.DataFrame({'col':validlosses})
    #df.to_csv("validlosses.csv", sep='\t', index=False, header=False) 
    #df = pd.DataFrame({'col':f1scores})
    #df.to_csv("f1scores.csv", sep='\t', index=False, header=False) 



19 20
{'O': 0, 'Appeal_to_Authority': 1, 'Appeal_to_fear-prejudice': 2, 'Bandwagon': 3, 'Black-and-White_Fallacy': 4, 'Causal_Oversimplification': 5, 'Doubt': 6, 'Exaggeration,Minimisation': 7, 'Flag-Waving': 8, 'Loaded_Language': 9, 'Name_Calling,Labeling': 10, 'Obfuscation,Intentional_Vagueness,Confusion': 11, 'Red_Herring': 12, 'Reductio_ad_hitlerum': 13, 'Repetition': 14, 'Slogans': 15, 'Straw_Men': 16, 'Thought-terminating_Cliches': 17, 'Whataboutism': 18}
{'O': 0, 'Appeal_to_Authority': 1, 'Appeal_to_fear-prejudice': 2, 'Bandwagon': 3, 'Black-and-White_Fallacy': 4, 'Causal_Oversimplification': 5, 'Doubt': 6, 'Exaggeration,Minimisation': 7, 'Flag-Waving': 8, 'Loaded_Language': 9, 'Name_Calling,Labeling': 10, 'Obfuscation,Intentional_Vagueness,Confusion': 11, 'Red_Herring': 12, 'Reductio_ad_hitlerum': 13, 'Repetition': 14, 'Slogans': 15, 'Straw_Men': 16, 'Thought-terminating_Cliches': 17, 'Whataboutism': 18}







Epoch:   0%|          | 0/5 [00:00<?, ?it/s][A[A[A[A[A

Create new model


HBox(children=(IntProgress(value=0, description='Iteration', max=870, style=ProgressStyle(description_width='i…

HBox(children=(IntProgress(value=0, description='Evaluating', max=128, style=ProgressStyle(description_width='…

0
Counter check:  0
{'submission': 'exp/all_class/spacy_viz/temp_pred.csv', 'gold': 'datasets-v5/tasks-2-3/test', 'debug_on_std': False, 'techniques_file': 'tools/data/propaganda-techniques-names.txt', 'log_file': 'exp/all_class/spacy_viz/temp_score.csv', 'fragments_only': False}
2019-06-19 08:50:46,250 - INFO - Logging execution to file exp/all_class/spacy_viz/temp_score.csv


INFO:propaganda_scorer:Logging execution to file exp/all_class/spacy_viz/temp_score.csv


2019-06-19 08:50:46,252 - INFO - Checking user submitted file exp/all_class/spacy_viz/temp_pred.csv against gold folder datasets-v5/tasks-2-3/test


INFO:propaganda_scorer:Checking user submitted file exp/all_class/spacy_viz/temp_pred.csv against gold folder datasets-v5/tasks-2-3/test
DEBUG:propaganda_scorer:OK: all article ids have a correspondence in the list of articles from the reference dataset


2019-06-19 08:50:46,267 - INFO - Scoring user submitted file exp/all_class/spacy_viz/temp_pred.csv against gold file datasets-v5/tasks-2-3/test


INFO:propaganda_scorer:Scoring user submitted file exp/all_class/spacy_viz/temp_pred.csv against gold file datasets-v5/tasks-2-3/test


2019-06-19 08:50:46,271 - INFO - Precision=12.558933/41=0.306315	Recall=12.558933/927=0.013548


INFO:propaganda_scorer:Precision=12.558933/41=0.306315	Recall=12.558933/927=0.013548


2019-06-19 08:50:46,272 - INFO - F1=0.025948


INFO:propaganda_scorer:F1=0.025948


2019-06-19 08:50:46,275 - INFO - Appeal_to_Authority: P=0.000000 R=0.000000 F1=0.000000


INFO:propaganda_scorer:Appeal_to_Authority: P=0.000000 R=0.000000 F1=0.000000


2019-06-19 08:50:46,276 - INFO - Appeal_to_fear-prejudice: P=0.000000 R=0.000000 F1=0.000000


INFO:propaganda_scorer:Appeal_to_fear-prejudice: P=0.000000 R=0.000000 F1=0.000000


2019-06-19 08:50:46,278 - INFO - Bandwagon: P=0.000000 R=0.000000 F1=0.000000


INFO:propaganda_scorer:Bandwagon: P=0.000000 R=0.000000 F1=0.000000


2019-06-19 08:50:46,279 - INFO - Black-and-White_Fallacy: P=0.000000 R=0.000000 F1=0.000000


INFO:propaganda_scorer:Black-and-White_Fallacy: P=0.000000 R=0.000000 F1=0.000000


2019-06-19 08:50:46,280 - INFO - Causal_Oversimplification: P=0.000000 R=0.000000 F1=0.000000


INFO:propaganda_scorer:Causal_Oversimplification: P=0.000000 R=0.000000 F1=0.000000


2019-06-19 08:50:46,282 - INFO - Doubt: P=0.000000 R=0.000000 F1=0.000000


INFO:propaganda_scorer:Doubt: P=0.000000 R=0.000000 F1=0.000000


2019-06-19 08:50:46,284 - INFO - Exaggeration,Minimisation: P=0.000000 R=0.000000 F1=0.000000


INFO:propaganda_scorer:Exaggeration,Minimisation: P=0.000000 R=0.000000 F1=0.000000


2019-06-19 08:50:46,285 - INFO - Flag-Waving: P=0.000000 R=0.000000 F1=0.000000


INFO:propaganda_scorer:Flag-Waving: P=0.000000 R=0.000000 F1=0.000000


2019-06-19 08:50:46,286 - INFO - Loaded_Language: P=0.338505 R=0.036746 F1=0.066295


INFO:propaganda_scorer:Loaded_Language: P=0.338505 R=0.036746 F1=0.066295


2019-06-19 08:50:46,287 - INFO - Name_Calling,Labeling: P=0.173532 R=0.009015 F1=0.017139


INFO:propaganda_scorer:Name_Calling,Labeling: P=0.173532 R=0.009015 F1=0.017139


2019-06-19 08:50:46,290 - INFO - Obfuscation,Intentional_Vagueness,Confusion: P=0.000000 R=0.000000 F1=0.000000


INFO:propaganda_scorer:Obfuscation,Intentional_Vagueness,Confusion: P=0.000000 R=0.000000 F1=0.000000


2019-06-19 08:50:46,291 - INFO - Red_Herring: P=0.000000 R=0.000000 F1=0.000000


INFO:propaganda_scorer:Red_Herring: P=0.000000 R=0.000000 F1=0.000000


2019-06-19 08:50:46,293 - INFO - Reductio_ad_hitlerum: P=0.000000 R=0.000000 F1=0.000000


INFO:propaganda_scorer:Reductio_ad_hitlerum: P=0.000000 R=0.000000 F1=0.000000


2019-06-19 08:50:46,295 - INFO - Repetition: P=0.000000 R=0.000000 F1=0.000000


INFO:propaganda_scorer:Repetition: P=0.000000 R=0.000000 F1=0.000000


2019-06-19 08:50:46,296 - INFO - Slogans: P=0.000000 R=0.000000 F1=0.000000


INFO:propaganda_scorer:Slogans: P=0.000000 R=0.000000 F1=0.000000


2019-06-19 08:50:46,298 - INFO - Straw_Men: P=0.000000 R=0.000000 F1=0.000000


INFO:propaganda_scorer:Straw_Men: P=0.000000 R=0.000000 F1=0.000000


2019-06-19 08:50:46,299 - INFO - Thought-terminating_Cliches: P=0.000000 R=0.000000 F1=0.000000


INFO:propaganda_scorer:Thought-terminating_Cliches: P=0.000000 R=0.000000 F1=0.000000


2019-06-19 08:50:46,301 - INFO - Whataboutism: P=0.000000 R=0.000000 F1=0.000000


INFO:propaganda_scorer:Whataboutism: P=0.000000 R=0.000000 F1=0.000000


0.025948208538716742
Validation loss decreased (inf --> -0.025948).  Saving model ...







Epoch:  20%|██        | 1/5 [08:50<35:22, 530.56s/it][A[A[A[A[A

HBox(children=(IntProgress(value=0, description='Iteration', max=870, style=ProgressStyle(description_width='i…

HBox(children=(IntProgress(value=0, description='Evaluating', max=128, style=ProgressStyle(description_width='…

0
Counter check:  0
{'submission': 'exp/all_class/spacy_viz/temp_pred.csv', 'gold': 'datasets-v5/tasks-2-3/test', 'debug_on_std': False, 'techniques_file': 'tools/data/propaganda-techniques-names.txt', 'log_file': 'exp/all_class/spacy_viz/temp_score.csv', 'fragments_only': False}
2019-06-19 08:59:39,433 - INFO - Logging execution to file exp/all_class/spacy_viz/temp_score.csv


INFO:propaganda_scorer:Logging execution to file exp/all_class/spacy_viz/temp_score.csv


2019-06-19 08:59:39,436 - INFO - Checking user submitted file exp/all_class/spacy_viz/temp_pred.csv against gold folder datasets-v5/tasks-2-3/test


INFO:propaganda_scorer:Checking user submitted file exp/all_class/spacy_viz/temp_pred.csv against gold folder datasets-v5/tasks-2-3/test
DEBUG:propaganda_scorer:OK: all article ids have a correspondence in the list of articles from the reference dataset


2019-06-19 08:59:39,450 - INFO - Scoring user submitted file exp/all_class/spacy_viz/temp_pred.csv against gold file datasets-v5/tasks-2-3/test


INFO:propaganda_scorer:Scoring user submitted file exp/all_class/spacy_viz/temp_pred.csv against gold file datasets-v5/tasks-2-3/test


2019-06-19 08:59:39,463 - INFO - Precision=55.042153/316=0.174184	Recall=55.042153/927=0.059377


INFO:propaganda_scorer:Precision=55.042153/316=0.174184	Recall=55.042153/927=0.059377


2019-06-19 08:59:39,465 - INFO - F1=0.088563


INFO:propaganda_scorer:F1=0.088563


2019-06-19 08:59:39,466 - INFO - Appeal_to_Authority: P=0.000000 R=0.000000 F1=0.000000


INFO:propaganda_scorer:Appeal_to_Authority: P=0.000000 R=0.000000 F1=0.000000


2019-06-19 08:59:39,468 - INFO - Appeal_to_fear-prejudice: P=0.030016 R=0.043293 F1=0.035452


INFO:propaganda_scorer:Appeal_to_fear-prejudice: P=0.030016 R=0.043293 F1=0.035452


2019-06-19 08:59:39,469 - INFO - Bandwagon: P=0.000000 R=0.000000 F1=0.000000


INFO:propaganda_scorer:Bandwagon: P=0.000000 R=0.000000 F1=0.000000


2019-06-19 08:59:39,470 - INFO - Black-and-White_Fallacy: P=0.194744 R=0.113601 F1=0.143496


INFO:propaganda_scorer:Black-and-White_Fallacy: P=0.194744 R=0.113601 F1=0.143496


2019-06-19 08:59:39,472 - INFO - Causal_Oversimplification: P=0.000000 R=0.000000 F1=0.000000


INFO:propaganda_scorer:Causal_Oversimplification: P=0.000000 R=0.000000 F1=0.000000


2019-06-19 08:59:39,473 - INFO - Doubt: P=0.054472 R=0.018699 F1=0.027841


INFO:propaganda_scorer:Doubt: P=0.054472 R=0.018699 F1=0.027841


2019-06-19 08:59:39,475 - INFO - Exaggeration,Minimisation: P=0.068641 R=0.011016 F1=0.018986


INFO:propaganda_scorer:Exaggeration,Minimisation: P=0.068641 R=0.011016 F1=0.018986


2019-06-19 08:59:39,476 - INFO - Flag-Waving: P=0.170811 R=0.165787 F1=0.168261


INFO:propaganda_scorer:Flag-Waving: P=0.170811 R=0.165787 F1=0.168261


2019-06-19 08:59:39,478 - INFO - Loaded_Language: P=0.300121 R=0.106622 F1=0.157345


INFO:propaganda_scorer:Loaded_Language: P=0.300121 R=0.106622 F1=0.157345


2019-06-19 08:59:39,479 - INFO - Name_Calling,Labeling: P=0.197065 R=0.072940 F1=0.106471


INFO:propaganda_scorer:Name_Calling,Labeling: P=0.197065 R=0.072940 F1=0.106471


2019-06-19 08:59:39,480 - INFO - Obfuscation,Intentional_Vagueness,Confusion: P=0.000000 R=0.000000 F1=0.000000


INFO:propaganda_scorer:Obfuscation,Intentional_Vagueness,Confusion: P=0.000000 R=0.000000 F1=0.000000


2019-06-19 08:59:39,482 - INFO - Red_Herring: P=0.000000 R=0.000000 F1=0.000000


INFO:propaganda_scorer:Red_Herring: P=0.000000 R=0.000000 F1=0.000000


2019-06-19 08:59:39,483 - INFO - Reductio_ad_hitlerum: P=0.000000 R=0.000000 F1=0.000000


INFO:propaganda_scorer:Reductio_ad_hitlerum: P=0.000000 R=0.000000 F1=0.000000


2019-06-19 08:59:39,485 - INFO - Repetition: P=0.000000 R=0.000000 F1=0.000000


INFO:propaganda_scorer:Repetition: P=0.000000 R=0.000000 F1=0.000000


2019-06-19 08:59:39,486 - INFO - Slogans: P=0.000000 R=0.000000 F1=0.000000


INFO:propaganda_scorer:Slogans: P=0.000000 R=0.000000 F1=0.000000


2019-06-19 08:59:39,488 - INFO - Straw_Men: P=0.000000 R=0.000000 F1=0.000000


INFO:propaganda_scorer:Straw_Men: P=0.000000 R=0.000000 F1=0.000000


2019-06-19 08:59:39,489 - INFO - Thought-terminating_Cliches: P=0.000000 R=0.000000 F1=0.000000


INFO:propaganda_scorer:Thought-terminating_Cliches: P=0.000000 R=0.000000 F1=0.000000


2019-06-19 08:59:39,491 - INFO - Whataboutism: P=0.000000 R=0.000000 F1=0.000000


INFO:propaganda_scorer:Whataboutism: P=0.000000 R=0.000000 F1=0.000000


0.08856340021423827
Validation loss decreased (-0.025948 --> -0.088563).  Saving model ...







Epoch:  40%|████      | 2/5 [17:33<26:24, 528.28s/it][A[A[A[A[A

HBox(children=(IntProgress(value=0, description='Iteration', max=870, style=ProgressStyle(description_width='i…

HBox(children=(IntProgress(value=0, description='Evaluating', max=128, style=ProgressStyle(description_width='…

8
Counter check:  8
{'submission': 'exp/all_class/spacy_viz/temp_pred.csv', 'gold': 'datasets-v5/tasks-2-3/test', 'debug_on_std': False, 'techniques_file': 'tools/data/propaganda-techniques-names.txt', 'log_file': 'exp/all_class/spacy_viz/temp_score.csv', 'fragments_only': False}
2019-06-19 09:08:21,989 - INFO - Logging execution to file exp/all_class/spacy_viz/temp_score.csv


INFO:propaganda_scorer:Logging execution to file exp/all_class/spacy_viz/temp_score.csv


2019-06-19 09:08:21,993 - INFO - Checking user submitted file exp/all_class/spacy_viz/temp_pred.csv against gold folder datasets-v5/tasks-2-3/test


INFO:propaganda_scorer:Checking user submitted file exp/all_class/spacy_viz/temp_pred.csv against gold folder datasets-v5/tasks-2-3/test
DEBUG:propaganda_scorer:OK: all article ids have a correspondence in the list of articles from the reference dataset


2019-06-19 09:08:22,008 - INFO - Scoring user submitted file exp/all_class/spacy_viz/temp_pred.csv against gold file datasets-v5/tasks-2-3/test


INFO:propaganda_scorer:Scoring user submitted file exp/all_class/spacy_viz/temp_pred.csv against gold file datasets-v5/tasks-2-3/test


2019-06-19 09:08:22,025 - INFO - Precision=88.102087/444=0.198428	Recall=88.102087/927=0.095040


INFO:propaganda_scorer:Precision=88.102087/444=0.198428	Recall=88.102087/927=0.095040


2019-06-19 09:08:22,027 - INFO - F1=0.128522


INFO:propaganda_scorer:F1=0.128522


2019-06-19 09:08:22,028 - INFO - Appeal_to_Authority: P=0.000000 R=0.000000 F1=0.000000


INFO:propaganda_scorer:Appeal_to_Authority: P=0.000000 R=0.000000 F1=0.000000


2019-06-19 09:08:22,030 - INFO - Appeal_to_fear-prejudice: P=0.029601 R=0.040416 F1=0.034173


INFO:propaganda_scorer:Appeal_to_fear-prejudice: P=0.029601 R=0.040416 F1=0.034173


2019-06-19 09:08:22,032 - INFO - Bandwagon: P=0.000000 R=0.000000 F1=0.000000


INFO:propaganda_scorer:Bandwagon: P=0.000000 R=0.000000 F1=0.000000


2019-06-19 09:08:22,033 - INFO - Black-and-White_Fallacy: P=0.398496 R=0.166040 F1=0.234410


INFO:propaganda_scorer:Black-and-White_Fallacy: P=0.398496 R=0.166040 F1=0.234410


2019-06-19 09:08:22,035 - INFO - Causal_Oversimplification: P=0.112228 R=0.057924 F1=0.076411


INFO:propaganda_scorer:Causal_Oversimplification: P=0.112228 R=0.057924 F1=0.076411


2019-06-19 09:08:22,036 - INFO - Doubt: P=0.050000 R=0.014925 F1=0.022989


INFO:propaganda_scorer:Doubt: P=0.050000 R=0.014925 F1=0.022989


2019-06-19 09:08:22,038 - INFO - Exaggeration,Minimisation: P=0.166816 R=0.026773 F1=0.046141


INFO:propaganda_scorer:Exaggeration,Minimisation: P=0.166816 R=0.026773 F1=0.046141


2019-06-19 09:08:22,039 - INFO - Flag-Waving: P=0.188353 R=0.182813 F1=0.185542


INFO:propaganda_scorer:Flag-Waving: P=0.188353 R=0.182813 F1=0.185542


2019-06-19 09:08:22,041 - INFO - Loaded_Language: P=0.285243 R=0.149189 F1=0.195912


INFO:propaganda_scorer:Loaded_Language: P=0.285243 R=0.149189 F1=0.195912


2019-06-19 09:08:22,043 - INFO - Name_Calling,Labeling: P=0.275655 R=0.159307 F1=0.201920


INFO:propaganda_scorer:Name_Calling,Labeling: P=0.275655 R=0.159307 F1=0.201920


2019-06-19 09:08:22,044 - INFO - Obfuscation,Intentional_Vagueness,Confusion: P=0.000000 R=0.000000 F1=0.000000


INFO:propaganda_scorer:Obfuscation,Intentional_Vagueness,Confusion: P=0.000000 R=0.000000 F1=0.000000


2019-06-19 09:08:22,045 - INFO - Red_Herring: P=0.000000 R=0.000000 F1=0.000000


INFO:propaganda_scorer:Red_Herring: P=0.000000 R=0.000000 F1=0.000000


2019-06-19 09:08:22,047 - INFO - Reductio_ad_hitlerum: P=0.000000 R=0.000000 F1=0.000000


INFO:propaganda_scorer:Reductio_ad_hitlerum: P=0.000000 R=0.000000 F1=0.000000


2019-06-19 09:08:22,048 - INFO - Repetition: P=0.000000 R=0.000000 F1=0.000000


INFO:propaganda_scorer:Repetition: P=0.000000 R=0.000000 F1=0.000000


2019-06-19 09:08:22,050 - INFO - Slogans: P=0.420168 R=0.183824 F1=0.255754


INFO:propaganda_scorer:Slogans: P=0.420168 R=0.183824 F1=0.255754


2019-06-19 09:08:22,051 - INFO - Straw_Men: P=0.000000 R=0.000000 F1=0.000000


INFO:propaganda_scorer:Straw_Men: P=0.000000 R=0.000000 F1=0.000000


2019-06-19 09:08:22,052 - INFO - Thought-terminating_Cliches: P=0.000000 R=0.000000 F1=0.000000


INFO:propaganda_scorer:Thought-terminating_Cliches: P=0.000000 R=0.000000 F1=0.000000


2019-06-19 09:08:22,054 - INFO - Whataboutism: P=0.000000 R=0.000000 F1=0.000000


INFO:propaganda_scorer:Whataboutism: P=0.000000 R=0.000000 F1=0.000000


0.12852237319345247
Validation loss decreased (-0.088563 --> -0.128522).  Saving model ...







Epoch:  60%|██████    | 3/5 [26:26<17:39, 529.63s/it][A[A[A[A[A

HBox(children=(IntProgress(value=0, description='Iteration', max=870, style=ProgressStyle(description_width='i…

HBox(children=(IntProgress(value=0, description='Evaluating', max=128, style=ProgressStyle(description_width='…

21
Counter check:  21
{'submission': 'exp/all_class/spacy_viz/temp_pred.csv', 'gold': 'datasets-v5/tasks-2-3/test', 'debug_on_std': False, 'techniques_file': 'tools/data/propaganda-techniques-names.txt', 'log_file': 'exp/all_class/spacy_viz/temp_score.csv', 'fragments_only': False}
2019-06-19 09:17:16,337 - INFO - Logging execution to file exp/all_class/spacy_viz/temp_score.csv


INFO:propaganda_scorer:Logging execution to file exp/all_class/spacy_viz/temp_score.csv


2019-06-19 09:17:16,342 - INFO - Checking user submitted file exp/all_class/spacy_viz/temp_pred.csv against gold folder datasets-v5/tasks-2-3/test


INFO:propaganda_scorer:Checking user submitted file exp/all_class/spacy_viz/temp_pred.csv against gold folder datasets-v5/tasks-2-3/test
DEBUG:propaganda_scorer:OK: all article ids have a correspondence in the list of articles from the reference dataset


2019-06-19 09:17:16,358 - INFO - Scoring user submitted file exp/all_class/spacy_viz/temp_pred.csv against gold file datasets-v5/tasks-2-3/test


INFO:propaganda_scorer:Scoring user submitted file exp/all_class/spacy_viz/temp_pred.csv against gold file datasets-v5/tasks-2-3/test


2019-06-19 09:17:16,380 - INFO - Precision=107.799404/640=0.168437	Recall=107.799404/927=0.116288


INFO:propaganda_scorer:Precision=107.799404/640=0.168437	Recall=107.799404/927=0.116288


2019-06-19 09:17:16,381 - INFO - F1=0.137587


INFO:propaganda_scorer:F1=0.137587


2019-06-19 09:17:16,383 - INFO - Appeal_to_Authority: P=0.000000 R=0.000000 F1=0.000000


INFO:propaganda_scorer:Appeal_to_Authority: P=0.000000 R=0.000000 F1=0.000000


2019-06-19 09:17:16,384 - INFO - Appeal_to_fear-prejudice: P=0.032392 R=0.040490 F1=0.035991


INFO:propaganda_scorer:Appeal_to_fear-prejudice: P=0.032392 R=0.040490 F1=0.035991


2019-06-19 09:17:16,385 - INFO - Bandwagon: P=0.000000 R=0.000000 F1=0.000000


INFO:propaganda_scorer:Bandwagon: P=0.000000 R=0.000000 F1=0.000000


2019-06-19 09:17:16,387 - INFO - Black-and-White_Fallacy: P=0.077595 R=0.168123 F1=0.106183


INFO:propaganda_scorer:Black-and-White_Fallacy: P=0.077595 R=0.168123 F1=0.106183


2019-06-19 09:17:16,388 - INFO - Causal_Oversimplification: P=0.057814 R=0.063408 F1=0.060482


INFO:propaganda_scorer:Causal_Oversimplification: P=0.057814 R=0.063408 F1=0.060482


2019-06-19 09:17:16,390 - INFO - Doubt: P=0.047133 R=0.036581 F1=0.041192


INFO:propaganda_scorer:Doubt: P=0.047133 R=0.036581 F1=0.041192


2019-06-19 09:17:16,391 - INFO - Exaggeration,Minimisation: P=0.069606 R=0.021483 F1=0.032833


INFO:propaganda_scorer:Exaggeration,Minimisation: P=0.069606 R=0.021483 F1=0.032833


2019-06-19 09:17:16,393 - INFO - Flag-Waving: P=0.110601 R=0.162649 F1=0.131668


INFO:propaganda_scorer:Flag-Waving: P=0.110601 R=0.162649 F1=0.131668


2019-06-19 09:17:16,394 - INFO - Loaded_Language: P=0.293046 R=0.184118 F1=0.226149


INFO:propaganda_scorer:Loaded_Language: P=0.293046 R=0.184118 F1=0.226149


2019-06-19 09:17:16,396 - INFO - Name_Calling,Labeling: P=0.234477 R=0.213160 F1=0.223311


INFO:propaganda_scorer:Name_Calling,Labeling: P=0.234477 R=0.213160 F1=0.223311


2019-06-19 09:17:16,398 - INFO - Obfuscation,Intentional_Vagueness,Confusion: P=0.000000 R=0.000000 F1=0.000000


INFO:propaganda_scorer:Obfuscation,Intentional_Vagueness,Confusion: P=0.000000 R=0.000000 F1=0.000000


2019-06-19 09:17:16,399 - INFO - Red_Herring: P=0.000000 R=0.000000 F1=0.000000


INFO:propaganda_scorer:Red_Herring: P=0.000000 R=0.000000 F1=0.000000


2019-06-19 09:17:16,401 - INFO - Reductio_ad_hitlerum: P=0.000000 R=0.000000 F1=0.000000


INFO:propaganda_scorer:Reductio_ad_hitlerum: P=0.000000 R=0.000000 F1=0.000000


2019-06-19 09:17:16,402 - INFO - Repetition: P=0.000000 R=0.000000 F1=0.000000


INFO:propaganda_scorer:Repetition: P=0.000000 R=0.000000 F1=0.000000


2019-06-19 09:17:16,404 - INFO - Slogans: P=0.398897 R=0.199449 F1=0.265931


INFO:propaganda_scorer:Slogans: P=0.398897 R=0.199449 F1=0.265931


2019-06-19 09:17:16,405 - INFO - Straw_Men: P=0.000000 R=0.000000 F1=0.000000


INFO:propaganda_scorer:Straw_Men: P=0.000000 R=0.000000 F1=0.000000


2019-06-19 09:17:16,407 - INFO - Thought-terminating_Cliches: P=0.000000 R=0.000000 F1=0.000000


INFO:propaganda_scorer:Thought-terminating_Cliches: P=0.000000 R=0.000000 F1=0.000000


2019-06-19 09:17:16,409 - INFO - Whataboutism: P=0.000000 R=0.000000 F1=0.000000


INFO:propaganda_scorer:Whataboutism: P=0.000000 R=0.000000 F1=0.000000


0.13758698642163678
Validation loss decreased (-0.128522 --> -0.137587).  Saving model ...







Epoch:  80%|████████  | 4/5 [35:10<08:47, 527.98s/it][A[A[A[A[A

HBox(children=(IntProgress(value=0, description='Iteration', max=870, style=ProgressStyle(description_width='i…

HBox(children=(IntProgress(value=0, description='Evaluating', max=128, style=ProgressStyle(description_width='…

21
Counter check:  21
{'submission': 'exp/all_class/spacy_viz/temp_pred.csv', 'gold': 'datasets-v5/tasks-2-3/test', 'debug_on_std': False, 'techniques_file': 'tools/data/propaganda-techniques-names.txt', 'log_file': 'exp/all_class/spacy_viz/temp_score.csv', 'fragments_only': False}
2019-06-19 09:25:59,899 - INFO - Logging execution to file exp/all_class/spacy_viz/temp_score.csv


INFO:propaganda_scorer:Logging execution to file exp/all_class/spacy_viz/temp_score.csv


2019-06-19 09:25:59,905 - INFO - Checking user submitted file exp/all_class/spacy_viz/temp_pred.csv against gold folder datasets-v5/tasks-2-3/test


INFO:propaganda_scorer:Checking user submitted file exp/all_class/spacy_viz/temp_pred.csv against gold folder datasets-v5/tasks-2-3/test
DEBUG:propaganda_scorer:OK: all article ids have a correspondence in the list of articles from the reference dataset


2019-06-19 09:25:59,920 - INFO - Scoring user submitted file exp/all_class/spacy_viz/temp_pred.csv against gold file datasets-v5/tasks-2-3/test


INFO:propaganda_scorer:Scoring user submitted file exp/all_class/spacy_viz/temp_pred.csv against gold file datasets-v5/tasks-2-3/test


2019-06-19 09:25:59,946 - INFO - Precision=121.558127/752=0.161646	Recall=121.558127/927=0.131131


INFO:propaganda_scorer:Precision=121.558127/752=0.161646	Recall=121.558127/927=0.131131


2019-06-19 09:25:59,947 - INFO - F1=0.144798


INFO:propaganda_scorer:F1=0.144798


2019-06-19 09:25:59,949 - INFO - Appeal_to_Authority: P=0.000000 R=0.000000 F1=0.000000


INFO:propaganda_scorer:Appeal_to_Authority: P=0.000000 R=0.000000 F1=0.000000


2019-06-19 09:25:59,951 - INFO - Appeal_to_fear-prejudice: P=0.028734 R=0.039786 F1=0.033369


INFO:propaganda_scorer:Appeal_to_fear-prejudice: P=0.028734 R=0.039786 F1=0.033369


2019-06-19 09:25:59,953 - INFO - Bandwagon: P=0.000000 R=0.000000 F1=0.000000


INFO:propaganda_scorer:Bandwagon: P=0.000000 R=0.000000 F1=0.000000


2019-06-19 09:25:59,955 - INFO - Black-and-White_Fallacy: P=0.065763 R=0.197290 F1=0.098645


INFO:propaganda_scorer:Black-and-White_Fallacy: P=0.065763 R=0.197290 F1=0.098645


2019-06-19 09:25:59,957 - INFO - Causal_Oversimplification: P=0.070257 R=0.072523 F1=0.071372


INFO:propaganda_scorer:Causal_Oversimplification: P=0.070257 R=0.072523 F1=0.071372


2019-06-19 09:25:59,958 - INFO - Doubt: P=0.062681 R=0.069230 F1=0.065793


INFO:propaganda_scorer:Doubt: P=0.062681 R=0.069230 F1=0.065793


2019-06-19 09:25:59,960 - INFO - Exaggeration,Minimisation: P=0.115749 R=0.057160 F1=0.076528


INFO:propaganda_scorer:Exaggeration,Minimisation: P=0.115749 R=0.057160 F1=0.076528


2019-06-19 09:25:59,962 - INFO - Flag-Waving: P=0.084480 R=0.124235 F1=0.100571


INFO:propaganda_scorer:Flag-Waving: P=0.084480 R=0.124235 F1=0.100571


2019-06-19 09:25:59,963 - INFO - Loaded_Language: P=0.275456 R=0.212029 F1=0.239616


INFO:propaganda_scorer:Loaded_Language: P=0.275456 R=0.212029 F1=0.239616


2019-06-19 09:25:59,965 - INFO - Name_Calling,Labeling: P=0.239982 R=0.219724 F1=0.229406


INFO:propaganda_scorer:Name_Calling,Labeling: P=0.239982 R=0.219724 F1=0.229406


2019-06-19 09:25:59,967 - INFO - Obfuscation,Intentional_Vagueness,Confusion: P=0.000000 R=0.000000 F1=0.000000


INFO:propaganda_scorer:Obfuscation,Intentional_Vagueness,Confusion: P=0.000000 R=0.000000 F1=0.000000


2019-06-19 09:25:59,968 - INFO - Red_Herring: P=0.000000 R=0.000000 F1=0.000000


INFO:propaganda_scorer:Red_Herring: P=0.000000 R=0.000000 F1=0.000000


2019-06-19 09:25:59,970 - INFO - Reductio_ad_hitlerum: P=0.000000 R=0.000000 F1=0.000000


INFO:propaganda_scorer:Reductio_ad_hitlerum: P=0.000000 R=0.000000 F1=0.000000


2019-06-19 09:25:59,971 - INFO - Repetition: P=0.000000 R=0.000000 F1=0.000000


INFO:propaganda_scorer:Repetition: P=0.000000 R=0.000000 F1=0.000000


2019-06-19 09:25:59,973 - INFO - Slogans: P=0.220501 R=0.192938 F1=0.205801


INFO:propaganda_scorer:Slogans: P=0.220501 R=0.192938 F1=0.205801


2019-06-19 09:25:59,975 - INFO - Straw_Men: P=0.000000 R=0.000000 F1=0.000000


INFO:propaganda_scorer:Straw_Men: P=0.000000 R=0.000000 F1=0.000000


2019-06-19 09:25:59,976 - INFO - Thought-terminating_Cliches: P=0.000000 R=0.000000 F1=0.000000


INFO:propaganda_scorer:Thought-terminating_Cliches: P=0.000000 R=0.000000 F1=0.000000


2019-06-19 09:25:59,978 - INFO - Whataboutism: P=0.000000 R=0.000000 F1=0.000000


INFO:propaganda_scorer:Whataboutism: P=0.000000 R=0.000000 F1=0.000000


0.1447982455156738
Validation loss decreased (-0.137587 --> -0.144798).  Saving model ...







Epoch: 100%|██████████| 5/5 [44:04<00:00, 529.74s/it][A[A[A[A[A

NameError: name 'draw_curves' is not defined

In [15]:
 os.environ['CUDA_VISIBLE_DEVICES']='0,1,2,3,4'

In [16]:
MAX_LEN = 210
bs = 32
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
n_gpu = torch.cuda.device_count(); 
logging.info("GPUs Detected: %s" % (n_gpu))

tokenizer = BertTokenizer.from_pretrained(opt["model"], do_lower_case=False);
# Model Initialize
model = BertForTokenClassification.from_pretrained(opt["model"], num_labels=opt["nLabels"]);

model.to(device)

model.load_state_dict(torch.load(opt["loadModel"]))

RuntimeError: Error(s) in loading state_dict for BertForTokenClassification:
	Missing key(s) in state_dict: "bert.embeddings.word_embeddings.weight", "bert.embeddings.position_embeddings.weight", "bert.embeddings.token_type_embeddings.weight", "bert.embeddings.LayerNorm.weight", "bert.embeddings.LayerNorm.bias", "bert.encoder.layer.0.attention.self.query.weight", "bert.encoder.layer.0.attention.self.query.bias", "bert.encoder.layer.0.attention.self.key.weight", "bert.encoder.layer.0.attention.self.key.bias", "bert.encoder.layer.0.attention.self.value.weight", "bert.encoder.layer.0.attention.self.value.bias", "bert.encoder.layer.0.attention.output.dense.weight", "bert.encoder.layer.0.attention.output.dense.bias", "bert.encoder.layer.0.attention.output.LayerNorm.weight", "bert.encoder.layer.0.attention.output.LayerNorm.bias", "bert.encoder.layer.0.intermediate.dense.weight", "bert.encoder.layer.0.intermediate.dense.bias", "bert.encoder.layer.0.output.dense.weight", "bert.encoder.layer.0.output.dense.bias", "bert.encoder.layer.0.output.LayerNorm.weight", "bert.encoder.layer.0.output.LayerNorm.bias", "bert.encoder.layer.1.attention.self.query.weight", "bert.encoder.layer.1.attention.self.query.bias", "bert.encoder.layer.1.attention.self.key.weight", "bert.encoder.layer.1.attention.self.key.bias", "bert.encoder.layer.1.attention.self.value.weight", "bert.encoder.layer.1.attention.self.value.bias", "bert.encoder.layer.1.attention.output.dense.weight", "bert.encoder.layer.1.attention.output.dense.bias", "bert.encoder.layer.1.attention.output.LayerNorm.weight", "bert.encoder.layer.1.attention.output.LayerNorm.bias", "bert.encoder.layer.1.intermediate.dense.weight", "bert.encoder.layer.1.intermediate.dense.bias", "bert.encoder.layer.1.output.dense.weight", "bert.encoder.layer.1.output.dense.bias", "bert.encoder.layer.1.output.LayerNorm.weight", "bert.encoder.layer.1.output.LayerNorm.bias", "bert.encoder.layer.2.attention.self.query.weight", "bert.encoder.layer.2.attention.self.query.bias", "bert.encoder.layer.2.attention.self.key.weight", "bert.encoder.layer.2.attention.self.key.bias", "bert.encoder.layer.2.attention.self.value.weight", "bert.encoder.layer.2.attention.self.value.bias", "bert.encoder.layer.2.attention.output.dense.weight", "bert.encoder.layer.2.attention.output.dense.bias", "bert.encoder.layer.2.attention.output.LayerNorm.weight", "bert.encoder.layer.2.attention.output.LayerNorm.bias", "bert.encoder.layer.2.intermediate.dense.weight", "bert.encoder.layer.2.intermediate.dense.bias", "bert.encoder.layer.2.output.dense.weight", "bert.encoder.layer.2.output.dense.bias", "bert.encoder.layer.2.output.LayerNorm.weight", "bert.encoder.layer.2.output.LayerNorm.bias", "bert.encoder.layer.3.attention.self.query.weight", "bert.encoder.layer.3.attention.self.query.bias", "bert.encoder.layer.3.attention.self.key.weight", "bert.encoder.layer.3.attention.self.key.bias", "bert.encoder.layer.3.attention.self.value.weight", "bert.encoder.layer.3.attention.self.value.bias", "bert.encoder.layer.3.attention.output.dense.weight", "bert.encoder.layer.3.attention.output.dense.bias", "bert.encoder.layer.3.attention.output.LayerNorm.weight", "bert.encoder.layer.3.attention.output.LayerNorm.bias", "bert.encoder.layer.3.intermediate.dense.weight", "bert.encoder.layer.3.intermediate.dense.bias", "bert.encoder.layer.3.output.dense.weight", "bert.encoder.layer.3.output.dense.bias", "bert.encoder.layer.3.output.LayerNorm.weight", "bert.encoder.layer.3.output.LayerNorm.bias", "bert.encoder.layer.4.attention.self.query.weight", "bert.encoder.layer.4.attention.self.query.bias", "bert.encoder.layer.4.attention.self.key.weight", "bert.encoder.layer.4.attention.self.key.bias", "bert.encoder.layer.4.attention.self.value.weight", "bert.encoder.layer.4.attention.self.value.bias", "bert.encoder.layer.4.attention.output.dense.weight", "bert.encoder.layer.4.attention.output.dense.bias", "bert.encoder.layer.4.attention.output.LayerNorm.weight", "bert.encoder.layer.4.attention.output.LayerNorm.bias", "bert.encoder.layer.4.intermediate.dense.weight", "bert.encoder.layer.4.intermediate.dense.bias", "bert.encoder.layer.4.output.dense.weight", "bert.encoder.layer.4.output.dense.bias", "bert.encoder.layer.4.output.LayerNorm.weight", "bert.encoder.layer.4.output.LayerNorm.bias", "bert.encoder.layer.5.attention.self.query.weight", "bert.encoder.layer.5.attention.self.query.bias", "bert.encoder.layer.5.attention.self.key.weight", "bert.encoder.layer.5.attention.self.key.bias", "bert.encoder.layer.5.attention.self.value.weight", "bert.encoder.layer.5.attention.self.value.bias", "bert.encoder.layer.5.attention.output.dense.weight", "bert.encoder.layer.5.attention.output.dense.bias", "bert.encoder.layer.5.attention.output.LayerNorm.weight", "bert.encoder.layer.5.attention.output.LayerNorm.bias", "bert.encoder.layer.5.intermediate.dense.weight", "bert.encoder.layer.5.intermediate.dense.bias", "bert.encoder.layer.5.output.dense.weight", "bert.encoder.layer.5.output.dense.bias", "bert.encoder.layer.5.output.LayerNorm.weight", "bert.encoder.layer.5.output.LayerNorm.bias", "bert.encoder.layer.6.attention.self.query.weight", "bert.encoder.layer.6.attention.self.query.bias", "bert.encoder.layer.6.attention.self.key.weight", "bert.encoder.layer.6.attention.self.key.bias", "bert.encoder.layer.6.attention.self.value.weight", "bert.encoder.layer.6.attention.self.value.bias", "bert.encoder.layer.6.attention.output.dense.weight", "bert.encoder.layer.6.attention.output.dense.bias", "bert.encoder.layer.6.attention.output.LayerNorm.weight", "bert.encoder.layer.6.attention.output.LayerNorm.bias", "bert.encoder.layer.6.intermediate.dense.weight", "bert.encoder.layer.6.intermediate.dense.bias", "bert.encoder.layer.6.output.dense.weight", "bert.encoder.layer.6.output.dense.bias", "bert.encoder.layer.6.output.LayerNorm.weight", "bert.encoder.layer.6.output.LayerNorm.bias", "bert.encoder.layer.7.attention.self.query.weight", "bert.encoder.layer.7.attention.self.query.bias", "bert.encoder.layer.7.attention.self.key.weight", "bert.encoder.layer.7.attention.self.key.bias", "bert.encoder.layer.7.attention.self.value.weight", "bert.encoder.layer.7.attention.self.value.bias", "bert.encoder.layer.7.attention.output.dense.weight", "bert.encoder.layer.7.attention.output.dense.bias", "bert.encoder.layer.7.attention.output.LayerNorm.weight", "bert.encoder.layer.7.attention.output.LayerNorm.bias", "bert.encoder.layer.7.intermediate.dense.weight", "bert.encoder.layer.7.intermediate.dense.bias", "bert.encoder.layer.7.output.dense.weight", "bert.encoder.layer.7.output.dense.bias", "bert.encoder.layer.7.output.LayerNorm.weight", "bert.encoder.layer.7.output.LayerNorm.bias", "bert.encoder.layer.8.attention.self.query.weight", "bert.encoder.layer.8.attention.self.query.bias", "bert.encoder.layer.8.attention.self.key.weight", "bert.encoder.layer.8.attention.self.key.bias", "bert.encoder.layer.8.attention.self.value.weight", "bert.encoder.layer.8.attention.self.value.bias", "bert.encoder.layer.8.attention.output.dense.weight", "bert.encoder.layer.8.attention.output.dense.bias", "bert.encoder.layer.8.attention.output.LayerNorm.weight", "bert.encoder.layer.8.attention.output.LayerNorm.bias", "bert.encoder.layer.8.intermediate.dense.weight", "bert.encoder.layer.8.intermediate.dense.bias", "bert.encoder.layer.8.output.dense.weight", "bert.encoder.layer.8.output.dense.bias", "bert.encoder.layer.8.output.LayerNorm.weight", "bert.encoder.layer.8.output.LayerNorm.bias", "bert.encoder.layer.9.attention.self.query.weight", "bert.encoder.layer.9.attention.self.query.bias", "bert.encoder.layer.9.attention.self.key.weight", "bert.encoder.layer.9.attention.self.key.bias", "bert.encoder.layer.9.attention.self.value.weight", "bert.encoder.layer.9.attention.self.value.bias", "bert.encoder.layer.9.attention.output.dense.weight", "bert.encoder.layer.9.attention.output.dense.bias", "bert.encoder.layer.9.attention.output.LayerNorm.weight", "bert.encoder.layer.9.attention.output.LayerNorm.bias", "bert.encoder.layer.9.intermediate.dense.weight", "bert.encoder.layer.9.intermediate.dense.bias", "bert.encoder.layer.9.output.dense.weight", "bert.encoder.layer.9.output.dense.bias", "bert.encoder.layer.9.output.LayerNorm.weight", "bert.encoder.layer.9.output.LayerNorm.bias", "bert.encoder.layer.10.attention.self.query.weight", "bert.encoder.layer.10.attention.self.query.bias", "bert.encoder.layer.10.attention.self.key.weight", "bert.encoder.layer.10.attention.self.key.bias", "bert.encoder.layer.10.attention.self.value.weight", "bert.encoder.layer.10.attention.self.value.bias", "bert.encoder.layer.10.attention.output.dense.weight", "bert.encoder.layer.10.attention.output.dense.bias", "bert.encoder.layer.10.attention.output.LayerNorm.weight", "bert.encoder.layer.10.attention.output.LayerNorm.bias", "bert.encoder.layer.10.intermediate.dense.weight", "bert.encoder.layer.10.intermediate.dense.bias", "bert.encoder.layer.10.output.dense.weight", "bert.encoder.layer.10.output.dense.bias", "bert.encoder.layer.10.output.LayerNorm.weight", "bert.encoder.layer.10.output.LayerNorm.bias", "bert.encoder.layer.11.attention.self.query.weight", "bert.encoder.layer.11.attention.self.query.bias", "bert.encoder.layer.11.attention.self.key.weight", "bert.encoder.layer.11.attention.self.key.bias", "bert.encoder.layer.11.attention.self.value.weight", "bert.encoder.layer.11.attention.self.value.bias", "bert.encoder.layer.11.attention.output.dense.weight", "bert.encoder.layer.11.attention.output.dense.bias", "bert.encoder.layer.11.attention.output.LayerNorm.weight", "bert.encoder.layer.11.attention.output.LayerNorm.bias", "bert.encoder.layer.11.intermediate.dense.weight", "bert.encoder.layer.11.intermediate.dense.bias", "bert.encoder.layer.11.output.dense.weight", "bert.encoder.layer.11.output.dense.bias", "bert.encoder.layer.11.output.LayerNorm.weight", "bert.encoder.layer.11.output.LayerNorm.bias", "bert.pooler.dense.weight", "bert.pooler.dense.bias", "classifier_1.weight", "classifier_1.bias", "classifier_2.weight", "classifier_2.bias". 
	Unexpected key(s) in state_dict: "module.bert.embeddings.word_embeddings.weight", "module.bert.embeddings.position_embeddings.weight", "module.bert.embeddings.token_type_embeddings.weight", "module.bert.embeddings.LayerNorm.weight", "module.bert.embeddings.LayerNorm.bias", "module.bert.encoder.layer.0.attention.self.query.weight", "module.bert.encoder.layer.0.attention.self.query.bias", "module.bert.encoder.layer.0.attention.self.key.weight", "module.bert.encoder.layer.0.attention.self.key.bias", "module.bert.encoder.layer.0.attention.self.value.weight", "module.bert.encoder.layer.0.attention.self.value.bias", "module.bert.encoder.layer.0.attention.output.dense.weight", "module.bert.encoder.layer.0.attention.output.dense.bias", "module.bert.encoder.layer.0.attention.output.LayerNorm.weight", "module.bert.encoder.layer.0.attention.output.LayerNorm.bias", "module.bert.encoder.layer.0.intermediate.dense.weight", "module.bert.encoder.layer.0.intermediate.dense.bias", "module.bert.encoder.layer.0.output.dense.weight", "module.bert.encoder.layer.0.output.dense.bias", "module.bert.encoder.layer.0.output.LayerNorm.weight", "module.bert.encoder.layer.0.output.LayerNorm.bias", "module.bert.encoder.layer.1.attention.self.query.weight", "module.bert.encoder.layer.1.attention.self.query.bias", "module.bert.encoder.layer.1.attention.self.key.weight", "module.bert.encoder.layer.1.attention.self.key.bias", "module.bert.encoder.layer.1.attention.self.value.weight", "module.bert.encoder.layer.1.attention.self.value.bias", "module.bert.encoder.layer.1.attention.output.dense.weight", "module.bert.encoder.layer.1.attention.output.dense.bias", "module.bert.encoder.layer.1.attention.output.LayerNorm.weight", "module.bert.encoder.layer.1.attention.output.LayerNorm.bias", "module.bert.encoder.layer.1.intermediate.dense.weight", "module.bert.encoder.layer.1.intermediate.dense.bias", "module.bert.encoder.layer.1.output.dense.weight", "module.bert.encoder.layer.1.output.dense.bias", "module.bert.encoder.layer.1.output.LayerNorm.weight", "module.bert.encoder.layer.1.output.LayerNorm.bias", "module.bert.encoder.layer.2.attention.self.query.weight", "module.bert.encoder.layer.2.attention.self.query.bias", "module.bert.encoder.layer.2.attention.self.key.weight", "module.bert.encoder.layer.2.attention.self.key.bias", "module.bert.encoder.layer.2.attention.self.value.weight", "module.bert.encoder.layer.2.attention.self.value.bias", "module.bert.encoder.layer.2.attention.output.dense.weight", "module.bert.encoder.layer.2.attention.output.dense.bias", "module.bert.encoder.layer.2.attention.output.LayerNorm.weight", "module.bert.encoder.layer.2.attention.output.LayerNorm.bias", "module.bert.encoder.layer.2.intermediate.dense.weight", "module.bert.encoder.layer.2.intermediate.dense.bias", "module.bert.encoder.layer.2.output.dense.weight", "module.bert.encoder.layer.2.output.dense.bias", "module.bert.encoder.layer.2.output.LayerNorm.weight", "module.bert.encoder.layer.2.output.LayerNorm.bias", "module.bert.encoder.layer.3.attention.self.query.weight", "module.bert.encoder.layer.3.attention.self.query.bias", "module.bert.encoder.layer.3.attention.self.key.weight", "module.bert.encoder.layer.3.attention.self.key.bias", "module.bert.encoder.layer.3.attention.self.value.weight", "module.bert.encoder.layer.3.attention.self.value.bias", "module.bert.encoder.layer.3.attention.output.dense.weight", "module.bert.encoder.layer.3.attention.output.dense.bias", "module.bert.encoder.layer.3.attention.output.LayerNorm.weight", "module.bert.encoder.layer.3.attention.output.LayerNorm.bias", "module.bert.encoder.layer.3.intermediate.dense.weight", "module.bert.encoder.layer.3.intermediate.dense.bias", "module.bert.encoder.layer.3.output.dense.weight", "module.bert.encoder.layer.3.output.dense.bias", "module.bert.encoder.layer.3.output.LayerNorm.weight", "module.bert.encoder.layer.3.output.LayerNorm.bias", "module.bert.encoder.layer.4.attention.self.query.weight", "module.bert.encoder.layer.4.attention.self.query.bias", "module.bert.encoder.layer.4.attention.self.key.weight", "module.bert.encoder.layer.4.attention.self.key.bias", "module.bert.encoder.layer.4.attention.self.value.weight", "module.bert.encoder.layer.4.attention.self.value.bias", "module.bert.encoder.layer.4.attention.output.dense.weight", "module.bert.encoder.layer.4.attention.output.dense.bias", "module.bert.encoder.layer.4.attention.output.LayerNorm.weight", "module.bert.encoder.layer.4.attention.output.LayerNorm.bias", "module.bert.encoder.layer.4.intermediate.dense.weight", "module.bert.encoder.layer.4.intermediate.dense.bias", "module.bert.encoder.layer.4.output.dense.weight", "module.bert.encoder.layer.4.output.dense.bias", "module.bert.encoder.layer.4.output.LayerNorm.weight", "module.bert.encoder.layer.4.output.LayerNorm.bias", "module.bert.encoder.layer.5.attention.self.query.weight", "module.bert.encoder.layer.5.attention.self.query.bias", "module.bert.encoder.layer.5.attention.self.key.weight", "module.bert.encoder.layer.5.attention.self.key.bias", "module.bert.encoder.layer.5.attention.self.value.weight", "module.bert.encoder.layer.5.attention.self.value.bias", "module.bert.encoder.layer.5.attention.output.dense.weight", "module.bert.encoder.layer.5.attention.output.dense.bias", "module.bert.encoder.layer.5.attention.output.LayerNorm.weight", "module.bert.encoder.layer.5.attention.output.LayerNorm.bias", "module.bert.encoder.layer.5.intermediate.dense.weight", "module.bert.encoder.layer.5.intermediate.dense.bias", "module.bert.encoder.layer.5.output.dense.weight", "module.bert.encoder.layer.5.output.dense.bias", "module.bert.encoder.layer.5.output.LayerNorm.weight", "module.bert.encoder.layer.5.output.LayerNorm.bias", "module.bert.encoder.layer.6.attention.self.query.weight", "module.bert.encoder.layer.6.attention.self.query.bias", "module.bert.encoder.layer.6.attention.self.key.weight", "module.bert.encoder.layer.6.attention.self.key.bias", "module.bert.encoder.layer.6.attention.self.value.weight", "module.bert.encoder.layer.6.attention.self.value.bias", "module.bert.encoder.layer.6.attention.output.dense.weight", "module.bert.encoder.layer.6.attention.output.dense.bias", "module.bert.encoder.layer.6.attention.output.LayerNorm.weight", "module.bert.encoder.layer.6.attention.output.LayerNorm.bias", "module.bert.encoder.layer.6.intermediate.dense.weight", "module.bert.encoder.layer.6.intermediate.dense.bias", "module.bert.encoder.layer.6.output.dense.weight", "module.bert.encoder.layer.6.output.dense.bias", "module.bert.encoder.layer.6.output.LayerNorm.weight", "module.bert.encoder.layer.6.output.LayerNorm.bias", "module.bert.encoder.layer.7.attention.self.query.weight", "module.bert.encoder.layer.7.attention.self.query.bias", "module.bert.encoder.layer.7.attention.self.key.weight", "module.bert.encoder.layer.7.attention.self.key.bias", "module.bert.encoder.layer.7.attention.self.value.weight", "module.bert.encoder.layer.7.attention.self.value.bias", "module.bert.encoder.layer.7.attention.output.dense.weight", "module.bert.encoder.layer.7.attention.output.dense.bias", "module.bert.encoder.layer.7.attention.output.LayerNorm.weight", "module.bert.encoder.layer.7.attention.output.LayerNorm.bias", "module.bert.encoder.layer.7.intermediate.dense.weight", "module.bert.encoder.layer.7.intermediate.dense.bias", "module.bert.encoder.layer.7.output.dense.weight", "module.bert.encoder.layer.7.output.dense.bias", "module.bert.encoder.layer.7.output.LayerNorm.weight", "module.bert.encoder.layer.7.output.LayerNorm.bias", "module.bert.encoder.layer.8.attention.self.query.weight", "module.bert.encoder.layer.8.attention.self.query.bias", "module.bert.encoder.layer.8.attention.self.key.weight", "module.bert.encoder.layer.8.attention.self.key.bias", "module.bert.encoder.layer.8.attention.self.value.weight", "module.bert.encoder.layer.8.attention.self.value.bias", "module.bert.encoder.layer.8.attention.output.dense.weight", "module.bert.encoder.layer.8.attention.output.dense.bias", "module.bert.encoder.layer.8.attention.output.LayerNorm.weight", "module.bert.encoder.layer.8.attention.output.LayerNorm.bias", "module.bert.encoder.layer.8.intermediate.dense.weight", "module.bert.encoder.layer.8.intermediate.dense.bias", "module.bert.encoder.layer.8.output.dense.weight", "module.bert.encoder.layer.8.output.dense.bias", "module.bert.encoder.layer.8.output.LayerNorm.weight", "module.bert.encoder.layer.8.output.LayerNorm.bias", "module.bert.encoder.layer.9.attention.self.query.weight", "module.bert.encoder.layer.9.attention.self.query.bias", "module.bert.encoder.layer.9.attention.self.key.weight", "module.bert.encoder.layer.9.attention.self.key.bias", "module.bert.encoder.layer.9.attention.self.value.weight", "module.bert.encoder.layer.9.attention.self.value.bias", "module.bert.encoder.layer.9.attention.output.dense.weight", "module.bert.encoder.layer.9.attention.output.dense.bias", "module.bert.encoder.layer.9.attention.output.LayerNorm.weight", "module.bert.encoder.layer.9.attention.output.LayerNorm.bias", "module.bert.encoder.layer.9.intermediate.dense.weight", "module.bert.encoder.layer.9.intermediate.dense.bias", "module.bert.encoder.layer.9.output.dense.weight", "module.bert.encoder.layer.9.output.dense.bias", "module.bert.encoder.layer.9.output.LayerNorm.weight", "module.bert.encoder.layer.9.output.LayerNorm.bias", "module.bert.encoder.layer.10.attention.self.query.weight", "module.bert.encoder.layer.10.attention.self.query.bias", "module.bert.encoder.layer.10.attention.self.key.weight", "module.bert.encoder.layer.10.attention.self.key.bias", "module.bert.encoder.layer.10.attention.self.value.weight", "module.bert.encoder.layer.10.attention.self.value.bias", "module.bert.encoder.layer.10.attention.output.dense.weight", "module.bert.encoder.layer.10.attention.output.dense.bias", "module.bert.encoder.layer.10.attention.output.LayerNorm.weight", "module.bert.encoder.layer.10.attention.output.LayerNorm.bias", "module.bert.encoder.layer.10.intermediate.dense.weight", "module.bert.encoder.layer.10.intermediate.dense.bias", "module.bert.encoder.layer.10.output.dense.weight", "module.bert.encoder.layer.10.output.dense.bias", "module.bert.encoder.layer.10.output.LayerNorm.weight", "module.bert.encoder.layer.10.output.LayerNorm.bias", "module.bert.encoder.layer.11.attention.self.query.weight", "module.bert.encoder.layer.11.attention.self.query.bias", "module.bert.encoder.layer.11.attention.self.key.weight", "module.bert.encoder.layer.11.attention.self.key.bias", "module.bert.encoder.layer.11.attention.self.value.weight", "module.bert.encoder.layer.11.attention.self.value.bias", "module.bert.encoder.layer.11.attention.output.dense.weight", "module.bert.encoder.layer.11.attention.output.dense.bias", "module.bert.encoder.layer.11.attention.output.LayerNorm.weight", "module.bert.encoder.layer.11.attention.output.LayerNorm.bias", "module.bert.encoder.layer.11.intermediate.dense.weight", "module.bert.encoder.layer.11.intermediate.dense.bias", "module.bert.encoder.layer.11.output.dense.weight", "module.bert.encoder.layer.11.output.dense.bias", "module.bert.encoder.layer.11.output.LayerNorm.weight", "module.bert.encoder.layer.11.output.LayerNorm.bias", "module.bert.pooler.dense.weight", "module.bert.pooler.dense.bias", "module.classifier.weight", "module.classifier.bias". 


# Evaluation

## Test

In [None]:
np.set_printoptions(threshold=1000)

In [32]:
def visualize(index, predictions, tokenized_texts, test = True, val_tags=None):
    print('Sentence: '+' '.join(tokenized_texts[index]))
    pred_l = [predictions[index][i] for i in range(len(predictions[index])) if predictions[index][i] not in [hash_token, end_token]]
    pred_l = max(pred_l)
    if not test:
        truth = [i for i in range(len(val_tags[index])) if val_tags[index][i] not in [hash_token, end_token, 0]]
        truth_l = [val_tags[index][i] for i in range(len(val_tags[index])) if val_tags[index][i] not in [hash_token, end_token]]
        truth_l = max(truth_l)
        gold = [tokenized_texts[index][i] for i in truth]
        print('Truth label: ',truth_l, ' Predicted label: ',pred_l)
        print('Porpaganda Sequence: '+' '.join(gold))
 
    
    pred = [i for i in range(len(predictions[index])) if predictions[index][i] not in [hash_token, end_token, 0]]
    
    if pred:
        if test:
            print(len(predictions))
            print(predictions)
            #print([tag_t.get(predictions[0][x]) for x in predictions[0]])
        predicted = [tokenized_texts[index][i] for i in pred]
        print('Predicted Sequence: '+' '.join(predicted))
        #print(predictions)
    else:
        print("No propaganda detected")

In [33]:
visualize(4, predictions, tokenized_texts, False, val_tags.detach().cpu().numpy().tolist())

NameError: name 'tokenized_texts' is not defined

In [42]:
def test_sentence(sample):
    sample = sample.split()
    clean = [[tokenizer.tokenize(words) for words in sent] for sent in [sample]] 
    tokenize = [concatenate_list_data(sent) for sent in clean]
    numerics = pad_sequences([tokenizer.convert_tokens_to_ids(txt) for txt in tokenize],
                          max_len=opt["maxLen"])
    attention_masks = [[float(i>0) for i in ii] for ii in numerics]
    input_sample = torch.tensor(numerics)
    input_mask = torch.tensor(attention_masks)
    model.eval()
    logits = model(input_sample, token_type_ids=None,
                       attention_mask=input_mask)
    predictions_sample = []
    logits = logits.detach().cpu().numpy()
    predictions_sample.extend([list(p) for p in np.argmax(logits, axis=2)])
    visualize(0, predictions_sample, tokenize)

In [47]:
test_sentence('Trump is a piece of trash.')

Sentence: Trump is a piece of trash .
1
[[0, 0, 0, 9, 9, 9, 0, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20]]
Predicted Sequence: piece of trash


In [134]:
x = 555

In [135]:
viz = df.loc[df['ID'] == df["ID"][x]]

In [136]:
strPath = "datasets-v5/tasks-2-3/test/article"+df["ID"][x]+".txt"

f = open(strPath)

strText = f.read()

docs = {"text":strText,
       "ents":[],
       "title":None}

for index, row in viz.iterrows():
    docs["ents"].append({"start":row[2], "end":row[3], "label":row[1]})

In [137]:
from spacy import displacy
displacy.render(docs, style="ent", manual=True)