#### <b>필요한 라이브러리 설치 및 불러오기</b>

* <b>Transformer</b>: 트랜스포머 아키텍처 라이브러리
  * 한국어 모델을 포함해 다양한 큰 규모의(large-scale) 트랜스포머 모델(BERT, ELECTRA 등) 을 제공한다.
* <b>Soynlp</b>: 한국어를 위한 토큰화 라이브러리
  * 직접 사전(dictionary)를 직접 정의하지 않고도, <b>한국어</b>가 가지는 특수한 단어를 포함하여 토큰으로 만들 수 있도록 도와준다.

In [1]:
!pip install transformers
!pip install soynlp

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting transformers
  Downloading transformers-4.30.2-py3-none-any.whl (7.2 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m7.2/7.2 MB[0m [31m52.8 MB/s[0m eta [36m0:00:00[0m
Collecting huggingface-hub<1.0,>=0.14.1 (from transformers)
  Downloading huggingface_hub-0.15.1-py3-none-any.whl (236 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m236.8/236.8 kB[0m [31m21.7 MB/s[0m eta [36m0:00:00[0m
Collecting tokenizers!=0.11.3,<0.14,>=0.11.1 (from transformers)
  Downloading tokenizers-0.13.3-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (7.8 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m7.8/7.8 MB[0m [31m118.2 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting safetensors>=0.3.1 (from transformers)
  Downloading safetensors-0.3.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (1.3 MB)
[2K     [90

In [2]:
# PyTorch 라이브러리 불러오기
import torch
import torch.nn as nn
from torch.utils.data import TensorDataset
from torch.utils.data import DataLoader, RandomSampler, SequentialSampler

# 사전 학습된 트랜스포머(Transformer) 모델 사용하기
from transformers import (
    ElectraPreTrainedModel,
    ElectraModel,
    ElectraConfig,
    ElectraTokenizer,
    BertPreTrainedModel,
    BertModel,
    BertConfig,
    BertTokenizer
)

# 트랜스포머(Transformer) 학습을 위한 라이브러리 불러오기
from transformers import AdamW, get_linear_schedule_with_warmup

# 한국어 모델 학습을 위한 정규화 라이브러리 사용
from soynlp.normalizer import emoticon_normalize, repeat_normalize

# 기타 라이브러리 불러오기
import os
import re
import copy
import json
import logging
import random
import numpy as np

# 모델 학습 및 학습된 모델 평가를 위한 라이브러리
from sklearn.metrics import f1_score
from tqdm import tqdm, trange

# 하이퍼 파라미터 정의 목적
from argparse import Namespace
from types import SimpleNamespace

#### <b>헤드(Head) 모델 정의하기</b>

* 실제로 내가 원하는 기능을 수행하는 모델을 정의한다.
* <b>헤드(head)</b>: 잘 학습된 모델 뒤에 붙여서 특정한 기능을 위한 작은 크기의 네트워크 모델
  * BERT 혹은 ELECTRA를 통해 추출된 특징(feature)를 이용해 실제로 분류 기능을 수행할 수 있다.

In [3]:
class BiasClassificationHead(nn.Module):
    """Head for Bias Classification"""

    def __init__(self, config, num_bias_labels):
        super().__init__()
        self.dropout = nn.Dropout(config.hidden_dropout_prob)
        self.classifier = nn.Linear(config.hidden_size, num_bias_labels)

    def forward(self, x):
        x = self.dropout(x)
        x = self.classifier(x)
        return x


class HateClassificationHead(nn.Module):
    """Head for Hate Classification"""

    def __init__(self, config, num_hate_labels):
        super().__init__()
        self.dropout = nn.Dropout(config.hidden_dropout_prob)
        self.classifier = nn.Linear(config.hidden_size, num_hate_labels)

    def forward(self, x):
        x = self.dropout(x)
        x = self.classifier(x)
        return x

#### <b>특징 추출기(Feature Extractor) 모델 정의하기</b>

* 하나의 텍스트(text) 입력이 주어졌을 때, 해당 텍스트에 대하여 특징(feature)을 추출한다.
  * 대표적인 Large Language 모델인 BERT와 ELECTRA를 불러와 사용할 수 있다.

In [4]:
class ElectraForBiasClassification(ElectraPreTrainedModel):
    def __init__(self,
                 config: ElectraConfig,
                 args: Namespace,
                 bias_label_lst=None,
                 hate_label_lst=None):
        super().__init__(config)
        self.args = args
        self.num_bias_labels = len(bias_label_lst) if bias_label_lst is not None else 0
        self.num_hate_labels = len(hate_label_lst) if hate_label_lst is not None else 0

        self.electra = ElectraModel(config) # 특징 추출기

        # 편견(bias)와 혐오(hate) 분류 모델을 동시에 학습한다. (multi-task 상황)
        self.bias_classifier = BiasClassificationHead(config, self.num_bias_labels)
        self.hate_classifier = HateClassificationHead(config, self.num_hate_labels)

        # 여러 개의 클래스 중에서 하나로 예측하는 multi-class classification 상황 가정
        self.loss_fct = nn.CrossEntropyLoss()

        self.init_weights()

    # 실제로 모델을 호출했을 때 사용하는 함수
    def forward(
        self,
        input_ids=None,
        attention_mask=None,
        token_type_ids=None,
        position_ids=None,
        head_mask=None,
        inputs_embeds=None,
        bias_labels=None,
        hate_labels=None,
        output_attentions=None,
        output_hidden_states=None,
    ):
        discriminator_hidden_states = self.electra(
            input_ids,
            attention_mask=attention_mask,
            token_type_ids=token_type_ids,
            position_ids=position_ids,
            head_mask=head_mask,
            inputs_embeds=inputs_embeds,
            output_attentions=output_attentions,
            output_hidden_states=output_hidden_states
        )

        pooled_output = discriminator_hidden_states[0][:, 0]

        bias_logits = self.bias_classifier(pooled_output)
        hate_logits = self.hate_classifier(pooled_output)

        total_loss = 0

        # 편견(bias) 모델 사용하기
        if bias_labels is not None:
            bias_loss = self.loss_fct(bias_logits.view(-1, self.num_bias_labels), bias_labels.view(-1))
            total_loss += self.args.bias_loss_coef * bias_loss

        # 혐오(hate) 모델 사용하기
        if hate_labels is not None:
            hate_loss = self.loss_fct(hate_logits.view(-1, self.num_hate_labels), hate_labels.view(-1))
            total_loss += self.args.hate_loss_coef * hate_loss

        outputs = ((bias_logits, hate_logits),) + discriminator_hidden_states[1:]  # add hidden states and attention if they are here

        outputs = (total_loss,) + outputs

        return outputs  # (loss), logits, (hidden_states), (attentions)


class BertForBiasClassification(BertPreTrainedModel):
    def __init__(self,
                 config: BertConfig,
                 args: Namespace,
                 bias_label_lst=None,
                 hate_label_lst=None):
        super().__init__(config)
        self.args = args
        self.num_bias_labels = len(bias_label_lst) if bias_label_lst is not None else 0
        self.num_hate_labels = len(hate_label_lst) if hate_label_lst is not None else 0

        self.bert = BertModel(config) # 특징 추출기

        # 편견(bias)와 혐오(hate) 분류 모델을 동시에 학습한다. (multi-task 상황)
        self.bias_classifier = BiasClassificationHead(config, self.num_bias_labels)
        self.hate_classifier = HateClassificationHead(config, self.num_hate_labels)

        # 여러 개의 클래스 중에서 하나로 예측하는 multi-class classification 상황 가정
        self.loss_fct = nn.CrossEntropyLoss()

        self.init_weights()

    def forward(
        self,
        input_ids=None,
        attention_mask=None,
        token_type_ids=None,
        position_ids=None,
        head_mask=None,
        inputs_embeds=None,
        bias_labels=None,
        hate_labels=None,
        output_attentions=None,
        output_hidden_states=None,
    ):
        outputs = self.bert(
            input_ids,
            attention_mask=attention_mask,
            token_type_ids=token_type_ids,
            position_ids=position_ids,
            head_mask=head_mask,
            inputs_embeds=inputs_embeds,
            output_attentions=output_attentions,
            output_hidden_states=output_hidden_states,
        )

        pooled_output = outputs[1]

        bias_logits = self.bias_classifier(pooled_output)
        hate_logits = self.hate_classifier(pooled_output)

        total_loss = 0

        # 편견(bias) 모델 사용하기
        if bias_labels is not None:
            bias_loss = self.loss_fct(bias_logits.view(-1, self.num_bias_labels), bias_labels.view(-1))
            total_loss += self.args.bias_loss_coef * bias_loss

        # 혐오(hate) 모델 사용하기
        if hate_labels is not None:
            hate_loss = self.loss_fct(hate_logits.view(-1, self.num_hate_labels), hate_labels.view(-1))
            total_loss += self.args.hate_loss_coef * hate_loss

        outputs = ((bias_logits, hate_logits),) + outputs[2:]  # add hidden states and attention if they are here

        outputs = (total_loss,) + outputs

        return outputs  # (loss), logits, (hidden_states), (attentions)

#### <b>트랜스포머 모델에 관한 기본적인 라이브러리 및 설정</b>

In [5]:
MODEL_CLASSES = {
    "koelectra-base": (ElectraConfig, ElectraForBiasClassification, ElectraTokenizer),
    "koelectra-small": (ElectraConfig, ElectraForBiasClassification, ElectraTokenizer),
    "koelectra-base-v2": (ElectraConfig, ElectraForBiasClassification, ElectraTokenizer),
    "koelectra-small-v2": (ElectraConfig, ElectraForBiasClassification, ElectraTokenizer),
    "kcbert-base": (BertConfig, BertForBiasClassification, BertTokenizer),
}


def load_tokenizer(args):
    return MODEL_CLASSES[args.model_type][2].from_pretrained(args.model_name_or_path)


def set_seed(args):
    random.seed(args.seed)
    np.random.seed(args.seed)
    torch.manual_seed(args.seed)
    if not args.no_cuda and torch.cuda.is_available():
        torch.cuda.manual_seed_all(args.seed)


def compute_metrics(pred_bias_labels, pred_hate_labels, gt_bias_labels, gt_hate_labels):
    bias_weighted_f1 = f1_score(gt_bias_labels, pred_bias_labels, average="weighted")
    hate_weighted_f1 = f1_score(gt_hate_labels, pred_hate_labels, average="weighted")

    bias_macro_f1 = f1_score(gt_bias_labels, pred_bias_labels, average="macro")
    hate_macro_f1 = f1_score(gt_hate_labels, pred_hate_labels, average="macro")

    mean_weighted_f1 = (bias_weighted_f1 + hate_weighted_f1) / 2
    return {
        "bias_weighted_f1": bias_weighted_f1,
        "hate_weighted_f1": hate_weighted_f1,
        "mean_weighted_f1": mean_weighted_f1,
        "bias_macro_f1": bias_macro_f1,
        "hate_macro_f1": hate_macro_f1
    }

#### <b>한국어 혐오 표현 분류 모델을 위한 전처리 부분</b>

* 내가 원하는 데이터 세트에 대하여, 데이터를 불러오는 함수를 작성할 필요가 있다.

In [6]:
# 제목(title)과 댓글(comment)을 전처리하는 함수
def preprocess(title: str, comment: str):
    # Erase redundant \" in the start & end of the title
    if title.startswith("\""):
        title = title[1:]
    if title.endswith("\""):
        title = title[:-1]

    # Change quotes
    title = title.replace("“", "\"").replace("”", "\"").replace("‘", "\'").replace("’", "\'")

    # Erase braces in title
    braces = r"\[(.*?)\]"
    braces2 = r"\{(.*?)\}"
    braces3 = r"\【(.*?)\】"
    braces4 = r"\<(.*?)\>"

    title = re.sub(braces, '', title)
    title = re.sub(braces2, '', title)
    title = re.sub(braces3, '', title)
    title = re.sub(braces4, '', title)

    # Normalize the comment
    comment = emoticon_normalize(comment, num_repeats=3)
    comment = repeat_normalize(comment, num_repeats=3)

    return title, comment


# 하나의 텍스트(text) 입력에 대한 클래스
class InputExample(object):
    """ A single training/test example for simple sequence classification. """

    def __init__(self,
                 guid,
                 text_a,
                 text_b,
                 bias_label,
                 hate_label):
        self.guid = guid
        self.text_a = text_a
        self.text_b = text_b
        self.bias_label = bias_label
        self.hate_label = hate_label

    def __repr__(self):
        return str(self.to_json_string())

    def to_dict(self):
        """Serializes this instance to a Python dictionary."""
        output = copy.deepcopy(self.__dict__)
        return output

    def to_json_string(self):
        """Serializes this instance to a JSON string."""
        return json.dumps(self.to_dict(), indent=2, sort_keys=True) + "\n"


# 하나의 텍스트 입력의 특징(feature)에 대한 클래스
class InputFeatures(object):
    """A single set of features of data."""

    def __init__(self,
                 input_ids,
                 attention_mask,
                 token_type_ids,
                 bias_label=None,
                 hate_label=None):
        self.input_ids = input_ids
        self.attention_mask = attention_mask
        self.token_type_ids = token_type_ids
        self.bias_label = bias_label
        self.hate_label = hate_label

    def __repr__(self):
        return str(self.to_json_string())

    def to_dict(self):
        """Serializes this instance to a Python dictionary."""
        output = copy.deepcopy(self.__dict__)
        return output

    def to_json_string(self):
        """Serializes this instance to a JSON string."""
        return json.dumps(self.to_dict(), indent=2, sort_keys=True) + "\n"


# 한국어 혐오 표현 전처리 기능 클래스
class KoreanHateSpeechProcessor(object):
    """Processor for the Korean Hate Speech data set """

    def __init__(self, args):
        self.args = args

    @classmethod
    def get_labels(cls):
        bias_label_lst = ['none', 'gender', 'others']
        hate_label_lst = ['none', 'hate', 'offensive']
        return bias_label_lst, hate_label_lst

    @classmethod
    def _read_file(cls, input_file):
        """Reads a tab separated value file."""
        with open(input_file, "r", encoding="utf-8") as f:
            lines = []
            for line in f:
                lines.append(line.strip())
            return lines

    def _create_examples(self, lines, set_type):
        """Creates examples for the train, dev and test sets."""
        examples = []
        for (i, line) in enumerate(lines[1:]):  # Except the header
            line = line.split('\t')
            guid = "%s-%s" % (set_type, i)
            title = line[0]
            comment = line[1]
            title, comment = preprocess(title, comment)

            bias_label = None
            hate_label = None
            if set_type != "test":
                bias_label = line[2]
                hate_label = line[3]
            if i % 1000 == 0:
                print([title, comment])

            examples.append(InputExample(guid=guid,
                                         text_a=comment,
                                         text_b=title,
                                         bias_label=bias_label,
                                         hate_label=hate_label))
        return examples

    def get_examples(self, mode):
        """
        Args:
            mode: train, dev, test
        """
        file_to_read = None
        if mode == 'train':
            file_to_read = self.args.train_file
        elif mode == 'dev':
            file_to_read = self.args.dev_file
        elif mode == 'test':
            file_to_read = self.args.test_file

        print("LOOKING AT {}".format(os.path.join(self.args.data_dir, file_to_read)))
        return self._create_examples(self._read_file(os.path.join(self.args.data_dir, file_to_read)), mode)


# 실질적으로 텍스트 입력을 트랜스포머 모델에 넣어 특징(feature)을 계산하는 함수
def convert_examples_to_features(
        examples,
        tokenizer,
        max_length,
):
    bias_label_list, hate_label_list = KoreanHateSpeechProcessor.get_labels()

    bias_label_map = {label: i for i, label in enumerate(bias_label_list)}
    hate_label_map = {label: i for i, label in enumerate(hate_label_list)}

    def label_from_example(example):
        bias_label_id = -1
        hate_label_id = -1
        if example.bias_label is not None:
            bias_label_id = bias_label_map[example.bias_label]
        if example.hate_label is not None:
            hate_label_id = hate_label_map[example.hate_label]
        return bias_label_id, hate_label_id

    labels = [label_from_example(example) for example in examples]

    batch_encoding = tokenizer.batch_encode_plus(
        [(example.text_a, example.text_b) for example in examples], max_length=max_length, pad_to_max_length=True
    )

    features = []
    for i in range(len(examples)):
        inputs = {k: batch_encoding[k][i] for k in batch_encoding}
        if "token_type_ids" not in inputs:
            inputs["token_type_ids"] = [0] * len(inputs["input_ids"])  # For xlm-roberta, distilkobert

        feature = InputFeatures(**inputs, bias_label=labels[i][0], hate_label=labels[i][1])
        features.append(feature)

    for i, example in enumerate(examples[:5]):
        print("*** Example ***")
        print("guid: {}".format(example.guid))
        print("input_ids: {}".format(" ".join([str(x) for x in features[i].input_ids])))
        print("attention_mask: {}".format(" ".join([str(x) for x in features[i].attention_mask])))
        print("token_type_ids: {}".format(" ".join([str(x) for x in features[i].token_type_ids])))
        print("bias_label: {}".format(features[i].bias_label))
        print("hate_label: {}".format(features[i].hate_label))

    return features


def load_examples(args, tokenizer, mode):
    processor = KoreanHateSpeechProcessor(args)

    print("Creating features from dataset file at %s", args.data_dir)
    if mode == "train":
        examples = processor.get_examples("train")
    elif mode == "dev":
        examples = processor.get_examples("dev")
    elif mode == "test":
        examples = processor.get_examples("test")
    else:
        raise Exception("For mode, Only train, dev, test is available")

    features = convert_examples_to_features(
        examples,
        tokenizer,
        args.max_seq_len
    )

    # Convert to Tensors and build dataset
    all_input_ids = torch.tensor([f.input_ids for f in features], dtype=torch.long)
    all_attention_mask = torch.tensor([f.attention_mask for f in features], dtype=torch.long)
    all_token_type_ids = torch.tensor([f.token_type_ids for f in features], dtype=torch.long)
    all_bias_labels = torch.tensor([f.bias_label for f in features], dtype=torch.long)
    all_hate_labels = torch.tensor([f.hate_label for f in features], dtype=torch.long)

    dataset = TensorDataset(all_input_ids,
                            all_attention_mask,
                            all_token_type_ids,
                            all_bias_labels,
                            all_hate_labels)
    return dataset

#### <b>모델 학습을 위한 클래스 정의</b>

* 실질적으로 모델을 학습할 수 있도록 해준다.

In [7]:
class Trainer(object):
    def __init__(self, args, tokenizer, train_dataset=None, dev_dataset=None, test_dataset=None):
        self.args = args
        self.tokenizer = tokenizer
        self.train_dataset = train_dataset
        self.dev_dataset = dev_dataset
        self.test_dataset = test_dataset

        self.bias_label_lst, self.hate_label_lst = KoreanHateSpeechProcessor.get_labels()

        self.config_class, self.model_class, _ = MODEL_CLASSES[args.model_type]

        self.config = self.config_class.from_pretrained(args.model_name_or_path,
                                                        finetuning_task=args.task)
        self.model = self.model_class.from_pretrained(args.model_name_or_path,
                                                      config=self.config,
                                                      args=args,
                                                      bias_label_lst=self.bias_label_lst,
                                                      hate_label_lst=self.hate_label_lst)

        # GPU or CPU
        self.device = "cuda" if torch.cuda.is_available() and not args.no_cuda else "cpu"
        self.model.to(self.device)

    def train(self):
        train_sampler = RandomSampler(self.train_dataset)
        train_dataloader = DataLoader(self.train_dataset, sampler=train_sampler, batch_size=self.args.train_batch_size)

        if self.args.max_steps > 0:
            t_total = self.args.max_steps
            self.args.num_train_epochs = self.args.max_steps // (len(train_dataloader) // self.args.gradient_accumulation_steps) + 1
        else:
            t_total = len(train_dataloader) // self.args.gradient_accumulation_steps * self.args.num_train_epochs

        # Prepare optimizer and schedule (linear warmup and decay)
        no_decay = ['bias', 'LayerNorm.weight']
        optimizer_grouped_parameters = [
            {'params': [p for n, p in self.model.named_parameters() if not any(nd in n for nd in no_decay)],
             'weight_decay': self.args.weight_decay},
            {'params': [p for n, p in self.model.named_parameters() if any(nd in n for nd in no_decay)], 'weight_decay': 0.0}
        ]
        optimizer = AdamW(optimizer_grouped_parameters, lr=self.args.learning_rate, eps=self.args.adam_epsilon)
        scheduler = get_linear_schedule_with_warmup(optimizer,
                                                    num_warmup_steps=int(t_total * self.args.warmup_proportion),
                                                    num_training_steps=t_total)

        # Train!
        print("***** Running training *****")
        print("  Num examples = %d", len(self.train_dataset))
        print("  Num Epochs = %d", self.args.num_train_epochs)
        print("  Total train batch size = %d", self.args.train_batch_size)
        print("  Gradient Accumulation steps = %d", self.args.gradient_accumulation_steps)
        print("  Total optimization steps = %d", t_total)
        print("  Logging steps = %d", self.args.logging_steps)

        global_step = 0
        tr_loss = 0.0
        best_mean_weighted_f1 = 0.0
        self.model.zero_grad()

        train_iterator = trange(int(self.args.num_train_epochs), desc="Epoch")

        for _ in train_iterator:
            epoch_iterator = tqdm(train_dataloader, desc="Iteration")
            for step, batch in enumerate(epoch_iterator):
                self.model.train()
                batch = tuple(t.to(self.device) for t in batch)  # GPU or CPU
                inputs = {'input_ids': batch[0],
                          'attention_mask': batch[1],
                          'bias_labels': batch[3],
                          'hate_labels': batch[4]}
                if self.args.model_type != 'distilkobert':
                    inputs['token_type_ids'] = batch[2]
                outputs = self.model(**inputs)
                loss = outputs[0]

                if self.args.gradient_accumulation_steps > 1:
                    loss = loss / self.args.gradient_accumulation_steps

                loss.backward()

                tr_loss += loss.item()
                if (step + 1) % self.args.gradient_accumulation_steps == 0:
                    torch.nn.utils.clip_grad_norm_(self.model.parameters(), self.args.max_grad_norm)

                    optimizer.step()
                    scheduler.step()  # Update learning rate schedule
                    self.model.zero_grad()
                    global_step += 1

                    if self.args.logging_steps > 0 and global_step % self.args.logging_steps == 0:
                        results = self.evaluate("dev")
                        if results["mean_weighted_f1"] > best_mean_weighted_f1:  # Save best result based on mean f1 score
                            best_mean_weighted_f1 = results["mean_weighted_f1"]
                            self.save_model()

                if 0 < self.args.max_steps < global_step:
                    epoch_iterator.close()
                    break

            if 0 < self.args.max_steps < global_step:
                train_iterator.close()
                break

        return global_step, tr_loss / global_step

    def evaluate(self, mode):
        if mode == 'test':
            dataset = self.test_dataset
        elif mode == 'dev':
            dataset = self.dev_dataset
        else:
            raise Exception("Only dev and test dataset available")

        eval_sampler = SequentialSampler(dataset)
        eval_dataloader = DataLoader(dataset, sampler=eval_sampler, batch_size=self.args.eval_batch_size)

        # Eval!
        print("***** Running evaluation on %s dataset *****", mode)
        print("  Num examples = %d", len(dataset))
        print("  Batch size = %d", self.args.eval_batch_size)
        eval_loss = 0.0
        nb_eval_steps = 0

        bias_preds = None
        bias_out_label_ids = None
        hate_preds = None
        hate_out_label_ids = None

        self.model.eval()

        for batch in tqdm(eval_dataloader, desc="Evaluating"):
            batch = tuple(t.to(self.device) for t in batch)
            with torch.no_grad():
                inputs = {'input_ids': batch[0],
                          'attention_mask': batch[1],
                          'bias_labels': batch[3],
                          'hate_labels': batch[4]}
                if self.args.model_type != 'distilkobert':
                    inputs['token_type_ids'] = batch[2]
                outputs = self.model(**inputs)
                tmp_eval_loss, (bias_logits, hate_logits) = outputs[:2]

                eval_loss += tmp_eval_loss.mean().item()
            nb_eval_steps += 1

            # Bias
            if bias_preds is None:
                bias_preds = bias_logits.detach().cpu().numpy()
                bias_out_label_ids = inputs['bias_labels'].detach().cpu().numpy()
            else:
                bias_preds = np.append(bias_preds, bias_logits.detach().cpu().numpy(), axis=0)
                bias_out_label_ids = np.append(
                    bias_out_label_ids, inputs['bias_labels'].detach().cpu().numpy(), axis=0)

            # Hate
            if hate_preds is None:
                hate_preds = hate_logits.detach().cpu().numpy()
                hate_out_label_ids = inputs['hate_labels'].detach().cpu().numpy()
            else:
                hate_preds = np.append(hate_preds, hate_logits.detach().cpu().numpy(), axis=0)
                hate_out_label_ids = np.append(
                    hate_out_label_ids, inputs['hate_labels'].detach().cpu().numpy(), axis=0)

        eval_loss = eval_loss / nb_eval_steps
        results = {
            "loss": eval_loss
        }

        bias_preds = np.argmax(bias_preds, axis=1)
        hate_preds = np.argmax(hate_preds, axis=1)
        result = compute_metrics(bias_preds, hate_preds, bias_out_label_ids, hate_out_label_ids)
        results.update(result)

        print("***** Eval results *****")
        for key in sorted(results.keys()):
            print("  %s = %s", key, str(results[key]))

        return results

    def predict(self):
        # Predict the test dataset which doesn't have label
        dataset = self.test_dataset
        eval_sampler = SequentialSampler(dataset)
        eval_dataloader = DataLoader(dataset, sampler=eval_sampler, batch_size=self.args.eval_batch_size)

        # Eval!
        print("***** Running prediction on test dataset *****")
        print("  Num examples = %d", len(dataset))
        print("  Batch size = %d", self.args.eval_batch_size)

        bias_preds = None
        hate_preds = None

        self.model.eval()

        for batch in tqdm(eval_dataloader, desc="Evaluating"):
            batch = tuple(t.to(self.device) for t in batch)
            with torch.no_grad():
                inputs = {'input_ids': batch[0],
                          'attention_mask': batch[1],
                          'bias_labels': None,
                          'hate_labels': None}
                if self.args.model_type != 'distilkobert':
                    inputs['token_type_ids'] = batch[2]
                outputs = self.model(**inputs)
                _, (bias_logits, hate_logits) = outputs[:2]

            # Bias
            if bias_preds is None:
                bias_preds = bias_logits.detach().cpu().numpy()
            else:
                bias_preds = np.append(bias_preds, bias_logits.detach().cpu().numpy(), axis=0)

            # Hate
            if hate_preds is None:
                hate_preds = hate_logits.detach().cpu().numpy()
            else:
                hate_preds = np.append(hate_preds, hate_logits.detach().cpu().numpy(), axis=0)

        bias_preds = np.argmax(bias_preds, axis=1).tolist()
        hate_preds = np.argmax(hate_preds, axis=1).tolist()

        # Write the result
        print("Writing Prediction to {}...".format(self.args.prediction_file))
        if not os.path.exists(self.args.pred_dir):
            os.makedirs(self.args.pred_dir)
        with open(os.path.join(self.args.pred_dir, self.args.prediction_file), "w", encoding="utf-8") as f:
            f.write("bias,hate\n")
            for bias_idx, hate_idx in zip(bias_preds, hate_preds):
                f.write("{},{}\n".format(self.bias_label_lst[bias_idx], self.hate_label_lst[hate_idx]))

    def save_model(self):
        # Save model checkpoint (Overwrite)
        if not os.path.exists(self.args.model_dir):
            os.makedirs(self.args.model_dir)
        model_to_save = self.model.module if hasattr(self.model, 'module') else self.model
        model_to_save.save_pretrained(self.args.model_dir)
        self.tokenizer.save_pretrained(self.args.model_dir)

        # Save training arguments together with the trained model
        torch.save(self.args, os.path.join(self.args.model_dir, 'training_args.bin'))
        print("Saving model checkpoint to %s", self.args.model_dir)

    def load_model(self):
        # Check whether model exists
        if not os.path.exists(self.args.model_dir):
            raise Exception("Model doesn't exists! Train first!")

        self.config = self.config_class.from_pretrained(self.args.model_dir)
        self.model = self.model_class.from_pretrained(self.args.model_dir,
                                                      config=self.config,
                                                      args=self.args,
                                                      bias_label_lst=self.bias_label_lst,
                                                      hate_label_lst=self.hate_label_lst)

        self.model.to(self.device)
        print("***** Model Loaded *****")

#### <b>한국어 혐오 데이터 세트 불러오기</b>

In [8]:
!git clone https://github.com/monologg/korean-hate-speech-koelectra
%cd korean-hate-speech-koelectra

Cloning into 'korean-hate-speech-koelectra'...
remote: Enumerating objects: 44, done.[K
remote: Counting objects: 100% (44/44), done.[K
remote: Compressing objects: 100% (32/32), done.[K
remote: Total 44 (delta 18), reused 30 (delta 10), pack-reused 0[K
Unpacking objects: 100% (44/44), 769.20 KiB | 4.72 MiB/s, done.
/content/korean-hate-speech-koelectra


#### <b>하이퍼 파라미터 및 인자 값 설정하기</b>

In [9]:
args = SimpleNamespace()

args.task = "korean-hate-speech" # The name of the task to train

args.model_dir = "./model" # Path to save, load model
args.data_dir = "./data" # The input data dir
args.pred_dir = "./preds" # Directory that saves prediction files

args.train_file = "train.txt" # Train file
args.dev_file = "validate.txt" # Dev file
args.test_file = "test.txt" # Test file
args.prediction_file = "prediction.csv" # Output file for prediction

args.model_type = "koelectra-base-v2" # Model type selected
args.model_name_or_path = "monologg/koelectra-base-v2-discriminator" # Model name or path

args.seed = 42 # random seed for initialization
args.train_batch_size = 16 # Batch size for training.
args.eval_batch_size = 32 # Batch size for evaluation.
args.max_seq_len = 100 # The maximum total input sequence length after tokenization.
args.learning_rate = 5e-5 # The initial learning rate for Adam.
args.num_train_epochs = 10.0 # Total number of training epochs to perform.
args.weight_decay = 0.0 # Weight decay if we apply some.
args.gradient_accumulation_steps = 1 # Number of updates steps to accumulate before performing a backward/update pass.
args.adam_epsilon = 1e-8 # Epsilon for Adam optimizer.
args.max_grad_norm = 1.0 # Max gradient norm.
args.max_steps = -1 # If > 0: set total number of training steps to perform. Override num_train_epochs.
args.warmup_proportion = 0.1 # Warmup proportion for linear warmup

args.logging_steps = 200 # Log and save every X updates steps.

args.do_train = True # Whether to run training.
args.do_pred = True # Whether to run prediction on the test set.
args.no_cuda = False # Avoid using CUDA when available

args.bias_loss_coef = 0.5 # Coefficient for the bias loss.
args.hate_loss_coef = 1.0 # Coefficient for the hate loss.

#### <b>모델 학습을 위한 객체 초기화</b>

In [10]:
set_seed(args)

tokenizer = load_tokenizer(args)
train_dataset = load_examples(args, tokenizer, mode="train")
dev_dataset = load_examples(args, tokenizer, mode="dev")
test_dataset = load_examples(args, tokenizer, mode="test")
trainer = Trainer(args, tokenizer, train_dataset, dev_dataset, test_dataset)

Downloading (…)solve/main/vocab.txt: 0.00B [00:00, ?B/s]

Downloading (…)okenizer_config.json:   0%|          | 0.00/65.0 [00:00<?, ?B/s]

Downloading (…)lve/main/config.json:   0%|          | 0.00/487 [00:00<?, ?B/s]

Creating features from dataset file at %s ./data
LOOKING AT ./data/train.txt
['밤새 조문 행렬…故 전미선, 동료들이 그리워하는 따뜻한 배우 ', '(현재 호텔주인 심정) 아18 난 마른하늘에 날벼락맞고 호텔망하게생겼는데 누군 계속 추모받네....']
['\'前 여자친구 피소\' 김정훈, 사흘 침묵→"원만한 해결 위해 노력" ', '근데 솔직히 김정훈 천재유전자 받고 싶어하는 여자들 많을텐데..']
[' 주진모 향한 조롱, 위기 극복 할까?', '댓글 성비랑 나이대보면 소름이 돋는다. 30~40대 아지매들 밥쳐먹고 할일없어 연예인한테 악플다는게 취미인가 보네.']
['최종훈, 집단 성폭행 의혹…""동석했지만 성관계 NO"" ', '범죄를 범죄인줄 모르고 ㅈㄹ는놈들이나 남자들 무데기로 있는 호텔방가서 ㅊ는 ㄴ나 에혀저런 ㄴ 만나결혼하면 인생 망친다']


Truncation was not explicitly activated but `max_length` is provided a specific value, please use `truncation=True` to explicitly truncate examples to max length. Defaulting to 'longest_first' truncation strategy. If you encode pairs of sequences (GLUE-style) with the tokenizer you can select this strategy more precisely by providing a specific strategy to `truncation`.


["'황금빛' 신혜선, 천호진 위암 말기 사실 알았다..'충격' ", '아니ᆢᆢ지안이 황금빛내인생은 언제부터야ᆢᆢ']
['오또맘, \'장성규 팔로우\' 게시글 결국 사과..""경솔하게 행동했다"" ', '예쁘다. 이혼녀라도 환영한다']
[' 티아라, 前소속사와 맞선다..""상표출원 거절사유 제출""', '이제는 간~~다. 간~~~다 떠나는 이아픔. 왕따해서~~이미지추락한. ㅌ~~아라. 간~~~다. 간~~~다~~이젠. 홀로서기~~목매워 노래 부르네~~']
["'너노들' 연우진♥김세정, 달콤한 키스…김시후 죽음의 진실은(종합)", '진짜 연기못하는 애들만 모아뒀네 월.화 드라마가시청률 똥망이라 드라마 잠정 폐지까지 한다는데연기못하는 아이돌들은 왜자꾸 갔다가 쓰는건지 몰겠네쌍수해서 얼굴쳐다보는것도 졸부담이구 소름돋는 발연기에 그러니 시청률이 2%지']


Be aware, overflowing tokens are not returned for the setting you have chosen, i.e. sequence pairs with the 'longest_first' truncation strategy. So the returned list will always be empty even if some tokens have been removed.
Be aware, overflowing tokens are not returned for the setting you have chosen, i.e. sequence pairs with the 'longest_first' truncation strategy. So the returned list will always be empty even if some tokens have been removed.
Be aware, overflowing tokens are not returned for the setting you have chosen, i.e. sequence pairs with the 'longest_first' truncation strategy. So the returned list will always be empty even if some tokens have been removed.
Be aware, overflowing tokens are not returned for the setting you have chosen, i.e. sequence pairs with the 'longest_first' truncation strategy. So the returned list will always be empty even if some tokens have been removed.
Be aware, overflowing tokens are not returned for the setting you have chosen, i.e. sequence pai

*** Example ***
guid: train-0
input_ids: 2 36 401 1840 29995 29972 5676 39 44 9920 697 12741 22427 29951 192 28327 30317 29955 1840 30275 29959 30007 30058 30593 29950 30090 4592 765 4728 30160 30326 5 5 5 5 3 14340 9617 10377 1 7768 20158 30014 18 2617 3782 18853 29959 29950 3269 29957 851 3 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
attention_mask: 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
token_type_ids: 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
bias_label: 2
hate_label: 1
*** Example ***
guid: train-1
input_ids: 2 5 5 5 5 131 30006 29972 12045 29953 237 30006 29972 107 5 5 5 5931 7518 29955 22804 14328 23495 5 5 5 46 2349

Be aware, overflowing tokens are not returned for the setting you have chosen, i.e. sequence pairs with the 'longest_first' truncation strategy. So the returned list will always be empty even if some tokens have been removed.


Creating features from dataset file at %s ./data
LOOKING AT ./data/validate.txt
["'아스달 연대기' 뇌안탈 후계자 송중기, 이아르크에 숨어들었다 ", '송중기 시대극은 믿고본다. 첫회 신선하고 좋았다.']
*** Example ***
guid: dev-0
input_ids: 2 18867 840 30315 29961 1500 29955 6075 5 506 30016 4505 29959 29955 300 30190 29948 5 3 40 6050 30124 2420 29960 40 1818 30043 30419 11561 18867 18 6 11840 30169 29951 1200 14961 30080 29948 3 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
attention_mask: 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
token_type_ids: 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
bias_label: 0
hate_label: 0
*** Example ***
guid: dev-1
input_id

Be aware, overflowing tokens are not returned for the setting you have chosen, i.e. sequence pairs with the 'longest_first' truncation strategy. So the returned list will always be empty even if some tokens have been removed.


*** Example ***
guid: test-0
input_ids: 2 1 3580 71 29989 29973 29995 29950 1052 29992 204 29989 29962 300 30251 29948 1 512 29992 29961 446 30039 29956 124 29998 30365 31103 1 3 19742 446 30039 18 2427 2930 30171 30068 40 137 30539 29953 40 2368 147 30190 29948 3 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
attention_mask: 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
token_type_ids: 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
bias_label: -1
hate_label: -1
*** Example ***
guid: test-1
input_ids: 2 932 29948 447 300 29948 260 1555 29959 11294 3 1 18 596 30036 30306 88 30027 30084 3915 9972 322 11711 5 5 41 41 1835 613 179

Downloading pytorch_model.bin:   0%|          | 0.00/443M [00:00<?, ?B/s]

Some weights of the model checkpoint at monologg/koelectra-base-v2-discriminator were not used when initializing ElectraForBiasClassification: ['discriminator_predictions.dense_prediction.bias', 'discriminator_predictions.dense.bias', 'discriminator_predictions.dense_prediction.weight', 'discriminator_predictions.dense.weight']
- This IS expected if you are initializing ElectraForBiasClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing ElectraForBiasClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of ElectraForBiasClassification were not initialized from the model checkpoint at monologg/koelectra-base-v2-discriminator and are newly initialized: ['hate_classifie

#### <b>모델 학습</b>

In [None]:
if args.do_train:
    trainer.train()



***** Running training *****
  Num examples = %d 7896
  Num Epochs = %d 10.0
  Total train batch size = %d 16
  Gradient Accumulation steps = %d 1
  Total optimization steps = %d 4940.0
  Logging steps = %d 200


Epoch:   0%|          | 0/10 [00:00<?, ?it/s]
Iteration:   0%|          | 0/494 [00:00<?, ?it/s][A
Iteration:   0%|          | 1/494 [00:02<23:21,  2.84s/it][A
Iteration:   0%|          | 2/494 [00:03<10:45,  1.31s/it][A
Iteration:   1%|          | 3/494 [00:03<06:44,  1.21it/s][A
Iteration:   1%|          | 4/494 [00:03<04:52,  1.67it/s][A
Iteration:   1%|          | 5/494 [00:03<03:51,  2.12it/s][A
Iteration:   1%|          | 6/494 [00:04<03:13,  2.53it/s][A
Iteration:   1%|▏         | 7/494 [00:04<02:49,  2.87it/s][A
Iteration:   2%|▏         | 8/494 [00:04<02:33,  3.16it/s][A
Iteration:   2%|▏         | 9/494 [00:04<02:23,  3.39it/s][A
Iteration:   2%|▏         | 10/494 [00:05<02:16,  3.54it/s][A
Iteration:   2%|▏         | 11/494 [00:05<02:11,  3.68it/s][A
Iteration:   2%|▏         | 12/494 [00:05<02:08,  3.76it/s][A
Iteration:   3%|▎         | 13/494 [00:05<02:06,  3.81it/s][A
Iteration:   3%|▎         | 14/494 [00:06<02:04,  3.85it/s][A
Iteration:   3%|▎         |

***** Running evaluation on %s dataset ***** dev
  Num examples = %d 471
  Batch size = %d 32




Evaluating:   0%|          | 0/15 [00:00<?, ?it/s][A[A

Evaluating:   7%|▋         | 1/15 [00:00<00:02,  6.16it/s][A[A

Evaluating:  13%|█▎        | 2/15 [00:00<00:02,  6.04it/s][A[A

Evaluating:  20%|██        | 3/15 [00:00<00:02,  5.94it/s][A[A

Evaluating:  27%|██▋       | 4/15 [00:00<00:01,  5.86it/s][A[A

Evaluating:  33%|███▎      | 5/15 [00:00<00:01,  5.92it/s][A[A

Evaluating:  40%|████      | 6/15 [00:01<00:01,  5.90it/s][A[A

Evaluating:  47%|████▋     | 7/15 [00:01<00:01,  5.89it/s][A[A

Evaluating:  53%|█████▎    | 8/15 [00:01<00:01,  5.89it/s][A[A

Evaluating:  60%|██████    | 9/15 [00:01<00:01,  5.89it/s][A[A

Evaluating:  67%|██████▋   | 10/15 [00:01<00:00,  5.86it/s][A[A

Evaluating:  73%|███████▎  | 11/15 [00:01<00:00,  5.87it/s][A[A

Evaluating:  80%|████████  | 12/15 [00:02<00:00,  5.88it/s][A[A

Evaluating:  87%|████████▋ | 13/15 [00:02<00:00,  5.87it/s][A[A

Evaluating:  93%|█████████▎| 14/15 [00:02<00:00,  5.87it/s][A[A

Evaluating:

***** Eval results *****
  %s = %s bias_macro_f1 0.35069796811221615
  %s = %s bias_weighted_f1 0.6419962162361705
  %s = %s hate_macro_f1 0.5458218716544224
  %s = %s hate_weighted_f1 0.5241080485033256
  %s = %s loss 1.2761431058247885
  %s = %s mean_weighted_f1 0.5830521323697481



Iteration:  40%|████      | 200/494 [01:00<10:44,  2.19s/it][A

Saving model checkpoint to %s ./model



Iteration:  41%|████      | 201/494 [01:01<08:01,  1.64s/it][A
Iteration:  41%|████      | 202/494 [01:01<05:58,  1.23s/it][A
Iteration:  41%|████      | 203/494 [01:01<04:33,  1.07it/s][A
Iteration:  41%|████▏     | 204/494 [01:02<03:33,  1.36it/s][A
Iteration:  41%|████▏     | 205/494 [01:02<02:53,  1.67it/s][A
Iteration:  42%|████▏     | 206/494 [01:02<02:24,  2.00it/s][A
Iteration:  42%|████▏     | 207/494 [01:02<02:03,  2.32it/s][A
Iteration:  42%|████▏     | 208/494 [01:03<01:49,  2.61it/s][A
Iteration:  42%|████▏     | 209/494 [01:03<01:39,  2.87it/s][A
Iteration:  43%|████▎     | 210/494 [01:03<01:32,  3.07it/s][A
Iteration:  43%|████▎     | 211/494 [01:03<01:27,  3.23it/s][A
Iteration:  43%|████▎     | 212/494 [01:04<01:23,  3.37it/s][A
Iteration:  43%|████▎     | 213/494 [01:04<01:21,  3.46it/s][A
Iteration:  43%|████▎     | 214/494 [01:04<01:19,  3.54it/s][A
Iteration:  44%|████▎     | 215/494 [01:05<01:17,  3.61it/s][A
Iteration:  44%|████▎     | 216/494 [01

***** Running evaluation on %s dataset ***** dev
  Num examples = %d 471
  Batch size = %d 32




Evaluating:   0%|          | 0/15 [00:00<?, ?it/s][A[A

Evaluating:   7%|▋         | 1/15 [00:00<00:02,  5.53it/s][A[A

Evaluating:  13%|█▎        | 2/15 [00:00<00:02,  5.21it/s][A[A

Evaluating:  20%|██        | 3/15 [00:00<00:02,  5.22it/s][A[A

Evaluating:  27%|██▋       | 4/15 [00:00<00:02,  5.25it/s][A[A

Evaluating:  33%|███▎      | 5/15 [00:00<00:01,  5.20it/s][A[A

Evaluating:  40%|████      | 6/15 [00:01<00:01,  5.27it/s][A[A

Evaluating:  47%|████▋     | 7/15 [00:01<00:01,  5.28it/s][A[A

Evaluating:  53%|█████▎    | 8/15 [00:01<00:01,  5.25it/s][A[A

Evaluating:  60%|██████    | 9/15 [00:01<00:01,  5.26it/s][A[A

Evaluating:  67%|██████▋   | 10/15 [00:01<00:00,  5.22it/s][A[A

Evaluating:  73%|███████▎  | 11/15 [00:02<00:00,  5.25it/s][A[A

Evaluating:  80%|████████  | 12/15 [00:02<00:00,  5.28it/s][A[A

Evaluating:  87%|████████▋ | 13/15 [00:02<00:00,  5.24it/s][A[A

Evaluating:  93%|█████████▎| 14/15 [00:02<00:00,  5.26it/s][A[A

Evaluating:

***** Eval results *****
  %s = %s bias_macro_f1 0.4869359496225168
  %s = %s bias_weighted_f1 0.7259107783872495
  %s = %s hate_macro_f1 0.5659307748256536
  %s = %s hate_weighted_f1 0.5491727757080479
  %s = %s loss 1.1640299201011657
  %s = %s mean_weighted_f1 0.6375417770476487



Iteration:  81%|████████  | 400/494 [02:01<02:31,  1.61s/it][A

Saving model checkpoint to %s ./model



Iteration:  81%|████████  | 401/494 [02:01<01:52,  1.21s/it][A
Iteration:  81%|████████▏ | 402/494 [02:01<01:26,  1.07it/s][A
Iteration:  82%|████████▏ | 403/494 [02:02<01:07,  1.35it/s][A
Iteration:  82%|████████▏ | 404/494 [02:02<00:54,  1.65it/s][A
Iteration:  82%|████████▏ | 405/494 [02:02<00:45,  1.96it/s][A
Iteration:  82%|████████▏ | 406/494 [02:03<00:38,  2.27it/s][A
Iteration:  82%|████████▏ | 407/494 [02:03<00:34,  2.54it/s][A
Iteration:  83%|████████▎ | 408/494 [02:03<00:31,  2.76it/s][A
Iteration:  83%|████████▎ | 409/494 [02:03<00:28,  2.95it/s][A
Iteration:  83%|████████▎ | 410/494 [02:04<00:26,  3.12it/s][A
Iteration:  83%|████████▎ | 411/494 [02:04<00:25,  3.24it/s][A
Iteration:  83%|████████▎ | 412/494 [02:04<00:24,  3.32it/s][A
Iteration:  84%|████████▎ | 413/494 [02:05<00:24,  3.36it/s][A
Iteration:  84%|████████▍ | 414/494 [02:05<00:23,  3.42it/s][A
Iteration:  84%|████████▍ | 415/494 [02:05<00:22,  3.46it/s][A
Iteration:  84%|████████▍ | 416/494 [02

***** Running evaluation on %s dataset ***** dev
  Num examples = %d 471
  Batch size = %d 32




Evaluating:   0%|          | 0/15 [00:00<?, ?it/s][A[A

Evaluating:   7%|▋         | 1/15 [00:00<00:02,  5.66it/s][A[A

Evaluating:  13%|█▎        | 2/15 [00:00<00:02,  5.49it/s][A[A

Evaluating:  20%|██        | 3/15 [00:00<00:02,  5.53it/s][A[A

Evaluating:  27%|██▋       | 4/15 [00:00<00:01,  5.52it/s][A[A

Evaluating:  33%|███▎      | 5/15 [00:00<00:01,  5.46it/s][A[A

Evaluating:  40%|████      | 6/15 [00:01<00:01,  5.47it/s][A[A

Evaluating:  47%|████▋     | 7/15 [00:01<00:01,  5.47it/s][A[A

Evaluating:  53%|█████▎    | 8/15 [00:01<00:01,  5.53it/s][A[A

Evaluating:  60%|██████    | 9/15 [00:01<00:01,  5.53it/s][A[A

Evaluating:  67%|██████▋   | 10/15 [00:01<00:00,  5.48it/s][A[A

Evaluating:  73%|███████▎  | 11/15 [00:02<00:00,  5.46it/s][A[A

Evaluating:  80%|████████  | 12/15 [00:02<00:00,  5.44it/s][A[A

Evaluating:  87%|████████▋ | 13/15 [00:02<00:00,  5.48it/s][A[A

Evaluating:  93%|█████████▎| 14/15 [00:02<00:00,  5.49it/s][A[A

Evaluating:

***** Eval results *****
  %s = %s bias_macro_f1 0.6201187025666667
  %s = %s bias_weighted_f1 0.7686271776873491
  %s = %s hate_macro_f1 0.567613954076978
  %s = %s hate_weighted_f1 0.5510470635864065
  %s = %s loss 1.2558297952016195
  %s = %s mean_weighted_f1 0.6598371206368778



Iteration:  21%|██▏       | 106/494 [00:41<24:31,  3.79s/it][A

Saving model checkpoint to %s ./model



Iteration:  22%|██▏       | 107/494 [00:41<17:47,  2.76s/it][A
Iteration:  22%|██▏       | 108/494 [00:42<12:57,  2.01s/it][A
Iteration:  22%|██▏       | 109/494 [00:42<09:34,  1.49s/it][A
Iteration:  22%|██▏       | 110/494 [00:42<07:11,  1.12s/it][A
Iteration:  22%|██▏       | 111/494 [00:42<05:34,  1.14it/s][A
Iteration:  23%|██▎       | 112/494 [00:43<04:25,  1.44it/s][A
Iteration:  23%|██▎       | 113/494 [00:43<03:37,  1.75it/s][A
Iteration:  23%|██▎       | 114/494 [00:43<03:03,  2.07it/s][A
Iteration:  23%|██▎       | 115/494 [00:43<02:40,  2.37it/s][A
Iteration:  23%|██▎       | 116/494 [00:44<02:23,  2.64it/s][A
Iteration:  24%|██▎       | 117/494 [00:44<02:11,  2.87it/s][A
Iteration:  24%|██▍       | 118/494 [00:44<02:03,  3.04it/s][A
Iteration:  24%|██▍       | 119/494 [00:45<01:57,  3.19it/s][A
Iteration:  24%|██▍       | 120/494 [00:45<01:53,  3.29it/s][A
Iteration:  24%|██▍       | 121/494 [00:45<01:50,  3.37it/s][A
Iteration:  25%|██▍       | 122/494 [00

***** Running evaluation on %s dataset ***** dev
  Num examples = %d 471
  Batch size = %d 32




Evaluating:   0%|          | 0/15 [00:00<?, ?it/s][A[A

Evaluating:   7%|▋         | 1/15 [00:00<00:02,  5.99it/s][A[A

Evaluating:  13%|█▎        | 2/15 [00:00<00:02,  5.58it/s][A[A

Evaluating:  20%|██        | 3/15 [00:00<00:02,  5.47it/s][A[A

Evaluating:  27%|██▋       | 4/15 [00:00<00:02,  5.46it/s][A[A

Evaluating:  33%|███▎      | 5/15 [00:00<00:01,  5.44it/s][A[A

Evaluating:  40%|████      | 6/15 [00:01<00:01,  5.47it/s][A[A

Evaluating:  47%|████▋     | 7/15 [00:01<00:01,  5.47it/s][A[A

Evaluating:  53%|█████▎    | 8/15 [00:01<00:01,  5.48it/s][A[A

Evaluating:  60%|██████    | 9/15 [00:01<00:01,  5.49it/s][A[A

Evaluating:  67%|██████▋   | 10/15 [00:01<00:00,  5.47it/s][A[A

Evaluating:  73%|███████▎  | 11/15 [00:02<00:00,  5.45it/s][A[A

Evaluating:  80%|████████  | 12/15 [00:02<00:00,  5.48it/s][A[A

Evaluating:  87%|████████▋ | 13/15 [00:02<00:00,  5.49it/s][A[A

Evaluating:  93%|█████████▎| 14/15 [00:02<00:00,  5.48it/s][A[A

Evaluating:

***** Eval results *****
  %s = %s bias_macro_f1 0.6869555086533389
  %s = %s bias_weighted_f1 0.7963648752853909
  %s = %s hate_macro_f1 0.6097494224514264
  %s = %s hate_weighted_f1 0.6060766798459231
  %s = %s loss 1.1328341205914816
  %s = %s mean_weighted_f1 0.7012207775656569



Iteration:  62%|██████▏   | 306/494 [01:47<09:08,  2.92s/it][A

Saving model checkpoint to %s ./model



Iteration:  62%|██████▏   | 307/494 [01:47<06:41,  2.15s/it][A
Iteration:  62%|██████▏   | 308/494 [01:47<04:54,  1.58s/it][A
Iteration:  63%|██████▎   | 309/494 [01:48<03:40,  1.19s/it][A
Iteration:  63%|██████▎   | 310/494 [01:48<02:48,  1.09it/s][A
Iteration:  63%|██████▎   | 311/494 [01:48<02:12,  1.38it/s][A
Iteration:  63%|██████▎   | 312/494 [01:48<01:47,  1.70it/s][A
Iteration:  63%|██████▎   | 313/494 [01:49<01:29,  2.02it/s][A
Iteration:  64%|██████▎   | 314/494 [01:49<01:17,  2.33it/s][A
Iteration:  64%|██████▍   | 315/494 [01:49<01:08,  2.60it/s][A
Iteration:  64%|██████▍   | 316/494 [01:49<01:02,  2.84it/s][A
Iteration:  64%|██████▍   | 317/494 [01:50<00:58,  3.03it/s][A
Iteration:  64%|██████▍   | 318/494 [01:50<00:55,  3.19it/s][A
Iteration:  65%|██████▍   | 319/494 [01:50<00:52,  3.31it/s][A
Iteration:  65%|██████▍   | 320/494 [01:51<00:51,  3.39it/s][A
Iteration:  65%|██████▍   | 321/494 [01:51<00:50,  3.46it/s][A
Iteration:  65%|██████▌   | 322/494 [01

***** Running evaluation on %s dataset ***** dev
  Num examples = %d 471
  Batch size = %d 32




Evaluating:   0%|          | 0/15 [00:00<?, ?it/s][A[A

Evaluating:   7%|▋         | 1/15 [00:00<00:02,  5.53it/s][A[A

Evaluating:  13%|█▎        | 2/15 [00:00<00:02,  5.41it/s][A[A

Evaluating:  20%|██        | 3/15 [00:00<00:02,  5.43it/s][A[A

Evaluating:  27%|██▋       | 4/15 [00:00<00:02,  5.44it/s][A[A

Evaluating:  33%|███▎      | 5/15 [00:00<00:01,  5.41it/s][A[A

Evaluating:  40%|████      | 6/15 [00:01<00:01,  5.38it/s][A[A

Evaluating:  47%|████▋     | 7/15 [00:01<00:01,  5.32it/s][A[A

Evaluating:  53%|█████▎    | 8/15 [00:01<00:01,  5.37it/s][A[A

Evaluating:  60%|██████    | 9/15 [00:01<00:01,  5.36it/s][A[A

Evaluating:  67%|██████▋   | 10/15 [00:01<00:00,  5.36it/s][A[A

Evaluating:  73%|███████▎  | 11/15 [00:02<00:00,  5.36it/s][A[A

Evaluating:  80%|████████  | 12/15 [00:02<00:00,  5.37it/s][A[A

Evaluating:  87%|████████▋ | 13/15 [00:02<00:00,  5.39it/s][A[A

Evaluating:  93%|█████████▎| 14/15 [00:02<00:00,  5.40it/s][A[A

Evaluating:

***** Eval results *****
  %s = %s bias_macro_f1 0.6916096351527381
  %s = %s bias_weighted_f1 0.8115485815330744
  %s = %s hate_macro_f1 0.5810008479891663
  %s = %s hate_weighted_f1 0.5707020378904445
  %s = %s loss 1.198359235127767
  %s = %s mean_weighted_f1 0.6911253097117595



Iteration:   3%|▎         | 13/494 [00:06<06:58,  1.15it/s][A
Iteration:   3%|▎         | 14/494 [00:06<05:32,  1.44it/s][A
Iteration:   3%|▎         | 15/494 [00:07<04:32,  1.76it/s][A
Iteration:   3%|▎         | 16/494 [00:07<03:51,  2.06it/s][A
Iteration:   3%|▎         | 17/494 [00:07<03:21,  2.37it/s][A
Iteration:   4%|▎         | 18/494 [00:07<03:01,  2.62it/s][A
Iteration:   4%|▍         | 19/494 [00:08<02:46,  2.85it/s][A
Iteration:   4%|▍         | 20/494 [00:08<02:36,  3.02it/s][A
Iteration:   4%|▍         | 21/494 [00:08<02:29,  3.16it/s][A
Iteration:   4%|▍         | 22/494 [00:09<02:24,  3.27it/s][A
Iteration:   5%|▍         | 23/494 [00:09<02:20,  3.34it/s][A
Iteration:   5%|▍         | 24/494 [00:09<02:18,  3.40it/s][A
Iteration:   5%|▌         | 25/494 [00:09<02:16,  3.45it/s][A
Iteration:   5%|▌         | 26/494 [00:10<02:13,  3.49it/s][A
Iteration:   5%|▌         | 27/494 [00:10<02:12,  3.53it/s][A
Iteration:   6%|▌         | 28/494 [00:10<02:12,  3.53

***** Running evaluation on %s dataset ***** dev
  Num examples = %d 471
  Batch size = %d 32




Evaluating:   0%|          | 0/15 [00:00<?, ?it/s][A[A

Evaluating:   7%|▋         | 1/15 [00:00<00:02,  5.75it/s][A[A

Evaluating:  13%|█▎        | 2/15 [00:00<00:02,  5.37it/s][A[A

Evaluating:  20%|██        | 3/15 [00:00<00:02,  5.39it/s][A[A

Evaluating:  27%|██▋       | 4/15 [00:00<00:02,  5.36it/s][A[A

Evaluating:  33%|███▎      | 5/15 [00:00<00:01,  5.32it/s][A[A

Evaluating:  40%|████      | 6/15 [00:01<00:01,  5.39it/s][A[A

Evaluating:  47%|████▋     | 7/15 [00:01<00:01,  5.38it/s][A[A

Evaluating:  53%|█████▎    | 8/15 [00:01<00:01,  5.38it/s][A[A

Evaluating:  60%|██████    | 9/15 [00:01<00:01,  5.38it/s][A[A

Evaluating:  67%|██████▋   | 10/15 [00:01<00:00,  5.36it/s][A[A

Evaluating:  73%|███████▎  | 11/15 [00:02<00:00,  5.37it/s][A[A

Evaluating:  80%|████████  | 12/15 [00:02<00:00,  5.36it/s][A[A

Evaluating:  87%|████████▋ | 13/15 [00:02<00:00,  5.40it/s][A[A

Evaluating:  93%|█████████▎| 14/15 [00:02<00:00,  5.43it/s][A[A

Evaluating:

***** Eval results *****
  %s = %s bias_macro_f1 0.6815353720961009
  %s = %s bias_weighted_f1 0.7719972619178479
  %s = %s hate_macro_f1 0.6563773905513482
  %s = %s hate_weighted_f1 0.656057275540575
  %s = %s loss 1.2395560026168824
  %s = %s mean_weighted_f1 0.7140272687292115



Iteration:  43%|████▎     | 212/494 [01:11<14:12,  3.02s/it][A

Saving model checkpoint to %s ./model



Iteration:  43%|████▎     | 213/494 [01:11<10:27,  2.23s/it][A
Iteration:  43%|████▎     | 214/494 [01:12<07:41,  1.65s/it][A
Iteration:  44%|████▎     | 215/494 [01:12<05:47,  1.24s/it][A
Iteration:  44%|████▎     | 216/494 [01:12<04:28,  1.04it/s][A
Iteration:  44%|████▍     | 217/494 [01:13<03:35,  1.28it/s][A
Iteration:  44%|████▍     | 218/494 [01:13<02:54,  1.58it/s][A
Iteration:  44%|████▍     | 219/494 [01:13<02:24,  1.90it/s][A
Iteration:  45%|████▍     | 220/494 [01:14<02:05,  2.19it/s][A
Iteration:  45%|████▍     | 221/494 [01:14<01:53,  2.41it/s][A
Iteration:  45%|████▍     | 222/494 [01:14<01:44,  2.61it/s][A
Iteration:  45%|████▌     | 223/494 [01:14<01:35,  2.84it/s][A
Iteration:  45%|████▌     | 224/494 [01:15<01:30,  2.99it/s][A
Iteration:  46%|████▌     | 225/494 [01:15<01:26,  3.11it/s][A
Iteration:  46%|████▌     | 226/494 [01:15<01:24,  3.16it/s][A
Iteration:  46%|████▌     | 227/494 [01:16<01:24,  3.18it/s][A
Iteration:  46%|████▌     | 228/494 [01

***** Running evaluation on %s dataset ***** dev
  Num examples = %d 471
  Batch size = %d 32




Evaluating:   0%|          | 0/15 [00:00<?, ?it/s][A[A

Evaluating:   7%|▋         | 1/15 [00:00<00:02,  5.81it/s][A[A

Evaluating:  13%|█▎        | 2/15 [00:00<00:02,  5.39it/s][A[A

Evaluating:  20%|██        | 3/15 [00:00<00:02,  5.37it/s][A[A

Evaluating:  27%|██▋       | 4/15 [00:00<00:02,  5.43it/s][A[A

Evaluating:  33%|███▎      | 5/15 [00:00<00:01,  5.39it/s][A[A

Evaluating:  40%|████      | 6/15 [00:01<00:01,  5.42it/s][A[A

Evaluating:  47%|████▋     | 7/15 [00:01<00:01,  5.43it/s][A[A

Evaluating:  53%|█████▎    | 8/15 [00:01<00:01,  5.42it/s][A[A

Evaluating:  60%|██████    | 9/15 [00:01<00:01,  5.39it/s][A[A

Evaluating:  67%|██████▋   | 10/15 [00:01<00:00,  5.37it/s][A[A

Evaluating:  73%|███████▎  | 11/15 [00:02<00:00,  5.38it/s][A[A

Evaluating:  80%|████████  | 12/15 [00:02<00:00,  5.44it/s][A[A

Evaluating:  87%|████████▋ | 13/15 [00:02<00:00,  5.47it/s][A[A

Evaluating:  93%|█████████▎| 14/15 [00:02<00:00,  5.43it/s][A[A

Evaluating:

***** Eval results *****
  %s = %s bias_macro_f1 0.6508420520048427
  %s = %s bias_weighted_f1 0.7790830032720124
  %s = %s hate_macro_f1 0.6403251874976615
  %s = %s hate_weighted_f1 0.6334711559110978
  %s = %s loss 1.2830648342768352
  %s = %s mean_weighted_f1 0.7062770795915552



Iteration:  84%|████████▎ | 413/494 [02:12<01:09,  1.16it/s][A
Iteration:  84%|████████▍ | 414/494 [02:12<00:55,  1.45it/s][A
Iteration:  84%|████████▍ | 415/494 [02:13<00:44,  1.77it/s][A
Iteration:  84%|████████▍ | 416/494 [02:13<00:37,  2.08it/s][A
Iteration:  84%|████████▍ | 417/494 [02:13<00:32,  2.38it/s][A
Iteration:  85%|████████▍ | 418/494 [02:14<00:28,  2.64it/s][A
Iteration:  85%|████████▍ | 419/494 [02:14<00:26,  2.86it/s][A
Iteration:  85%|████████▌ | 420/494 [02:14<00:24,  3.04it/s][A
Iteration:  85%|████████▌ | 421/494 [02:14<00:22,  3.18it/s][A
Iteration:  85%|████████▌ | 422/494 [02:15<00:21,  3.29it/s][A
Iteration:  86%|████████▌ | 423/494 [02:15<00:21,  3.36it/s][A
Iteration:  86%|████████▌ | 424/494 [02:15<00:20,  3.41it/s][A
Iteration:  86%|████████▌ | 425/494 [02:16<00:19,  3.48it/s][A
Iteration:  86%|████████▌ | 426/494 [02:16<00:19,  3.49it/s][A
Iteration:  86%|████████▋ | 427/494 [02:16<00:19,  3.51it/s][A
Iteration:  87%|████████▋ | 428/494 [02

***** Running evaluation on %s dataset ***** dev
  Num examples = %d 471
  Batch size = %d 32




Evaluating:   0%|          | 0/15 [00:00<?, ?it/s][A[A

Evaluating:   7%|▋         | 1/15 [00:00<00:02,  5.66it/s][A[A

Evaluating:  13%|█▎        | 2/15 [00:00<00:02,  5.34it/s][A[A

Evaluating:  20%|██        | 3/15 [00:00<00:02,  5.37it/s][A[A

Evaluating:  27%|██▋       | 4/15 [00:00<00:02,  5.41it/s][A[A

Evaluating:  33%|███▎      | 5/15 [00:00<00:01,  5.30it/s][A[A

Evaluating:  40%|████      | 6/15 [00:01<00:01,  5.39it/s][A[A

Evaluating:  47%|████▋     | 7/15 [00:01<00:01,  5.38it/s][A[A

Evaluating:  53%|█████▎    | 8/15 [00:01<00:01,  5.40it/s][A[A

Evaluating:  60%|██████    | 9/15 [00:01<00:01,  5.43it/s][A[A

Evaluating:  67%|██████▋   | 10/15 [00:01<00:00,  5.38it/s][A[A

Evaluating:  73%|███████▎  | 11/15 [00:02<00:00,  5.36it/s][A[A

Evaluating:  80%|████████  | 12/15 [00:02<00:00,  5.36it/s][A[A

Evaluating:  87%|████████▋ | 13/15 [00:02<00:00,  5.39it/s][A[A

Evaluating:  93%|█████████▎| 14/15 [00:02<00:00,  5.40it/s][A[A

Evaluating:

***** Eval results *****
  %s = %s bias_macro_f1 0.6821101084684006
  %s = %s bias_weighted_f1 0.8094586804940852
  %s = %s hate_macro_f1 0.5951877603937454
  %s = %s hate_weighted_f1 0.5885564473189979
  %s = %s loss 1.6340181191762289
  %s = %s mean_weighted_f1 0.6990075639065416



Iteration:  24%|██▍       | 119/494 [00:36<05:24,  1.16it/s][A
Iteration:  24%|██▍       | 120/494 [00:36<04:18,  1.45it/s][A
Iteration:  24%|██▍       | 121/494 [00:36<03:31,  1.76it/s][A
Iteration:  25%|██▍       | 122/494 [00:37<03:00,  2.06it/s][A
Iteration:  25%|██▍       | 123/494 [00:37<02:36,  2.36it/s][A
Iteration:  25%|██▌       | 124/494 [00:37<02:21,  2.62it/s][A
Iteration:  25%|██▌       | 125/494 [00:38<02:10,  2.83it/s][A
Iteration:  26%|██▌       | 126/494 [00:38<02:02,  3.00it/s][A
Iteration:  26%|██▌       | 127/494 [00:38<01:56,  3.15it/s][A
Iteration:  26%|██▌       | 128/494 [00:38<01:52,  3.26it/s][A
Iteration:  26%|██▌       | 129/494 [00:39<01:49,  3.33it/s][A
Iteration:  26%|██▋       | 130/494 [00:39<01:47,  3.39it/s][A
Iteration:  27%|██▋       | 131/494 [00:39<01:45,  3.44it/s][A
Iteration:  27%|██▋       | 132/494 [00:40<01:43,  3.48it/s][A
Iteration:  27%|██▋       | 133/494 [00:40<01:43,  3.48it/s][A
Iteration:  27%|██▋       | 134/494 [00

***** Running evaluation on %s dataset ***** dev
  Num examples = %d 471
  Batch size = %d 32




Evaluating:   0%|          | 0/15 [00:00<?, ?it/s][A[A

Evaluating:   7%|▋         | 1/15 [00:00<00:02,  5.67it/s][A[A

Evaluating:  13%|█▎        | 2/15 [00:00<00:02,  5.37it/s][A[A

Evaluating:  20%|██        | 3/15 [00:00<00:02,  5.43it/s][A[A

Evaluating:  27%|██▋       | 4/15 [00:00<00:02,  5.38it/s][A[A

Evaluating:  33%|███▎      | 5/15 [00:00<00:01,  5.35it/s][A[A

Evaluating:  40%|████      | 6/15 [00:01<00:01,  5.40it/s][A[A

Evaluating:  47%|████▋     | 7/15 [00:01<00:01,  5.37it/s][A[A

Evaluating:  53%|█████▎    | 8/15 [00:01<00:01,  5.36it/s][A[A

Evaluating:  60%|██████    | 9/15 [00:01<00:01,  5.38it/s][A[A

Evaluating:  67%|██████▋   | 10/15 [00:01<00:00,  5.35it/s][A[A

Evaluating:  73%|███████▎  | 11/15 [00:02<00:00,  5.35it/s][A[A

Evaluating:  80%|████████  | 12/15 [00:02<00:00,  5.35it/s][A[A

Evaluating:  87%|████████▋ | 13/15 [00:02<00:00,  5.35it/s][A[A

Evaluating:  93%|█████████▎| 14/15 [00:02<00:00,  5.37it/s][A[A

Evaluating:

***** Eval results *****
  %s = %s bias_macro_f1 0.704422565741028
  %s = %s bias_weighted_f1 0.8057236089173783
  %s = %s hate_macro_f1 0.6189643194447586
  %s = %s hate_weighted_f1 0.6096084254117732
  %s = %s loss 1.6886233886082966
  %s = %s mean_weighted_f1 0.7076660171645757



Iteration:  65%|██████▍   | 319/494 [01:35<02:31,  1.16it/s][A
Iteration:  65%|██████▍   | 320/494 [01:36<02:00,  1.45it/s][A
Iteration:  65%|██████▍   | 321/494 [01:36<01:37,  1.77it/s][A
Iteration:  65%|██████▌   | 322/494 [01:36<01:22,  2.07it/s][A
Iteration:  65%|██████▌   | 323/494 [01:37<01:11,  2.38it/s][A
Iteration:  66%|██████▌   | 324/494 [01:37<01:04,  2.64it/s][A
Iteration:  66%|██████▌   | 325/494 [01:37<00:59,  2.85it/s][A
Iteration:  66%|██████▌   | 326/494 [01:37<00:55,  3.03it/s][A
Iteration:  66%|██████▌   | 327/494 [01:38<00:52,  3.17it/s][A
Iteration:  66%|██████▋   | 328/494 [01:38<00:50,  3.28it/s][A
Iteration:  67%|██████▋   | 329/494 [01:38<00:49,  3.33it/s][A
Iteration:  67%|██████▋   | 330/494 [01:38<00:48,  3.40it/s][A
Iteration:  67%|██████▋   | 331/494 [01:39<00:47,  3.44it/s][A
Iteration:  67%|██████▋   | 332/494 [01:39<00:46,  3.48it/s][A
Iteration:  67%|██████▋   | 333/494 [01:39<00:46,  3.49it/s][A
Iteration:  68%|██████▊   | 334/494 [01

***** Running evaluation on %s dataset ***** dev
  Num examples = %d 471
  Batch size = %d 32




Evaluating:   0%|          | 0/15 [00:00<?, ?it/s][A[A

Evaluating:   7%|▋         | 1/15 [00:00<00:02,  5.73it/s][A[A

Evaluating:  13%|█▎        | 2/15 [00:00<00:02,  5.43it/s][A[A

Evaluating:  20%|██        | 3/15 [00:00<00:02,  5.41it/s][A[A

Evaluating:  27%|██▋       | 4/15 [00:00<00:02,  5.39it/s][A[A

Evaluating:  33%|███▎      | 5/15 [00:00<00:01,  5.35it/s][A[A

Evaluating:  40%|████      | 6/15 [00:01<00:01,  5.38it/s][A[A

Evaluating:  47%|████▋     | 7/15 [00:01<00:01,  5.37it/s][A[A

Evaluating:  53%|█████▎    | 8/15 [00:01<00:01,  5.34it/s][A[A

Evaluating:  60%|██████    | 9/15 [00:01<00:01,  5.35it/s][A[A

Evaluating:  67%|██████▋   | 10/15 [00:01<00:00,  5.36it/s][A[A

Evaluating:  73%|███████▎  | 11/15 [00:02<00:00,  5.35it/s][A[A

Evaluating:  80%|████████  | 12/15 [00:02<00:00,  5.34it/s][A[A

Evaluating:  87%|████████▋ | 13/15 [00:02<00:00,  5.35it/s][A[A

Evaluating:  93%|█████████▎| 14/15 [00:02<00:00,  5.36it/s][A[A

Evaluating:

***** Eval results *****
  %s = %s bias_macro_f1 0.6940424557078648
  %s = %s bias_weighted_f1 0.7900569593396015
  %s = %s hate_macro_f1 0.6526814956408465
  %s = %s hate_weighted_f1 0.6500735650047141
  %s = %s loss 1.5469295819600424
  %s = %s mean_weighted_f1 0.7200652621721578



Iteration:   5%|▍         | 24/494 [00:15<22:57,  2.93s/it][A

Saving model checkpoint to %s ./model



Iteration:   5%|▌         | 25/494 [00:15<16:54,  2.16s/it][A
Iteration:   5%|▌         | 26/494 [00:16<12:26,  1.60s/it][A
Iteration:   5%|▌         | 27/494 [00:16<09:19,  1.20s/it][A
Iteration:   6%|▌         | 28/494 [00:16<07:08,  1.09it/s][A
Iteration:   6%|▌         | 29/494 [00:17<05:40,  1.37it/s][A
Iteration:   6%|▌         | 30/494 [00:17<04:35,  1.68it/s][A
Iteration:   6%|▋         | 31/494 [00:17<03:50,  2.01it/s][A
Iteration:   6%|▋         | 32/494 [00:17<03:20,  2.31it/s][A
Iteration:   7%|▋         | 33/494 [00:18<02:58,  2.58it/s][A
Iteration:   7%|▋         | 34/494 [00:18<02:42,  2.83it/s][A
Iteration:   7%|▋         | 35/494 [00:18<02:32,  3.01it/s][A
Iteration:   7%|▋         | 36/494 [00:19<02:25,  3.15it/s][A
Iteration:   7%|▋         | 37/494 [00:19<02:20,  3.26it/s][A
Iteration:   8%|▊         | 38/494 [00:19<02:16,  3.34it/s][A
Iteration:   8%|▊         | 39/494 [00:19<02:12,  3.43it/s][A
Iteration:   8%|▊         | 40/494 [00:20<02:10,  3.47

***** Running evaluation on %s dataset ***** dev
  Num examples = %d 471
  Batch size = %d 32




Evaluating:   0%|          | 0/15 [00:00<?, ?it/s][A[A

Evaluating:   7%|▋         | 1/15 [00:00<00:02,  5.58it/s][A[A

Evaluating:  13%|█▎        | 2/15 [00:00<00:02,  5.45it/s][A[A

Evaluating:  20%|██        | 3/15 [00:00<00:02,  5.42it/s][A[A

Evaluating:  27%|██▋       | 4/15 [00:00<00:02,  5.43it/s][A[A

Evaluating:  33%|███▎      | 5/15 [00:00<00:01,  5.39it/s][A[A

Evaluating:  40%|████      | 6/15 [00:01<00:01,  5.40it/s][A[A

Evaluating:  47%|████▋     | 7/15 [00:01<00:01,  5.42it/s][A[A

Evaluating:  53%|█████▎    | 8/15 [00:01<00:01,  5.45it/s][A[A

Evaluating:  60%|██████    | 9/15 [00:01<00:01,  5.49it/s][A[A

Evaluating:  67%|██████▋   | 10/15 [00:01<00:00,  5.50it/s][A[A

Evaluating:  73%|███████▎  | 11/15 [00:02<00:00,  5.50it/s][A[A

Evaluating:  80%|████████  | 12/15 [00:02<00:00,  5.47it/s][A[A

Evaluating:  87%|████████▋ | 13/15 [00:02<00:00,  5.45it/s][A[A

Evaluating:  93%|█████████▎| 14/15 [00:02<00:00,  5.46it/s][A[A

Evaluating:

***** Eval results *****
  %s = %s bias_macro_f1 0.7081390042269854
  %s = %s bias_weighted_f1 0.8107712383916346
  %s = %s hate_macro_f1 0.6656880177039376
  %s = %s hate_weighted_f1 0.6586102808839484
  %s = %s loss 1.6510934273401896
  %s = %s mean_weighted_f1 0.7346907596377914



Iteration:  45%|████▌     | 224/494 [01:15<06:50,  1.52s/it][A

Saving model checkpoint to %s ./model



Iteration:  46%|████▌     | 225/494 [01:16<05:08,  1.15s/it][A
Iteration:  46%|████▌     | 226/494 [01:16<03:57,  1.13it/s][A
Iteration:  46%|████▌     | 227/494 [01:16<03:07,  1.43it/s][A
Iteration:  46%|████▌     | 228/494 [01:17<02:35,  1.71it/s][A
Iteration:  46%|████▋     | 229/494 [01:17<02:09,  2.04it/s][A
Iteration:  47%|████▋     | 230/494 [01:17<01:52,  2.35it/s][A
Iteration:  47%|████▋     | 231/494 [01:17<01:40,  2.61it/s][A
Iteration:  47%|████▋     | 232/494 [01:18<01:32,  2.83it/s][A
Iteration:  47%|████▋     | 233/494 [01:18<01:26,  3.01it/s][A
Iteration:  47%|████▋     | 234/494 [01:18<01:22,  3.16it/s][A
Iteration:  48%|████▊     | 235/494 [01:18<01:19,  3.27it/s][A
Iteration:  48%|████▊     | 236/494 [01:19<01:17,  3.35it/s][A
Iteration:  48%|████▊     | 237/494 [01:19<01:14,  3.43it/s][A
Iteration:  48%|████▊     | 238/494 [01:19<01:13,  3.47it/s][A
Iteration:  48%|████▊     | 239/494 [01:20<01:12,  3.49it/s][A
Iteration:  49%|████▊     | 240/494 [01

***** Running evaluation on %s dataset ***** dev
  Num examples = %d 471
  Batch size = %d 32




Evaluating:   0%|          | 0/15 [00:00<?, ?it/s][A[A

Evaluating:   7%|▋         | 1/15 [00:00<00:02,  5.62it/s][A[A

Evaluating:  13%|█▎        | 2/15 [00:00<00:02,  5.43it/s][A[A

Evaluating:  20%|██        | 3/15 [00:00<00:02,  5.47it/s][A[A

Evaluating:  27%|██▋       | 4/15 [00:00<00:02,  5.43it/s][A[A

Evaluating:  33%|███▎      | 5/15 [00:00<00:01,  5.32it/s][A[A

Evaluating:  40%|████      | 6/15 [00:01<00:01,  5.40it/s][A[A

Evaluating:  47%|████▋     | 7/15 [00:01<00:01,  5.39it/s][A[A

Evaluating:  53%|█████▎    | 8/15 [00:01<00:01,  5.42it/s][A[A

Evaluating:  60%|██████    | 9/15 [00:01<00:01,  5.38it/s][A[A

Evaluating:  67%|██████▋   | 10/15 [00:01<00:00,  5.33it/s][A[A

Evaluating:  73%|███████▎  | 11/15 [00:02<00:00,  5.36it/s][A[A

Evaluating:  80%|████████  | 12/15 [00:02<00:00,  5.38it/s][A[A

Evaluating:  87%|████████▋ | 13/15 [00:02<00:00,  5.40it/s][A[A

Evaluating:  93%|█████████▎| 14/15 [00:02<00:00,  5.42it/s][A[A

Evaluating:

***** Eval results *****
  %s = %s bias_macro_f1 0.6958051404221876
  %s = %s bias_weighted_f1 0.8034851987656454
  %s = %s hate_macro_f1 0.6562306776288984
  %s = %s hate_weighted_f1 0.6518659197191874
  %s = %s loss 1.7525777578353883
  %s = %s mean_weighted_f1 0.7276755592424164



Iteration:  86%|████████▌ | 425/494 [02:15<00:59,  1.16it/s][A
Iteration:  86%|████████▌ | 426/494 [02:15<00:46,  1.45it/s][A
Iteration:  86%|████████▋ | 427/494 [02:15<00:37,  1.77it/s][A
Iteration:  87%|████████▋ | 428/494 [02:16<00:31,  2.08it/s][A
Iteration:  87%|████████▋ | 429/494 [02:16<00:27,  2.37it/s][A
Iteration:  87%|████████▋ | 430/494 [02:16<00:24,  2.63it/s][A
Iteration:  87%|████████▋ | 431/494 [02:17<00:22,  2.84it/s][A
Iteration:  87%|████████▋ | 432/494 [02:17<00:20,  3.03it/s][A
Iteration:  88%|████████▊ | 433/494 [02:17<00:19,  3.13it/s][A
Iteration:  88%|████████▊ | 434/494 [02:17<00:18,  3.24it/s][A
Iteration:  88%|████████▊ | 435/494 [02:18<00:17,  3.32it/s][A
Iteration:  88%|████████▊ | 436/494 [02:18<00:17,  3.35it/s][A
Iteration:  88%|████████▊ | 437/494 [02:18<00:16,  3.41it/s][A
Iteration:  89%|████████▊ | 438/494 [02:19<00:16,  3.45it/s][A
Iteration:  89%|████████▉ | 439/494 [02:19<00:15,  3.48it/s][A
Iteration:  89%|████████▉ | 440/494 [02

***** Running evaluation on %s dataset ***** dev
  Num examples = %d 471
  Batch size = %d 32




Evaluating:   0%|          | 0/15 [00:00<?, ?it/s][A[A

Evaluating:   7%|▋         | 1/15 [00:00<00:02,  5.78it/s][A[A

Evaluating:  13%|█▎        | 2/15 [00:00<00:02,  5.51it/s][A[A

Evaluating:  20%|██        | 3/15 [00:00<00:02,  5.45it/s][A[A

Evaluating:  27%|██▋       | 4/15 [00:00<00:02,  5.39it/s][A[A

Evaluating:  33%|███▎      | 5/15 [00:00<00:01,  5.36it/s][A[A

Evaluating:  40%|████      | 6/15 [00:01<00:01,  5.37it/s][A[A

Evaluating:  47%|████▋     | 7/15 [00:01<00:01,  5.35it/s][A[A

Evaluating:  53%|█████▎    | 8/15 [00:01<00:01,  5.37it/s][A[A

Evaluating:  60%|██████    | 9/15 [00:01<00:01,  5.37it/s][A[A

Evaluating:  67%|██████▋   | 10/15 [00:01<00:00,  5.37it/s][A[A

Evaluating:  73%|███████▎  | 11/15 [00:02<00:00,  5.39it/s][A[A

Evaluating:  80%|████████  | 12/15 [00:02<00:00,  5.35it/s][A[A

Evaluating:  87%|████████▋ | 13/15 [00:02<00:00,  5.35it/s][A[A

Evaluating:  93%|█████████▎| 14/15 [00:02<00:00,  5.32it/s][A[A

Evaluating:

***** Eval results *****
  %s = %s bias_macro_f1 0.7036356764928194
  %s = %s bias_weighted_f1 0.8036886346895445
  %s = %s hate_macro_f1 0.6550914904816901
  %s = %s hate_weighted_f1 0.6478253367580242
  %s = %s loss 1.8851635654767354
  %s = %s mean_weighted_f1 0.7257569857237844



Iteration:  27%|██▋       | 131/494 [00:39<05:14,  1.15it/s][A
Iteration:  27%|██▋       | 132/494 [00:40<04:09,  1.45it/s][A
Iteration:  27%|██▋       | 133/494 [00:40<03:23,  1.77it/s][A
Iteration:  27%|██▋       | 134/494 [00:40<02:54,  2.06it/s][A
Iteration:  27%|██▋       | 135/494 [00:40<02:31,  2.36it/s][A
Iteration:  28%|██▊       | 136/494 [00:41<02:15,  2.64it/s][A
Iteration:  28%|██▊       | 137/494 [00:41<02:05,  2.85it/s][A
Iteration:  28%|██▊       | 138/494 [00:41<01:57,  3.03it/s][A
Iteration:  28%|██▊       | 139/494 [00:42<01:52,  3.15it/s][A
Iteration:  28%|██▊       | 140/494 [00:42<01:48,  3.26it/s][A
Iteration:  29%|██▊       | 141/494 [00:42<01:46,  3.31it/s][A
Iteration:  29%|██▊       | 142/494 [00:42<01:44,  3.36it/s][A
Iteration:  29%|██▉       | 143/494 [00:43<01:42,  3.42it/s][A
Iteration:  29%|██▉       | 144/494 [00:43<01:41,  3.45it/s][A
Iteration:  29%|██▉       | 145/494 [00:43<01:40,  3.46it/s][A
Iteration:  30%|██▉       | 146/494 [00

***** Running evaluation on %s dataset ***** dev
  Num examples = %d 471
  Batch size = %d 32




Evaluating:   0%|          | 0/15 [00:00<?, ?it/s][A[A

Evaluating:   7%|▋         | 1/15 [00:00<00:02,  5.54it/s][A[A

Evaluating:  13%|█▎        | 2/15 [00:00<00:02,  5.36it/s][A[A

Evaluating:  20%|██        | 3/15 [00:00<00:02,  5.43it/s][A[A

Evaluating:  27%|██▋       | 4/15 [00:00<00:02,  5.41it/s][A[A

Evaluating:  33%|███▎      | 5/15 [00:00<00:01,  5.33it/s][A[A

Evaluating:  40%|████      | 6/15 [00:01<00:01,  5.38it/s][A[A

Evaluating:  47%|████▋     | 7/15 [00:01<00:01,  5.35it/s][A[A

Evaluating:  53%|█████▎    | 8/15 [00:01<00:01,  5.38it/s][A[A

Evaluating:  60%|██████    | 9/15 [00:01<00:01,  5.38it/s][A[A

Evaluating:  67%|██████▋   | 10/15 [00:01<00:00,  5.40it/s][A[A

Evaluating:  73%|███████▎  | 11/15 [00:02<00:00,  5.42it/s][A[A

Evaluating:  80%|████████  | 12/15 [00:02<00:00,  5.44it/s][A[A

Evaluating:  87%|████████▋ | 13/15 [00:02<00:00,  5.41it/s][A[A

Evaluating:  93%|█████████▎| 14/15 [00:02<00:00,  5.38it/s][A[A

Evaluating:

***** Eval results *****
  %s = %s bias_macro_f1 0.7311716610744726
  %s = %s bias_weighted_f1 0.825692207505405
  %s = %s hate_macro_f1 0.6563147380948232
  %s = %s hate_weighted_f1 0.6519946649420502
  %s = %s loss 1.8376062870025636
  %s = %s mean_weighted_f1 0.7388434362237275



Iteration:  67%|██████▋   | 330/494 [01:40<04:15,  1.56s/it][A

Saving model checkpoint to %s ./model



Iteration:  67%|██████▋   | 331/494 [01:40<03:12,  1.18s/it][A
Iteration:  67%|██████▋   | 332/494 [01:40<02:27,  1.10it/s][A
Iteration:  67%|██████▋   | 333/494 [01:41<01:55,  1.39it/s][A
Iteration:  68%|██████▊   | 334/494 [01:41<01:34,  1.69it/s][A
Iteration:  68%|██████▊   | 335/494 [01:41<01:19,  2.00it/s][A
Iteration:  68%|██████▊   | 336/494 [01:41<01:08,  2.31it/s][A
Iteration:  68%|██████▊   | 337/494 [01:42<01:00,  2.58it/s][A
Iteration:  68%|██████▊   | 338/494 [01:42<00:55,  2.80it/s][A
Iteration:  69%|██████▊   | 339/494 [01:42<00:51,  2.98it/s][A
Iteration:  69%|██████▉   | 340/494 [01:43<00:48,  3.15it/s][A
Iteration:  69%|██████▉   | 341/494 [01:43<00:47,  3.25it/s][A
Iteration:  69%|██████▉   | 342/494 [01:43<00:45,  3.32it/s][A
Iteration:  69%|██████▉   | 343/494 [01:43<00:44,  3.40it/s][A
Iteration:  70%|██████▉   | 344/494 [01:44<00:43,  3.44it/s][A
Iteration:  70%|██████▉   | 345/494 [01:44<00:42,  3.47it/s][A
Iteration:  70%|███████   | 346/494 [01

***** Running evaluation on %s dataset ***** dev
  Num examples = %d 471
  Batch size = %d 32




Evaluating:   0%|          | 0/15 [00:00<?, ?it/s][A[A

Evaluating:   7%|▋         | 1/15 [00:00<00:02,  5.75it/s][A[A

Evaluating:  13%|█▎        | 2/15 [00:00<00:02,  5.47it/s][A[A

Evaluating:  20%|██        | 3/15 [00:00<00:02,  5.52it/s][A[A

Evaluating:  27%|██▋       | 4/15 [00:00<00:02,  5.42it/s][A[A

Evaluating:  33%|███▎      | 5/15 [00:00<00:01,  5.39it/s][A[A

Evaluating:  40%|████      | 6/15 [00:01<00:01,  5.37it/s][A[A

Evaluating:  47%|████▋     | 7/15 [00:01<00:01,  5.37it/s][A[A

Evaluating:  53%|█████▎    | 8/15 [00:01<00:01,  5.39it/s][A[A

Evaluating:  60%|██████    | 9/15 [00:01<00:01,  5.43it/s][A[A

Evaluating:  67%|██████▋   | 10/15 [00:01<00:00,  5.44it/s][A[A

Evaluating:  73%|███████▎  | 11/15 [00:02<00:00,  5.40it/s][A[A

Evaluating:  80%|████████  | 12/15 [00:02<00:00,  5.43it/s][A[A

Evaluating:  87%|████████▋ | 13/15 [00:02<00:00,  5.43it/s][A[A

Evaluating:  93%|█████████▎| 14/15 [00:02<00:00,  5.38it/s][A[A

Evaluating:

***** Eval results *****
  %s = %s bias_macro_f1 0.7148151202336953
  %s = %s bias_weighted_f1 0.8038191603524937
  %s = %s hate_macro_f1 0.6615336470494014
  %s = %s hate_weighted_f1 0.6573012481988181
  %s = %s loss 2.032802208264669
  %s = %s mean_weighted_f1 0.7305602042756558



Iteration:   7%|▋         | 37/494 [00:13<06:34,  1.16it/s][A
Iteration:   8%|▊         | 38/494 [00:13<05:13,  1.46it/s][A
Iteration:   8%|▊         | 39/494 [00:13<04:17,  1.77it/s][A
Iteration:   8%|▊         | 40/494 [00:14<03:38,  2.07it/s][A
Iteration:   8%|▊         | 41/494 [00:14<03:10,  2.37it/s][A
Iteration:   9%|▊         | 42/494 [00:14<02:51,  2.64it/s][A
Iteration:   9%|▊         | 43/494 [00:14<02:37,  2.86it/s][A
Iteration:   9%|▉         | 44/494 [00:15<02:29,  3.02it/s][A
Iteration:   9%|▉         | 45/494 [00:15<02:22,  3.16it/s][A
Iteration:   9%|▉         | 46/494 [00:15<02:16,  3.28it/s][A
Iteration:  10%|▉         | 47/494 [00:16<02:13,  3.34it/s][A
Iteration:  10%|▉         | 48/494 [00:16<02:11,  3.39it/s][A
Iteration:  10%|▉         | 49/494 [00:16<02:10,  3.42it/s][A
Iteration:  10%|█         | 50/494 [00:16<02:08,  3.45it/s][A
Iteration:  10%|█         | 51/494 [00:17<02:07,  3.47it/s][A
Iteration:  11%|█         | 52/494 [00:17<02:06,  3.49

***** Running evaluation on %s dataset ***** dev
  Num examples = %d 471
  Batch size = %d 32




Evaluating:   0%|          | 0/15 [00:00<?, ?it/s][A[A

Evaluating:   7%|▋         | 1/15 [00:00<00:02,  5.76it/s][A[A

Evaluating:  13%|█▎        | 2/15 [00:00<00:02,  5.40it/s][A[A

Evaluating:  20%|██        | 3/15 [00:00<00:02,  5.45it/s][A[A

Evaluating:  27%|██▋       | 4/15 [00:00<00:02,  5.48it/s][A[A

Evaluating:  33%|███▎      | 5/15 [00:00<00:01,  5.37it/s][A[A

Evaluating:  40%|████      | 6/15 [00:01<00:01,  5.39it/s][A[A

Evaluating:  47%|████▋     | 7/15 [00:01<00:01,  5.42it/s][A[A

Evaluating:  53%|█████▎    | 8/15 [00:01<00:01,  5.43it/s][A[A

Evaluating:  60%|██████    | 9/15 [00:01<00:01,  5.46it/s][A[A

Evaluating:  67%|██████▋   | 10/15 [00:01<00:00,  5.43it/s][A[A

Evaluating:  73%|███████▎  | 11/15 [00:02<00:00,  5.41it/s][A[A

Evaluating:  80%|████████  | 12/15 [00:02<00:00,  5.45it/s][A[A

Evaluating:  87%|████████▋ | 13/15 [00:02<00:00,  5.42it/s][A[A

Evaluating:  93%|█████████▎| 14/15 [00:02<00:00,  5.37it/s][A[A

Evaluating:

***** Eval results *****
  %s = %s bias_macro_f1 0.7128433019884616
  %s = %s bias_weighted_f1 0.8096391198709323
  %s = %s hate_macro_f1 0.672568893858441
  %s = %s hate_weighted_f1 0.667777034196714
  %s = %s loss 2.1060171604156492
  %s = %s mean_weighted_f1 0.7387080770338231



Iteration:  48%|████▊     | 237/494 [01:12<03:42,  1.16it/s][A
Iteration:  48%|████▊     | 238/494 [01:12<02:56,  1.45it/s][A
Iteration:  48%|████▊     | 239/494 [01:12<02:24,  1.77it/s][A
Iteration:  49%|████▊     | 240/494 [01:13<02:02,  2.07it/s][A
Iteration:  49%|████▉     | 241/494 [01:13<01:46,  2.37it/s][A
Iteration:  49%|████▉     | 242/494 [01:13<01:35,  2.63it/s][A
Iteration:  49%|████▉     | 243/494 [01:14<01:27,  2.85it/s][A
Iteration:  49%|████▉     | 244/494 [01:14<01:22,  3.04it/s][A
Iteration:  50%|████▉     | 245/494 [01:14<01:18,  3.18it/s][A
Iteration:  50%|████▉     | 246/494 [01:14<01:15,  3.28it/s][A
Iteration:  50%|█████     | 247/494 [01:15<01:13,  3.35it/s][A
Iteration:  50%|█████     | 248/494 [01:15<01:12,  3.40it/s][A
Iteration:  50%|█████     | 249/494 [01:15<01:10,  3.46it/s][A
Iteration:  51%|█████     | 250/494 [01:16<01:10,  3.48it/s][A
Iteration:  51%|█████     | 251/494 [01:16<01:09,  3.49it/s][A
Iteration:  51%|█████     | 252/494 [01

***** Running evaluation on %s dataset ***** dev
  Num examples = %d 471
  Batch size = %d 32




Evaluating:   0%|          | 0/15 [00:00<?, ?it/s][A[A

Evaluating:   7%|▋         | 1/15 [00:00<00:02,  5.66it/s][A[A

Evaluating:  13%|█▎        | 2/15 [00:00<00:02,  5.37it/s][A[A

Evaluating:  20%|██        | 3/15 [00:00<00:02,  5.40it/s][A[A

Evaluating:  27%|██▋       | 4/15 [00:00<00:02,  5.38it/s][A[A

Evaluating:  33%|███▎      | 5/15 [00:00<00:01,  5.38it/s][A[A

Evaluating:  40%|████      | 6/15 [00:01<00:01,  5.41it/s][A[A

Evaluating:  47%|████▋     | 7/15 [00:01<00:01,  5.39it/s][A[A

Evaluating:  53%|█████▎    | 8/15 [00:01<00:01,  5.35it/s][A[A

Evaluating:  60%|██████    | 9/15 [00:01<00:01,  5.37it/s][A[A

Evaluating:  67%|██████▋   | 10/15 [00:01<00:00,  5.39it/s][A[A

Evaluating:  73%|███████▎  | 11/15 [00:02<00:00,  5.42it/s][A[A

Evaluating:  80%|████████  | 12/15 [00:02<00:00,  5.43it/s][A[A

Evaluating:  87%|████████▋ | 13/15 [00:02<00:00,  5.40it/s][A[A

Evaluating:  93%|█████████▎| 14/15 [00:02<00:00,  5.34it/s][A[A

Evaluating:

***** Eval results *****
  %s = %s bias_macro_f1 0.7023267136332798
  %s = %s bias_weighted_f1 0.8024605812757313
  %s = %s hate_macro_f1 0.6478406597774077
  %s = %s hate_weighted_f1 0.6472527831418239
  %s = %s loss 2.436918354034424
  %s = %s mean_weighted_f1 0.7248566822087776



Iteration:  88%|████████▊ | 437/494 [02:11<00:49,  1.16it/s][A
Iteration:  89%|████████▊ | 438/494 [02:11<00:38,  1.45it/s][A
Iteration:  89%|████████▉ | 439/494 [02:12<00:31,  1.76it/s][A
Iteration:  89%|████████▉ | 440/494 [02:12<00:26,  2.07it/s][A
Iteration:  89%|████████▉ | 441/494 [02:12<00:22,  2.35it/s][A
Iteration:  89%|████████▉ | 442/494 [02:12<00:19,  2.62it/s][A
Iteration:  90%|████████▉ | 443/494 [02:13<00:17,  2.85it/s][A
Iteration:  90%|████████▉ | 444/494 [02:13<00:16,  3.00it/s][A
Iteration:  90%|█████████ | 445/494 [02:13<00:15,  3.16it/s][A
Iteration:  90%|█████████ | 446/494 [02:14<00:14,  3.25it/s][A
Iteration:  90%|█████████ | 447/494 [02:14<00:14,  3.34it/s][A
Iteration:  91%|█████████ | 448/494 [02:14<00:13,  3.39it/s][A
Iteration:  91%|█████████ | 449/494 [02:14<00:13,  3.45it/s][A
Iteration:  91%|█████████ | 450/494 [02:15<00:12,  3.48it/s][A
Iteration:  91%|█████████▏| 451/494 [02:15<00:12,  3.50it/s][A
Iteration:  91%|█████████▏| 452/494 [02

***** Running evaluation on %s dataset ***** dev
  Num examples = %d 471
  Batch size = %d 32




Evaluating:   0%|          | 0/15 [00:00<?, ?it/s][A[A

Evaluating:   7%|▋         | 1/15 [00:00<00:02,  5.86it/s][A[A

Evaluating:  13%|█▎        | 2/15 [00:00<00:02,  5.52it/s][A[A

Evaluating:  20%|██        | 3/15 [00:00<00:02,  5.45it/s][A[A

Evaluating:  27%|██▋       | 4/15 [00:00<00:02,  5.43it/s][A[A

Evaluating:  33%|███▎      | 5/15 [00:00<00:01,  5.37it/s][A[A

Evaluating:  40%|████      | 6/15 [00:01<00:01,  5.43it/s][A[A

Evaluating:  47%|████▋     | 7/15 [00:01<00:01,  5.44it/s][A[A

Evaluating:  53%|█████▎    | 8/15 [00:01<00:01,  5.48it/s][A[A

Evaluating:  60%|██████    | 9/15 [00:01<00:01,  5.45it/s][A[A

Evaluating:  67%|██████▋   | 10/15 [00:01<00:00,  5.44it/s][A[A

Evaluating:  73%|███████▎  | 11/15 [00:02<00:00,  5.41it/s][A[A

Evaluating:  80%|████████  | 12/15 [00:02<00:00,  5.38it/s][A[A

Evaluating:  87%|████████▋ | 13/15 [00:02<00:00,  5.36it/s][A[A

Evaluating:  93%|█████████▎| 14/15 [00:02<00:00,  5.38it/s][A[A

Evaluating:

***** Eval results *****
  %s = %s bias_macro_f1 0.7265335059040914
  %s = %s bias_weighted_f1 0.8205134394850513
  %s = %s hate_macro_f1 0.6545941069490272
  %s = %s hate_weighted_f1 0.650281982190989
  %s = %s loss 2.4385952790578207
  %s = %s mean_weighted_f1 0.7353977108380201



Iteration:  29%|██▉       | 143/494 [00:43<05:02,  1.16it/s][A
Iteration:  29%|██▉       | 144/494 [00:43<04:01,  1.45it/s][A
Iteration:  29%|██▉       | 145/494 [00:43<03:18,  1.76it/s][A
Iteration:  30%|██▉       | 146/494 [00:43<02:49,  2.06it/s][A
Iteration:  30%|██▉       | 147/494 [00:44<02:27,  2.35it/s][A
Iteration:  30%|██▉       | 148/494 [00:44<02:13,  2.60it/s][A
Iteration:  30%|███       | 149/494 [00:44<02:02,  2.82it/s][A
Iteration:  30%|███       | 150/494 [00:45<01:54,  3.00it/s][A
Iteration:  31%|███       | 151/494 [00:45<01:50,  3.11it/s][A
Iteration:  31%|███       | 152/494 [00:45<01:46,  3.22it/s][A
Iteration:  31%|███       | 153/494 [00:45<01:44,  3.28it/s][A
Iteration:  31%|███       | 154/494 [00:46<01:41,  3.35it/s][A
Iteration:  31%|███▏      | 155/494 [00:46<01:40,  3.37it/s][A
Iteration:  32%|███▏      | 156/494 [00:46<01:38,  3.44it/s][A
Iteration:  32%|███▏      | 157/494 [00:47<01:37,  3.44it/s][A
Iteration:  32%|███▏      | 158/494 [00

***** Running evaluation on %s dataset ***** dev
  Num examples = %d 471
  Batch size = %d 32




Evaluating:   0%|          | 0/15 [00:00<?, ?it/s][A[A

Evaluating:   7%|▋         | 1/15 [00:00<00:02,  5.74it/s][A[A

Evaluating:  13%|█▎        | 2/15 [00:00<00:02,  5.40it/s][A[A

Evaluating:  20%|██        | 3/15 [00:00<00:02,  5.50it/s][A[A

Evaluating:  27%|██▋       | 4/15 [00:00<00:02,  5.50it/s][A[A

Evaluating:  33%|███▎      | 5/15 [00:00<00:01,  5.44it/s][A[A

Evaluating:  40%|████      | 6/15 [00:01<00:01,  5.47it/s][A[A

Evaluating:  47%|████▋     | 7/15 [00:01<00:01,  5.40it/s][A[A

Evaluating:  53%|█████▎    | 8/15 [00:01<00:01,  5.40it/s][A[A

Evaluating:  60%|██████    | 9/15 [00:01<00:01,  5.38it/s][A[A

Evaluating:  67%|██████▋   | 10/15 [00:01<00:00,  5.41it/s][A[A

Evaluating:  73%|███████▎  | 11/15 [00:02<00:00,  5.42it/s][A[A

Evaluating:  80%|████████  | 12/15 [00:02<00:00,  5.45it/s][A[A

Evaluating:  87%|████████▋ | 13/15 [00:02<00:00,  5.41it/s][A[A

Evaluating:  93%|█████████▎| 14/15 [00:02<00:00,  5.38it/s][A[A

Evaluating:

***** Eval results *****
  %s = %s bias_macro_f1 0.71062345786948
  %s = %s bias_weighted_f1 0.80718670720152
  %s = %s hate_macro_f1 0.6713892639320019
  %s = %s hate_weighted_f1 0.6694054520707344
  %s = %s loss 2.3769174893697103
  %s = %s mean_weighted_f1 0.7382960796361272



Iteration:  69%|██████▉   | 343/494 [01:42<02:09,  1.16it/s][A
Iteration:  70%|██████▉   | 344/494 [01:42<01:42,  1.46it/s][A
Iteration:  70%|██████▉   | 345/494 [01:42<01:23,  1.78it/s][A
Iteration:  70%|███████   | 346/494 [01:43<01:11,  2.08it/s][A
Iteration:  70%|███████   | 347/494 [01:43<01:01,  2.38it/s][A
Iteration:  70%|███████   | 348/494 [01:43<00:55,  2.64it/s][A
Iteration:  71%|███████   | 349/494 [01:43<00:50,  2.85it/s][A
Iteration:  71%|███████   | 350/494 [01:44<00:47,  3.03it/s][A
Iteration:  71%|███████   | 351/494 [01:44<00:45,  3.17it/s][A
Iteration:  71%|███████▏  | 352/494 [01:44<00:43,  3.28it/s][A
Iteration:  71%|███████▏  | 353/494 [01:45<00:42,  3.34it/s][A
Iteration:  72%|███████▏  | 354/494 [01:45<00:41,  3.39it/s][A
Iteration:  72%|███████▏  | 355/494 [01:45<00:40,  3.44it/s][A
Iteration:  72%|███████▏  | 356/494 [01:45<00:39,  3.45it/s][A
Iteration:  72%|███████▏  | 357/494 [01:46<00:39,  3.48it/s][A
Iteration:  72%|███████▏  | 358/494 [01

***** Running evaluation on %s dataset ***** dev
  Num examples = %d 471
  Batch size = %d 32




Evaluating:   0%|          | 0/15 [00:00<?, ?it/s][A[A

Evaluating:   7%|▋         | 1/15 [00:00<00:02,  5.78it/s][A[A

Evaluating:  13%|█▎        | 2/15 [00:00<00:02,  5.47it/s][A[A

Evaluating:  20%|██        | 3/15 [00:00<00:02,  5.48it/s][A[A

Evaluating:  27%|██▋       | 4/15 [00:00<00:02,  5.41it/s][A[A

Evaluating:  33%|███▎      | 5/15 [00:00<00:01,  5.32it/s][A[A

Evaluating:  40%|████      | 6/15 [00:01<00:01,  5.39it/s][A[A

Evaluating:  47%|████▋     | 7/15 [00:01<00:01,  5.41it/s][A[A

Evaluating:  53%|█████▎    | 8/15 [00:01<00:01,  5.42it/s][A[A

Evaluating:  60%|██████    | 9/15 [00:01<00:01,  5.42it/s][A[A

Evaluating:  67%|██████▋   | 10/15 [00:01<00:00,  5.34it/s][A[A

Evaluating:  73%|███████▎  | 11/15 [00:02<00:00,  5.36it/s][A[A

Evaluating:  80%|████████  | 12/15 [00:02<00:00,  5.36it/s][A[A

Evaluating:  87%|████████▋ | 13/15 [00:02<00:00,  5.41it/s][A[A

Evaluating:  93%|█████████▎| 14/15 [00:02<00:00,  5.41it/s][A[A

Evaluating:

***** Eval results *****
  %s = %s bias_macro_f1 0.7060523554849037
  %s = %s bias_weighted_f1 0.8080908713631394
  %s = %s hate_macro_f1 0.6649629436087575
  %s = %s hate_weighted_f1 0.6607920929599923
  %s = %s loss 2.418032161394755
  %s = %s mean_weighted_f1 0.7344414821615659



Iteration:  10%|▉         | 49/494 [00:16<06:24,  1.16it/s][A
Iteration:  10%|█         | 50/494 [00:16<05:05,  1.45it/s][A
Iteration:  10%|█         | 51/494 [00:17<04:10,  1.77it/s][A
Iteration:  11%|█         | 52/494 [00:17<03:33,  2.07it/s][A
Iteration:  11%|█         | 53/494 [00:17<03:07,  2.35it/s][A
Iteration:  11%|█         | 54/494 [00:18<02:48,  2.62it/s][A
Iteration:  11%|█         | 55/494 [00:18<02:34,  2.85it/s][A
Iteration:  11%|█▏        | 56/494 [00:18<02:25,  3.02it/s][A
Iteration:  12%|█▏        | 57/494 [00:18<02:19,  3.14it/s][A
Iteration:  12%|█▏        | 58/494 [00:19<02:13,  3.26it/s][A
Iteration:  12%|█▏        | 59/494 [00:19<02:09,  3.35it/s][A
Iteration:  12%|█▏        | 60/494 [00:19<02:07,  3.39it/s][A
Iteration:  12%|█▏        | 61/494 [00:20<02:05,  3.44it/s][A
Iteration:  13%|█▎        | 62/494 [00:20<02:04,  3.47it/s][A
Iteration:  13%|█▎        | 63/494 [00:20<02:03,  3.49it/s][A
Iteration:  13%|█▎        | 64/494 [00:20<02:03,  3.48

***** Running evaluation on %s dataset ***** dev
  Num examples = %d 471
  Batch size = %d 32




Evaluating:   0%|          | 0/15 [00:00<?, ?it/s][A[A

Evaluating:   7%|▋         | 1/15 [00:00<00:02,  5.74it/s][A[A

Evaluating:  13%|█▎        | 2/15 [00:00<00:02,  5.52it/s][A[A

Evaluating:  20%|██        | 3/15 [00:00<00:02,  5.52it/s][A[A

Evaluating:  27%|██▋       | 4/15 [00:00<00:02,  5.48it/s][A[A

Evaluating:  33%|███▎      | 5/15 [00:00<00:01,  5.40it/s][A[A

Evaluating:  40%|████      | 6/15 [00:01<00:01,  5.43it/s][A[A

Evaluating:  47%|████▋     | 7/15 [00:01<00:01,  5.40it/s][A[A

Evaluating:  53%|█████▎    | 8/15 [00:01<00:01,  5.39it/s][A[A

Evaluating:  60%|██████    | 9/15 [00:01<00:01,  5.39it/s][A[A

Evaluating:  67%|██████▋   | 10/15 [00:01<00:00,  5.41it/s][A[A

Evaluating:  73%|███████▎  | 11/15 [00:02<00:00,  5.43it/s][A[A

Evaluating:  80%|████████  | 12/15 [00:02<00:00,  5.40it/s][A[A

Evaluating:  87%|████████▋ | 13/15 [00:02<00:00,  5.36it/s][A[A

Evaluating:  93%|█████████▎| 14/15 [00:02<00:00,  5.39it/s][A[A

Evaluating:

***** Eval results *****
  %s = %s bias_macro_f1 0.7316599398883641
  %s = %s bias_weighted_f1 0.8249567540810548
  %s = %s hate_macro_f1 0.6477879599688005
  %s = %s hate_weighted_f1 0.6452232360605541
  %s = %s loss 2.681439479192098
  %s = %s mean_weighted_f1 0.7350899950708045



Iteration:  50%|█████     | 249/494 [01:15<03:30,  1.16it/s][A
Iteration:  51%|█████     | 250/494 [01:16<02:47,  1.45it/s][A
Iteration:  51%|█████     | 251/494 [01:16<02:18,  1.76it/s][A
Iteration:  51%|█████     | 252/494 [01:16<01:57,  2.06it/s][A
Iteration:  51%|█████     | 253/494 [01:16<01:42,  2.35it/s][A
Iteration:  51%|█████▏    | 254/494 [01:17<01:31,  2.62it/s][A
Iteration:  52%|█████▏    | 255/494 [01:17<01:24,  2.82it/s][A
Iteration:  52%|█████▏    | 256/494 [01:17<01:19,  3.00it/s][A
Iteration:  52%|█████▏    | 257/494 [01:18<01:15,  3.14it/s][A
Iteration:  52%|█████▏    | 258/494 [01:18<01:12,  3.24it/s][A
Iteration:  52%|█████▏    | 259/494 [01:18<01:10,  3.32it/s][A
Iteration:  53%|█████▎    | 260/494 [01:18<01:09,  3.37it/s][A
Iteration:  53%|█████▎    | 261/494 [01:19<01:08,  3.40it/s][A
Iteration:  53%|█████▎    | 262/494 [01:19<01:08,  3.40it/s][A
Iteration:  53%|█████▎    | 263/494 [01:19<01:07,  3.44it/s][A
Iteration:  53%|█████▎    | 264/494 [01

***** Running evaluation on %s dataset ***** dev
  Num examples = %d 471
  Batch size = %d 32




Evaluating:   0%|          | 0/15 [00:00<?, ?it/s][A[A

Evaluating:   7%|▋         | 1/15 [00:00<00:02,  5.81it/s][A[A

Evaluating:  13%|█▎        | 2/15 [00:00<00:02,  5.47it/s][A[A

Evaluating:  20%|██        | 3/15 [00:00<00:02,  5.47it/s][A[A

Evaluating:  27%|██▋       | 4/15 [00:00<00:02,  5.41it/s][A[A

Evaluating:  33%|███▎      | 5/15 [00:00<00:01,  5.32it/s][A[A

Evaluating:  40%|████      | 6/15 [00:01<00:01,  5.38it/s][A[A

Evaluating:  47%|████▋     | 7/15 [00:01<00:01,  5.38it/s][A[A

Evaluating:  53%|█████▎    | 8/15 [00:01<00:01,  5.41it/s][A[A

Evaluating:  60%|██████    | 9/15 [00:01<00:01,  5.42it/s][A[A

Evaluating:  67%|██████▋   | 10/15 [00:01<00:00,  5.40it/s][A[A

Evaluating:  73%|███████▎  | 11/15 [00:02<00:00,  5.37it/s][A[A

Evaluating:  80%|████████  | 12/15 [00:02<00:00,  5.36it/s][A[A

Evaluating:  87%|████████▋ | 13/15 [00:02<00:00,  5.33it/s][A[A

Evaluating:  93%|█████████▎| 14/15 [00:02<00:00,  5.36it/s][A[A

Evaluating:

***** Eval results *****
  %s = %s bias_macro_f1 0.7121633639169311
  %s = %s bias_weighted_f1 0.8097699649181535
  %s = %s hate_macro_f1 0.6751230953540226
  %s = %s hate_weighted_f1 0.6710264881030864
  %s = %s loss 2.6597737312316894
  %s = %s mean_weighted_f1 0.7403982265106199



Iteration:  91%|█████████ | 448/494 [02:21<02:18,  3.02s/it][A

Saving model checkpoint to %s ./model



Iteration:  91%|█████████ | 449/494 [02:21<01:39,  2.22s/it][A
Iteration:  91%|█████████ | 450/494 [02:21<01:11,  1.63s/it][A
Iteration:  91%|█████████▏| 451/494 [02:22<00:52,  1.22s/it][A
Iteration:  91%|█████████▏| 452/494 [02:22<00:39,  1.07it/s][A
Iteration:  92%|█████████▏| 453/494 [02:22<00:30,  1.35it/s][A
Iteration:  92%|█████████▏| 454/494 [02:22<00:24,  1.65it/s][A
Iteration:  92%|█████████▏| 455/494 [02:23<00:19,  1.98it/s][A
Iteration:  92%|█████████▏| 456/494 [02:23<00:16,  2.29it/s][A
Iteration:  93%|█████████▎| 457/494 [02:23<00:14,  2.54it/s][A
Iteration:  93%|█████████▎| 458/494 [02:24<00:12,  2.79it/s][A
Iteration:  93%|█████████▎| 459/494 [02:24<00:11,  2.99it/s][A
Iteration:  93%|█████████▎| 460/494 [02:24<00:10,  3.14it/s][A
Iteration:  93%|█████████▎| 461/494 [02:24<00:10,  3.25it/s][A
Iteration:  94%|█████████▎| 462/494 [02:25<00:09,  3.34it/s][A
Iteration:  94%|█████████▎| 463/494 [02:25<00:09,  3.41it/s][A
Iteration:  94%|█████████▍| 464/494 [02

***** Running evaluation on %s dataset ***** dev
  Num examples = %d 471
  Batch size = %d 32




Evaluating:   0%|          | 0/15 [00:00<?, ?it/s][A[A

Evaluating:   7%|▋         | 1/15 [00:00<00:02,  5.88it/s][A[A

Evaluating:  13%|█▎        | 2/15 [00:00<00:02,  5.47it/s][A[A

Evaluating:  20%|██        | 3/15 [00:00<00:02,  5.48it/s][A[A

Evaluating:  27%|██▋       | 4/15 [00:00<00:02,  5.41it/s][A[A

Evaluating:  33%|███▎      | 5/15 [00:00<00:01,  5.37it/s][A[A

Evaluating:  40%|████      | 6/15 [00:01<00:01,  5.43it/s][A[A

Evaluating:  47%|████▋     | 7/15 [00:01<00:01,  5.44it/s][A[A

Evaluating:  53%|█████▎    | 8/15 [00:01<00:01,  5.43it/s][A[A

Evaluating:  60%|██████    | 9/15 [00:01<00:01,  5.42it/s][A[A

Evaluating:  67%|██████▋   | 10/15 [00:01<00:00,  5.33it/s][A[A

Evaluating:  73%|███████▎  | 11/15 [00:02<00:00,  5.40it/s][A[A

Evaluating:  80%|████████  | 12/15 [00:02<00:00,  5.43it/s][A[A

Evaluating:  87%|████████▋ | 13/15 [00:02<00:00,  5.45it/s][A[A

Evaluating:  93%|█████████▎| 14/15 [00:02<00:00,  5.43it/s][A[A

Evaluating:

***** Eval results *****
  %s = %s bias_macro_f1 0.6992904477063471
  %s = %s bias_weighted_f1 0.802799521633074
  %s = %s hate_macro_f1 0.6533467473720093
  %s = %s hate_weighted_f1 0.6517400958415553
  %s = %s loss 2.770048157374064
  %s = %s mean_weighted_f1 0.7272698087373146



Iteration:  31%|███▏      | 155/494 [00:46<04:53,  1.16it/s][A
Iteration:  32%|███▏      | 156/494 [00:47<03:52,  1.45it/s][A
Iteration:  32%|███▏      | 157/494 [00:47<03:10,  1.77it/s][A
Iteration:  32%|███▏      | 158/494 [00:47<02:41,  2.07it/s][A
Iteration:  32%|███▏      | 159/494 [00:47<02:20,  2.38it/s][A
Iteration:  32%|███▏      | 160/494 [00:48<02:06,  2.65it/s][A
Iteration:  33%|███▎      | 161/494 [00:48<01:56,  2.87it/s][A
Iteration:  33%|███▎      | 162/494 [00:48<01:48,  3.05it/s][A
Iteration:  33%|███▎      | 163/494 [00:49<01:43,  3.19it/s][A
Iteration:  33%|███▎      | 164/494 [00:49<01:39,  3.30it/s][A
Iteration:  33%|███▎      | 165/494 [00:49<01:37,  3.39it/s][A
Iteration:  34%|███▎      | 166/494 [00:49<01:35,  3.43it/s][A
Iteration:  34%|███▍      | 167/494 [00:50<01:33,  3.49it/s][A
Iteration:  34%|███▍      | 168/494 [00:50<01:33,  3.50it/s][A
Iteration:  34%|███▍      | 169/494 [00:50<01:32,  3.53it/s][A
Iteration:  34%|███▍      | 170/494 [00

***** Running evaluation on %s dataset ***** dev
  Num examples = %d 471
  Batch size = %d 32




Evaluating:   0%|          | 0/15 [00:00<?, ?it/s][A[A

Evaluating:   7%|▋         | 1/15 [00:00<00:02,  5.70it/s][A[A

Evaluating:  13%|█▎        | 2/15 [00:00<00:02,  5.45it/s][A[A

Evaluating:  20%|██        | 3/15 [00:00<00:02,  5.46it/s][A[A

Evaluating:  27%|██▋       | 4/15 [00:00<00:02,  5.39it/s][A[A

Evaluating:  33%|███▎      | 5/15 [00:00<00:01,  5.33it/s][A[A

Evaluating:  40%|████      | 6/15 [00:01<00:01,  5.37it/s][A[A

Evaluating:  47%|████▋     | 7/15 [00:01<00:01,  5.39it/s][A[A

Evaluating:  53%|█████▎    | 8/15 [00:01<00:01,  5.37it/s][A[A

Evaluating:  60%|██████    | 9/15 [00:01<00:01,  5.35it/s][A[A

Evaluating:  67%|██████▋   | 10/15 [00:01<00:00,  5.35it/s][A[A

Evaluating:  73%|███████▎  | 11/15 [00:02<00:00,  5.38it/s][A[A

Evaluating:  80%|████████  | 12/15 [00:02<00:00,  5.41it/s][A[A

Evaluating:  87%|████████▋ | 13/15 [00:02<00:00,  5.41it/s][A[A

Evaluating:  93%|█████████▎| 14/15 [00:02<00:00,  5.35it/s][A[A

Evaluating:

***** Eval results *****
  %s = %s bias_macro_f1 0.693354949679669
  %s = %s bias_weighted_f1 0.7961902000484149
  %s = %s hate_macro_f1 0.6800958198844821
  %s = %s hate_weighted_f1 0.6759342766510119
  %s = %s loss 2.6339526335398356
  %s = %s mean_weighted_f1 0.7360622383497134



Iteration:  72%|███████▏  | 355/494 [01:45<02:00,  1.15it/s][A
Iteration:  72%|███████▏  | 356/494 [01:46<01:35,  1.44it/s][A
Iteration:  72%|███████▏  | 357/494 [01:46<01:17,  1.76it/s][A
Iteration:  72%|███████▏  | 358/494 [01:46<01:05,  2.06it/s][A
Iteration:  73%|███████▎  | 359/494 [01:47<00:57,  2.35it/s][A
Iteration:  73%|███████▎  | 360/494 [01:47<00:50,  2.63it/s][A
Iteration:  73%|███████▎  | 361/494 [01:47<00:46,  2.85it/s][A
Iteration:  73%|███████▎  | 362/494 [01:47<00:43,  3.02it/s][A
Iteration:  73%|███████▎  | 363/494 [01:48<00:41,  3.15it/s][A
Iteration:  74%|███████▎  | 364/494 [01:48<00:39,  3.27it/s][A
Iteration:  74%|███████▍  | 365/494 [01:48<00:38,  3.34it/s][A
Iteration:  74%|███████▍  | 366/494 [01:49<00:37,  3.42it/s][A
Iteration:  74%|███████▍  | 367/494 [01:49<00:36,  3.44it/s][A
Iteration:  74%|███████▍  | 368/494 [01:49<00:36,  3.48it/s][A
Iteration:  75%|███████▍  | 369/494 [01:49<00:35,  3.48it/s][A
Iteration:  75%|███████▍  | 370/494 [01

#### <b>모델 평가</b>

In [None]:
if args.do_pred:
    trainer.load_model()
    trainer.predict()

***** Model Loaded *****
***** Running prediction on test dataset *****
  Num examples = %d 974
  Batch size = %d 32


Evaluating: 100%|██████████| 31/31 [00:05<00:00,  5.53it/s]

Writing Prediction to prediction.csv...



