Attempt 008: Summarization, and DistilBERT fine-tuning for FNC-1 stance classification

In [None]:
# Set CUDA_VISIBLE_DEVICES to 0 in this kernel's environment.
%env CUDA_VISIBLE_DEVICES=0
# Hugging Face libraries used below for tokenization, training and dataset handling (installed unpinned).
!pip install transformers datasets
# TPU support: torch 1.12.0 together with the torch_xla 1.12 Colab wheel (cp37 build) and the TPU client/profiler.
!pip install cloud-tpu-client==0.10 torch==1.12.0 https://storage.googleapis.com/tpu-pytorch/wheels/colab/torch_xla-1.12-cp37-cp37m-linux_x86_64.whl tensorboard-plugin-profile

env: CUDA_VISIBLE_DEVICES=0
Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting torch-xla==1.12
  Using cached https://storage.googleapis.com/tpu-pytorch/wheels/colab/torch_xla-1.12-cp37-cp37m-linux_x86_64.whl (187.4 MB)


In [None]:
#adapted from https://github.com/FakeNewsChallenge/fnc-1-baseline/tree/master/utils
from csv import DictReader
import random
import os
from collections import defaultdict
import sys
import os
import re
import argparse

In [None]:
#Loading dataset from summaries
class DataReaderFromPath():
    """Load a stance table and its article summaries from CSV files.

    Two files are read from ``path``:

    * ``<name>_stances.csv`` (or ``<name>_stances_unlabeled.csv`` when
      ``is_unlabeled`` is ``True``), and
    * ``<name>_summaries.csv``.

    Attributes:
        path: Directory the CSV files are read from.
        stances: List of row dicts from the stances file; the ``'Body ID'``
            value of every row is converted to ``int``.
        summaries: Dict mapping an integer body ID to the ``'articleSummary'``
            text of that body.
    """

    def __init__(self, name="train", path="fnc-1", is_unlabeled=False):
        """Read both CSV files and index the summaries by body ID.

        Args:
            name: File-name prefix of the split to load (e.g. ``"train"``).
            path: Directory containing the CSV files.
            is_unlabeled: When ``True``, read ``<name>_stances_unlabeled.csv``
                instead of ``<name>_stances.csv``.
        """
        self.path = path
        print("Reading data from path : " + str(path) + " " )

        summaries_file = name + "_summaries.csv"
        stances_file = name + "_stances"
        if is_unlabeled is True:
            stances_file = stances_file + "_unlabeled"
        stances_file = stances_file + ".csv"

        self.stances = self.read(stances_file)
        summary_rows = self.read(summaries_file)

        # Body IDs arrive as strings from the CSV; store them as integers so
        # they match the integer keys of ``self.summaries``.
        for stance in self.stances:
            stance['Body ID'] = int(stance['Body ID'])

        # Index every summary by its integer body ID.
        self.summaries = {
            int(row['Body ID']): row['articleSummary'] for row in summary_rows
        }

        print("Total stances: " + str(len(self.stances)))
        print("Total summaries: " + str(len(self.summaries)))

    def read(self, filename):
        """Return all rows of ``<self.path>/<filename>`` as a list of dicts.

        Args:
            filename: Name of a UTF-8 encoded CSV file (with a header row)
                located inside ``self.path``.

        Returns:
            One dict per data row, keyed by the header names.
        """
        # newline='' leaves newline handling to the csv module, so line breaks
        # embedded in quoted fields (e.g. multi-line summaries) come back
        # unchanged instead of being rewritten by universal-newline translation.
        with open(os.path.join(self.path, filename), "r", encoding='utf-8', newline='') as table:
            return list(DictReader(table))

In [None]:
# Smoke test: build the reader with its defaults (name="train", path="fnc-1"); it prints the row counts.
d = DataReaderFromPath()

Reading data from path : fnc-1 
Total stances: 49972
Total summaries: 1683


In [None]:
# Adapted from https://github.com/FakeNewsChallenge/fnc-1/blob/master/scorer.py
from sklearn.metrics import confusion_matrix
from sklearn.metrics import precision_recall_fscore_support, accuracy_score
import json
# Stance classes; a class's position in this list doubles as its integer label id.
LABELS = ['agree', 'disagree', 'discuss', 'unrelated']
# Label names for the coarse related/unrelated split.
LABELS_RELATED = ['unrelated', 'related']
# The first three stances (agree, disagree, discuss) are the "related" ones.
RELATED = LABELS[0:3]


def score_submission(gold_labels, test_labels):
    """Compute the weighted stance score and a confusion matrix.

    For every (gold, predicted) pair:

    * an exact match earns 0.25, plus another 0.50 when the gold stance is
      not ``'unrelated'``;
    * 0.25 is earned whenever both stances are in ``RELATED``.

    Args:
        gold_labels: Iterable of gold stance names (members of ``LABELS``).
        test_labels: Iterable of predicted stance names, paired with
            ``gold_labels`` position by position.

    Returns:
        ``(score, cm)`` where ``cm[g][p]`` counts pairs whose gold stance has
        index ``g`` and predicted stance has index ``p`` in ``LABELS``.
    """
    confusion = [[0] * 4 for _ in range(4)]
    points = 0.0

    for gold, guess in zip(gold_labels, test_labels):
        if gold == guess:
            points += 0.25
            if gold != 'unrelated':
                points += 0.50
        if gold in RELATED and guess in RELATED:
            points += 0.25

        gold_row = LABELS.index(gold)
        guess_col = LABELS.index(guess)
        confusion[gold_row][guess_col] += 1

    return points, confusion


def print_confusion_matrix(cm):
    """Print ``cm`` as an ASCII table labelled with the stance names.

    Args:
        cm: Sequence of rows, one per gold class in ``LABELS`` order; each row
            holds one count per predicted class (four counts per row).
    """
    row_format = "|{:^11}|{:^11}|{:^11}|{:^11}|{:^11}|"
    header = row_format.format('', *LABELS)
    rule = "-" * len(header)

    lines = [rule, header, rule]
    for label_index, row in enumerate(cm):
        # One table row per gold class, followed by a separator rule.
        lines.append(row_format.format(LABELS[label_index], *row))
        lines.append(rule)
    print('\n'.join(lines))


def report_score(actual, predicted):
    """Print the confusion matrix, the weighted score and all metrics.

    Args:
        actual: Gold stance names.
        predicted: Predicted stance names, aligned with ``actual``.

    Returns:
        The achieved score as a percentage of the best attainable score.
    """
    achieved, matrix = score_submission(actual, predicted)
    # Scoring the gold labels against themselves gives the best attainable score.
    attainable = score_submission(actual, actual)[0]

    print_confusion_matrix(matrix)
    relative = achieved * 100 / attainable
    print("Score: " + str(achieved) + " out of " + str(attainable) + "\t(" + str(relative) + "%)")
    get_precision_recall_f1_scores(actual, predicted)

    return relative

def get_precision_recall_f1_scores(actual, predicted):
    """Print accuracy/precision/recall/F1 for several averaging modes.

    Metrics are computed over ``LABELS`` per class (``average=None``) and with
    ``'micro'``, ``'macro'`` and ``'weighted'`` averaging; undefined ratios are
    reported as 0.0. The full report is printed as sorted, indented JSON.

    Args:
        actual: Gold stance names.
        predicted: Predicted stance names, aligned with ``actual``.

    Returns:
        The macro-averaged dict with keys ``"accuracy"``, ``"precision"``,
        ``"recall"`` and ``"f1"``.
    """
    def _scores(average):
        # Precision, recall and F1 for one averaging mode (support is discarded).
        precision, recall, f1, _ = precision_recall_fscore_support(
            y_true=actual, y_pred=predicted, labels=LABELS, zero_division=0.0,
            average=average)
        return precision, recall, f1

    def _by_label(values):
        # One single-entry {label: value} dict per class, in LABELS order.
        return [{LABELS[position]: value} for (position, value) in enumerate(values)]

    class_precision, class_recall, class_f1 = _scores(None)
    acc = accuracy_score(actual, predicted)

    report = {
        "Each_Class": {
            "accuracy": acc,
            "precision": _by_label(class_precision),
            "recall": _by_label(class_recall),
            "f1": _by_label(class_f1),
        }
    }
    for mode in ('micro', 'macro', 'weighted'):
        precision, recall, f1 = _scores(mode)
        report[mode] = {"accuracy": acc, "precision": precision, "recall": recall, "f1": f1}

    print("--All Metrics --")
    print(json.dumps(report, indent=4, sort_keys=True, separators=(',', ': ')))

    return report["macro"]

def compute_metrics(pred):
    """Translate a prediction object's label ids into names and score them.

    Args:
        pred: Object exposing ``label_ids`` (gold class ids) and
            ``predictions`` (per-class scores; the arg-max over the last axis
            is taken as the predicted class id).

    Returns:
        The macro-averaged metrics dict from
        ``get_precision_recall_f1_scores``.
    """
    gold_names = [LABELS[int(class_id)] for class_id in pred.label_ids]
    predicted_ids = pred.predictions.argmax(-1)
    predicted_names = [LABELS[int(class_id)] for class_id in predicted_ids]

    return get_precision_recall_f1_scores(gold_names, predicted_names)

In [None]:
#adapted from https://github.com/FakeNewsChallenge/fnc-1-baseline/blob/master/feature_engineering.py
import os
import re
import nltk
import numpy as np
from tqdm import tqdm

def combine_headline_and_bodies(headlines, bodies):
    """Join each headline with its body using the ``<TitleSummarySep>`` marker.

    Args:
        headlines: Iterable of headline strings.
        bodies: Iterable of body/summary strings, paired with ``headlines``
            position by position.

    Returns:
        A list of ``"<headline> <TitleSummarySep> <body>"`` strings; a tqdm
        progress bar is shown while the pairs are consumed.
    """
    return [
        title + " <TitleSummarySep> " + text
        for title, text in tqdm(zip(headlines, bodies))
    ]

In [None]:
# Quick sanity check of combine_headline_and_bodies on two hand-written pairs.
headlines = ["The world is ending","We all are about to die"]
bodies = ["The world is ending","We all are about to die. Everyone will be dead. We will. Kill us."]
fp = combine_headline_and_bodies(headlines, bodies)
# Each printed entry should read "<headline> <TitleSummarySep> <body>".
print(fp)

2it [00:00, 7345.54it/s]

['The world is ending <TitleSummarySep> The world is ending', 'We all are about to die <TitleSummarySep> We all are about to die. Everyone will be dead. We will. Kill us.']





In [None]:
#adapted from https://github.com/FakeNewsChallenge/fnc-1-baseline/blob/master/fnc_kfold.py
import sys
import numpy as np


def generate_labeled_data(stances, dataset, name):
    """Build model inputs and integer labels from labeled stance rows.

    Args:
        stances: Iterable of row dicts with ``'Stance'``, ``'Headline'`` and
            ``'Body ID'`` keys.
        dataset: Object whose ``summaries`` mapping returns the summary text
            for a body ID.
        name: Not used by this function; kept so existing callers keep working.

    Returns:
        ``(X, y)`` where ``X`` is the list of combined headline/summary
        strings and ``y`` the list of label indices into ``LABELS``.
    """
    h, b, y = [], [], []

    for stance in stances:
        y.append(LABELS.index(stance['Stance']))
        h.append(stance['Headline'])
        b.append(dataset.summaries[stance['Body ID']])

    # The previous debug print of the whole label list (one integer per row)
    # flooded the cell output and has been removed.
    X = combine_headline_and_bodies(h, b)

    return X, y


def generate_data_without_labels(stances, dataset, name):
    """Build model inputs from stance rows that carry no label.

    Args:
        stances: Iterable of row dicts with ``'Headline'`` and ``'Body ID'``
            keys.
        dataset: Object whose ``summaries`` mapping returns the summary text
            for a body ID.
        name: Not used by this function; kept so existing callers keep working.

    Returns:
        A list with exactly one combined headline/summary string per stance
        row, in input order.
    """
    h, b = [], []

    # Single pass over the rows: the loop used to be duplicated, which emitted
    # every row twice and doubled the size of the returned list.
    for stance in stances:
        h.append(stance['Headline'])
        b.append(dataset.summaries[stance['Body ID']])

    X = combine_headline_and_bodies(h, b)
    return X

In [None]:
import sys
import numpy as np

from sklearn.ensemble import AdaBoostClassifier
import pandas as pd
from datasets import Dataset, DatasetDict

# Load the training dataset and build the train/val/test splits.
d = DataReaderFromPath()
Xall, yall = generate_labeled_data(d.stances, d, "allData")

# Seed NumPy's global RNG so the shuffle, and therefore the split, is the same on every run.
SPLIT_SEED = 42
np.random.seed(SPLIT_SEED)

# Shuffle inputs and labels together so each text stays paired with its label.
pairedDataAndLabel = list(zip(Xall, yall))
np.random.shuffle(pairedDataAndLabel)
Xall, yall = zip(*pairedDataAndLabel)

# 80% train, 10% validation, remainder test.
n_train = int(0.8 * len(Xall))
n_val = int(0.1 * len(Xall))
data_dict = DatasetDict(
    train=Dataset.from_dict({"text": Xall[:n_train], "label": yall[:n_train]}),
    val=Dataset.from_dict(
        {
            "text": Xall[n_train : n_train + n_val],
            "label": yall[n_train : n_train + n_val],
        }
    ),
    # Take everything after the validation slice. Slicing with [-n_val:] would
    # return the WHOLE dataset when n_val is 0 and would silently drop the
    # rows left over by the integer rounding above.
    test=Dataset.from_dict(
        {
            "text": Xall[n_train + n_val :],
            "label": yall[n_train + n_val :],
        }
    ),
)

# Load the competition dataset (unlabeled stances plus their summaries).
unlabeled_competition_dataset = DataReaderFromPath(name="competition_test", is_unlabeled=True)
X_unlabeled = generate_data_without_labels(unlabeled_competition_dataset.stances, unlabeled_competition_dataset,"competition_unlabeled")



Reading data from path : fnc-1 
Total stances: 49972
Total summaries: 1683


49972it [00:00, 286059.84it/s]


[3, 0, 3, 3, 1, 0, 3, 3, 0, 3, 2, 0, 3, 3, 2, 3, 2, 0, 3, 2, 3, 2, 3, 3, 0, 0, 3, 3, 3, 2, 3, 2, 3, 3, 3, 2, 0, 3, 3, 3, 3, 3, 3, 3, 3, 3, 0, 3, 3, 3, 3, 3, 3, 1, 3, 0, 3, 3, 3, 3, 1, 3, 3, 3, 3, 3, 3, 2, 3, 3, 3, 3, 0, 3, 3, 3, 3, 2, 3, 3, 0, 3, 3, 3, 3, 3, 2, 0, 2, 3, 3, 3, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 0, 0, 3, 0, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 2, 3, 2, 3, 3, 3, 3, 3, 3, 0, 0, 2, 3, 3, 3, 3, 3, 3, 3, 3, 2, 3, 2, 3, 3, 3, 2, 3, 3, 3, 2, 2, 2, 3, 3, 2, 3, 3, 2, 3, 3, 3, 3, 3, 0, 3, 3, 1, 3, 0, 3, 3, 3, 1, 3, 3, 3, 3, 3, 3, 0, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 2, 3, 3, 3, 3, 3, 3, 3, 3, 2, 3, 3, 2, 3, 3, 0, 3, 2, 3, 2, 3, 2, 3, 3, 0, 3, 3, 3, 3, 0, 2, 3, 3, 3, 3, 3, 3, 3, 2, 3, 3, 3, 2, 3, 2, 3, 3, 2, 3, 0, 3, 3, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3, 2, 3, 0, 2, 3, 3, 3, 3, 3, 2, 3, 3, 3, 2, 3, 3, 2, 3, 2, 3, 3, 3, 1, 3, 3, 2, 1, 2, 2, 2, 3, 3, 3, 3, 3, 2, 3, 2, 3, 3, 1, 3, 3, 3, 0, 3, 3, 3, 3, 3, 3, 3, 2, 2, 3, 3, 3, 2, 2, 3, 3, 3, 3, 0, 3, 3, 3, 3, 3, 3, 3, 3, 0, 3, 3, 0, 2, 

50826it [00:00, 242453.26it/s]


In [None]:
import os, numpy as np

from transformers import (
    AutoTokenizer,
    AutoModelForSequenceClassification,
    TrainingArguments,
    Trainer,
)

#adapted from https://github.com/Demfier/pytorch-tutorials/blob/main/bert_classifier/train.py
# Load the fast DistilBERT tokenizer and register the headline/summary
# separator (the marker inserted by combine_headline_and_bodies) as an added token.
tokenizer = AutoTokenizer.from_pretrained("distilbert-base-uncased", use_fast=True)
new_token = "<TitleSummarySep>"
# Last expression of the cell, so the value returned by add_tokens is displayed.
tokenizer.add_tokens(new_token)



1

In [None]:
def preprocess(example):
    """Tokenize the ``"text"`` field, truncating to at most 200 tokens."""
    return tokenizer(example["text"], truncation=True, max_length=200)


# Tokenize every split; with batched=True the function is applied to batches of rows.
encoded_dataset = data_dict.map(preprocess, batched=True)

  0%|          | 0/40 [00:00<?, ?ba/s]

  0%|          | 0/5 [00:00<?, ?ba/s]

  0%|          | 0/5 [00:00<?, ?ba/s]

In [None]:
# Inspect the token ids produced for the first training example.
encoded_dataset['train']['input_ids'][0]

[101,
 2502,
 2924,
 9180,
 1010,
 2522,
 1011,
 3910,
 1997,
 1996,
 5699,
 7100,
 6080,
 1010,
 2038,
 2351,
 30522,
 1037,
 4861,
 7108,
 2000,
 1996,
 5499,
 2110,
 2177,
 3555,
 2019,
 2137,
 2931,
 13446,
 2001,
 2730,
 1999,
 1037,
 26276,
 14369,
 18886,
 3489,
 1012,
 1996,
 4861,
 4453,
 1996,
 2450,
 2004,
 26491,
 3744,
 26774,
 1998,
 2056,
 2016,
 2001,
 2730,
 2076,
 5152,
 12583,
 1012,
 2053,
 5499,
 2110,
 17671,
 2020,
 2730,
 1999,
 1996,
 14369,
 18886,
 9681,
 1010,
 1996,
 4861,
 2582,
 3555,
 1012,
 26774,
 1010,
 1997,
 20719,
 1010,
 5334,
 1010,
 2018,
 2042,
 2551,
 1999,
 4977,
 13951,
 9042,
 8711,
 1012,
 102]

In [None]:
# Show the tokens added on top of the base vocabulary, with their ids.
tokenizer.get_added_vocab()

{'<TitleSummarySep>': 30522}

In [None]:
# Show the raw combined headline/summary text of the first training example.
encoded_dataset['train']['text'][0]

'Big Bank Hank, co-founder of the Sugarhill Gang, has died <TitleSummarySep> A statement attributed to the Islamic State group claimed an American female hostage was killed in a Jordanian airstrike. The statement identified the woman as Kayla Jean Mueller and said she was killed during Muslim prayers. No Islamic State militants were killed in the airstrikes, the statement further claimed. Mueller, of Prescott, Arizona, had been working in Turkey assisting Syrian refugees.'

In [None]:
import torch_xla.distributed.xla_multiprocessing as xmp

# DistilBERT with a sequence-classification head sized for the four stance classes.
backbone = AutoModelForSequenceClassification.from_pretrained(
        "distilbert-base-uncased", num_labels=4
    )
# Resize the embedding table to the tokenizer's current length, which includes the added separator token.
backbone.resize_token_embeddings(len(tokenizer))
# Wrap the model once at module level; _mp_fn later calls WRAPPED_MODEL.to(device) in each spawned process.
WRAPPED_MODEL = xmp.MpModelWrapper(backbone)

def train_fnc(model):
    """Fine-tune ``model``, evaluate it on the test split and save it.

    Trains on ``encoded_dataset["train"]`` with per-epoch evaluation on
    ``encoded_dataset["val"]``, reloads the checkpoint with the best
    ``"accuracy"`` at the end, prints the test-split metrics, and writes the
    model and tokenizer to ``nli_model/``.

    Args:
        model: The sequence-classification model to train.

    Returns:
        The ``Trainer`` instance used for training.
    """
    # https://huggingface.co/docs/transformers/v4.20.1/en/main_classes/trainer#transformers.TrainingArguments
    hyperparameters = dict(
        evaluation_strategy="epoch",
        save_strategy="epoch",
        learning_rate=1e-5,
        warmup_ratio=0.1,
        weight_decay=0.01,
        load_best_model_at_end=True,
        num_train_epochs=5,
        metric_for_best_model="accuracy",
        per_device_eval_batch_size=128,
        per_device_train_batch_size=128,
        tpu_num_cores=8,
    )
    # Checkpoints are written to the "checkpoints" directory.
    training_args = TrainingArguments("checkpoints", **hyperparameters)

    trainer = Trainer(
        model,
        training_args,
        train_dataset=encoded_dataset["train"],
        eval_dataset=encoded_dataset["val"],
        tokenizer=tokenizer,
        compute_metrics=compute_metrics,
    )

    # The caller has already moved the model to its device.
    trainer.place_model_on_device = False
    trainer.train()

    test_metrics = trainer.evaluate(
        eval_dataset=encoded_dataset["test"], metric_key_prefix="test"
    )
    print(test_metrics)

    export_dir = "nli_model/"
    trainer.save_model(export_dir)
    tokenizer.save_pretrained(export_dir)

    return trainer

Some weights of the model checkpoint at distilbert-base-uncased were not used when initializing DistilBertForSequenceClassification: ['vocab_projector.bias', 'vocab_layer_norm.weight', 'vocab_projector.weight', 'vocab_layer_norm.bias', 'vocab_transform.bias', 'vocab_transform.weight']
- This IS expected if you are initializing DistilBertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing DistilBertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of DistilBertForSequenceClassification were not initialized from the model checkpoint at distilbert-base-uncased and are newly initialized: ['pre_classifier.bias', 'classifier.bias', 'pre_classifi

In [None]:
import torch_xla.core.xla_model as xm
import torch_xla.distributed.parallel_loader as pl
import torch_xla.distributed.xla_multiprocessing as xmp

def _mp_fn(index):
    """Per-process entry point handed to ``xmp.spawn``.

    Args:
        index: Argument supplied by the spawner; not used here.

    Returns:
        Whatever ``train_fnc`` returns for this process.
    """
    # Move the wrapped model onto this process's XLA device, then train on it.
    return train_fnc(WRAPPED_MODEL.to(xm.xla_device()))

# Launch _mp_fn in forked worker processes; training, evaluation and saving all happen inside them.
# NOTE(review): xmp.spawn presumably does not hand back _mp_fn's return value, so `trainer`
# is likely None here rather than a Trainer — confirm before using it in later cells.
trainer = xmp.spawn(_mp_fn, start_method="fork")

The following columns in the training set don't have a corresponding argument in `DistilBertForSequenceClassification.forward` and have been ignored: text. If text are not expected by `DistilBertForSequenceClassification.forward`,  you can safely ignore this message.
***** Running training *****
  Num examples = 39977
  Num Epochs = 5
  Instantaneous batch size per device = 128
  Total train batch size (w. parallel, distributed & accumulation) = 1024
  Gradient Accumulation steps = 1
  Total optimization steps = 200


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,No log,0.780018,0.738643,0.184661,0.25,0.212419
2,No log,0.458432,0.861317,0.392145,0.436904,0.4132
3,No log,0.342054,0.884531,0.400901,0.474635,0.431509
4,No log,0.314124,0.888333,0.402518,0.479905,0.43397
5,No log,0.307902,0.889133,0.402894,0.479954,0.434295




Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,No log,0.780018,0.738643,0.184661,0.25,0.212419
2,No log,0.458432,0.861317,0.392145,0.436904,0.4132
3,No log,0.342054,0.884531,0.400901,0.474635,0.431509
4,No log,0.314124,0.888333,0.402518,0.479905,0.43397
5,No log,0.307902,0.889133,0.402894,0.479954,0.434295




Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,No log,0.780018,0.738643,0.184661,0.25,0.212419
2,No log,0.458432,0.861317,0.392145,0.436904,0.4132
3,No log,0.342054,0.884531,0.400901,0.474635,0.431509
4,No log,0.314124,0.888333,0.402518,0.479905,0.43397
5,No log,0.307902,0.889133,0.402894,0.479954,0.434295




Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,No log,0.780018,0.738643,0.184661,0.25,0.212419
2,No log,0.458432,0.861317,0.392145,0.436904,0.4132
3,No log,0.342054,0.884531,0.400901,0.474635,0.431509
4,No log,0.314124,0.888333,0.402518,0.479905,0.43397
5,No log,0.307902,0.889133,0.402894,0.479954,0.434295




Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,No log,0.780018,0.738643,0.184661,0.25,0.212419
2,No log,0.458432,0.861317,0.392145,0.436904,0.4132
3,No log,0.342054,0.884531,0.400901,0.474635,0.431509
4,No log,0.314124,0.888333,0.402518,0.479905,0.43397
5,No log,0.307902,0.889133,0.402894,0.479954,0.434295




Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,No log,0.780018,0.738643,0.184661,0.25,0.212419
2,No log,0.458432,0.861317,0.392145,0.436904,0.4132
3,No log,0.342054,0.884531,0.400901,0.474635,0.431509
4,No log,0.314124,0.888333,0.402518,0.479905,0.43397
5,No log,0.307902,0.889133,0.402894,0.479954,0.434295




Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,No log,0.780018,0.738643,0.184661,0.25,0.212419
2,No log,0.458432,0.861317,0.392145,0.436904,0.4132
3,No log,0.342054,0.884531,0.400901,0.474635,0.431509
4,No log,0.314124,0.888333,0.402518,0.479905,0.43397
5,No log,0.307902,0.889133,0.402894,0.479954,0.434295




Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,No log,0.780018,0.738643,0.184661,0.25,0.212419
2,No log,0.458432,0.861317,0.392145,0.436904,0.4132
3,No log,0.342054,0.884531,0.400901,0.474635,0.431509
4,No log,0.314124,0.888333,0.402518,0.479905,0.43397
5,No log,0.307902,0.889133,0.402894,0.479954,0.434295


The following columns in the evaluation set don't have a corresponding argument in `DistilBertForSequenceClassification.forward` and have been ignored: text. If text are not expected by `DistilBertForSequenceClassification.forward`,  you can safely ignore this message.
***** Running Evaluation *****
  Num examples = 4997
  Batch size = 128


--All Metrics --
--All Metrics --
--All Metrics --
{
    "Each_Class": {
        "accuracy": 0.7386431859115469,
        "f1": [
            {
                "agree": 0.0
            },
            {
                "disagree": 0.0
            },
            {
                "discuss": 0.0
            },
            {
                "unrelated": 0.8496777163904236
            }
        ],
        "precision": [
            {
                "agree": 0.0
            },
            {
                "disagree": 0.0
            },
            {
                "discuss": 0.0
            },
            {
                "unrelated": 0.7386431859115469
            }
        ],
        "recall": [
            {
                "agree": 0.0
            },
            {
                "disagree": 0.0
            },
            {
                "discuss": 0.0
            },
            {
                "unrelated": 1.0
            }
        ]
    },
    "macro": {
        "accuracy": 0.73

Saving model checkpoint to checkpoints/checkpoint-40


{
    "Each_Class": {
        "accuracy": 0.7386431859115469,
        "f1": [
            {
                "agree": 0.0
            },
            {
                "disagree": 0.0
            },
            {
                "discuss": 0.0
            },
            {
                "unrelated": 0.8496777163904236
            }
        ],
        "precision": [
            {
                "agree": 0.0
            },
            {
                "disagree": 0.0
            },
            {
                "discuss": 0.0
            },
            {
                "unrelated": 0.7386431859115469
            }
        ],
        "recall": [
            {
                "agree": 0.0
            },
            {
                "disagree": 0.0
            },
            {
                "discuss": 0.0
            },
            {
                "unrelated": 1.0
            }
        ]
    },
    "macro": {
        "accuracy": 0.7386431859115469,
        "f1": 0.2124194290976059,
 

Configuration saved in checkpoints/checkpoint-40/config.json
Model weights saved in checkpoints/checkpoint-40/pytorch_model.bin
tokenizer config file saved in checkpoints/checkpoint-40/tokenizer_config.json
Special tokens file saved in checkpoints/checkpoint-40/special_tokens_map.json
The following columns in the evaluation set don't have a corresponding argument in `DistilBertForSequenceClassification.forward` and have been ignored: text. If text are not expected by `DistilBertForSequenceClassification.forward`,  you can safely ignore this message.
***** Running Evaluation *****
  Num examples = 4997
  Batch size = 128


--All Metrics --
--All Metrics --
{
    "Each_Class": {
        "accuracy": 0.8613167900740444,
        "f1": [
            {
                "agree": 0.0
            },
            {
                "disagree": 0.0
            },
            {
                "discuss": 0.7038028923406535
            },
            {
                "unrelated": 0.9489981785063752
            }
        ],
        "precision": [
            {
                "agree": 0.0
            },
            {
                "disagree": 0.0
            },
            {
                "discuss": 0.655688622754491
            },
            {
                "unrelated": 0.9128911138923654
            }
        ],
        "recall": [
            {
                "agree": 0.0
            },
            {
                "disagree": 0.0
            },
            {
                "discuss": 0.7595375722543353
            },
            {
                "unrelated": 0.9880791113519372
            }
        ]
    

Saving model checkpoint to checkpoints/checkpoint-80






Configuration saved in checkpoints/checkpoint-80/config.json
Model weights saved in checkpoints/checkpoint-80/pytorch_model.bin
tokenizer config file saved in checkpoints/checkpoint-80/tokenizer_config.json
Special tokens file saved in checkpoints/checkpoint-80/special_tokens_map.json
The following columns in the evaluation set don't have a corresponding argument in `DistilBertForSequenceClassification.forward` and have been ignored: text. If text are not expected by `DistilBertForSequenceClassification.forward`,  you can safely ignore this message.
***** Running Evaluation *****
  Num examples = 4997
  Batch size = 128


--All Metrics --
--All Metrics --
--All Metrics --
--All Metrics --
--All Metrics --
--All Metrics --
--All Metrics --
{
    "Each_Class": {
        "accuracy": 0.8845307184310587,
        "f1": [
            {
                "agree": 0.0
            },
            {
                "disagree": 0.0
            },
            {
                "discuss": 0.7514231499051233
            },
            {
                "unrelated": 0.9746138347884487
            }
        ],
        "precision": [
            {
                "agree": 0.0
            },
            {
                "disagree": 0.0
            },
            {
                "discuss": 0.6371681415929203
            },
            {
                "unrelated": 0.9664358018114012
            }
        ],
        "recall": [
            {
                "agree": 0.0
            },
            {
                "disagree": 0.0
            },
            {
                "discuss": 0.915606936416185
            },
     

Saving model checkpoint to checkpoints/checkpoint-120
Configuration saved in checkpoints/checkpoint-120/config.json
Model weights saved in checkpoints/checkpoint-120/pytorch_model.bin
tokenizer config file saved in checkpoints/checkpoint-120/tokenizer_config.json
Special tokens file saved in checkpoints/checkpoint-120/special_tokens_map.json
The following columns in the evaluation set don't have a corresponding argument in `DistilBertForSequenceClassification.forward` and have been ignored: text. If text are not expected by `DistilBertForSequenceClassification.forward`,  you can safely ignore this message.
***** Running Evaluation *****
  Num examples = 4997
  Batch size = 128


--All Metrics --
{
    "Each_Class": {
        "accuracy": 0.8883329997998799,
        "f1": [
            {
                "agree": 0.0
            },
            {
                "disagree": 0.0
            },
            {
                "discuss": 0.7566557683325549
            },
            {
                "unrelated": 0.9792228818132759
            }
        ],
        "precision": [
            {
                "agree": 0.0
            },
            {
                "disagree": 0.0
            },
            {
                "discuss": 0.6347962382445141
            },
            {
                "unrelated": 0.9752754635850578
            }
        ],
        "recall": [
            {
                "agree": 0.0
            },
            {
                "disagree": 0.0
            },
            {
                "discuss": 0.9364161849710982
            },
            {
                "unrelated": 0.9832023841777296
            }
        ]
    },
    "macro": 

Saving model checkpoint to checkpoints/checkpoint-160





Configuration saved in checkpoints/checkpoint-160/config.json
Model weights saved in checkpoints/checkpoint-160/pytorch_model.bin
tokenizer config file saved in checkpoints/checkpoint-160/tokenizer_config.json
Special tokens file saved in checkpoints/checkpoint-160/special_tokens_map.json
The following columns in the evaluation set don't have a corresponding argument in `DistilBertForSequenceClassification.forward` and have been ignored: text. If text are not expected by `DistilBertForSequenceClassification.forward`,  you can safely ignore this message.
***** Running Evaluation *****
  Num examples = 4997
  Batch size = 128


--All Metrics --
--All Metrics --
{
    "Each_Class": {
        "accuracy": 0.8891334800880528,
        "f1": [
            {
                "agree": 0.0
            },
            {
                "disagree": 0.0
            },
            {
                "discuss": 0.7571361722040244
            },
            {
                "unrelated": 0.9800431499460626
            }
        ],
        "precision": [
            {
                "agree": 0.0
            },
            {
                "disagree": 0.0
            },
            {
                "discuss": 0.6360062893081762
            },
            {
                "unrelated": 0.9755704697986577
            }
        ],
        "recall": [
            {
                "agree": 0.0
            },
            {
                "disagree": 0.0
            },
            {
                "discuss": 0.9352601156069364
            },
            {
                "unrelated": 0.9845570306150094
            }
        ]
   

Saving model checkpoint to checkpoints/checkpoint-200
Configuration saved in checkpoints/checkpoint-200/config.json
Model weights saved in checkpoints/checkpoint-200/pytorch_model.bin
tokenizer config file saved in checkpoints/checkpoint-200/tokenizer_config.json
Special tokens file saved in checkpoints/checkpoint-200/special_tokens_map.json


Training completed. Do not forget to share your model on huggingface.co/models =)


Loading best model from checkpoints/checkpoint-200 (score: 0.8891334800880528).
The following columns in the evaluation set don't have a corresponding argument in `DistilBertForSequenceClassification.forward` and have been ignored: text. If text are not expected by `DistilBertForSequenceClassification.forward`,  you can safely ignore this message.
***** Running Evaluation *****
  Num examples = 4997
  Batch size = 128


--All Metrics --
--All Metrics --
{
    "Each_Class": {
        "accuracy": 0.8865319191514909,
        "f1": [
            {
                "agree": 0.0
            },
            {
                "disagree": 0.0
            },
            {
                "discuss": 0.7606645711719803
            },
            {
                "unrelated": 0.980166871836958
            }
        ],
        "precision": [
            {
                "agree": 0.0
            },
            {
                "disagree": 0.0
            },
            {
                "discuss": 0.6416666666666667
            },
            {
                "unrelated": 0.9744356812618983
            }
        ],
        "recall": [
            {
                "agree": 0.0
            },
            {
                "disagree": 0.0
            },
            {
                "discuss": 0.9338478500551268
            },
            {
                "unrelated": 0.9859658778205834
            }
        ]
    

Saving model checkpoint to nli_model/
Configuration saved in nli_model/config.json
Model weights saved in nli_model/pytorch_model.bin
tokenizer config file saved in nli_model/tokenizer_config.json
Special tokens file saved in nli_model/special_tokens_map.json
tokenizer config file saved in nli_model/tokenizer_config.json
Special tokens file saved in nli_model/special_tokens_map.json
