In [None]:
# Mount Google Drive into the Colab filesystem at /content/drive.
# NOTE(review): later cells read "data/..." and write to "output" using relative
# paths — confirm the working directory is where those are expected to live.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
!pip install transformers==4.15.0

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting transformers==4.15.0
  Downloading transformers-4.15.0-py3-none-any.whl (3.4 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m3.4/3.4 MB[0m [31m34.6 MB/s[0m eta [36m0:00:00[0m
Collecting sacremoses
  Downloading sacremoses-0.0.53.tar.gz (880 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m880.6/880.6 KB[0m [31m59.5 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25l[?25hdone
Collecting huggingface-hub<1.0,>=0.1.0
  Downloading huggingface_hub-0.11.1-py3-none-any.whl (182 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m182.4/182.4 KB[0m [31m22.2 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting tokenizers<0.11,>=0.10.1
  Downloading tokenizers-0.10.3-cp38-cp38-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl (3.3 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━

In [None]:
import pandas as pd
from transformers import RobertaTokenizer
from torch.utils.data import TensorDataset, DataLoader
import torch
import torch.nn as nn
import torch.nn.functional as F
from transformers import RobertaTokenizer, RobertaModel, AdamW
import numpy as np
import shutil
from tqdm import tqdm
from configparser import ConfigParser
import os
import json
import argparse
import shutil
import sys
import transformers
from nltk.tokenize import sent_tokenize
from torch.utils.data import Dataset, DataLoader
from transformers import AutoTokenizer, AutoModelForSequenceClassification, TrainingArguments, Trainer
from sklearn.metrics import accuracy_score, classification_report
from sklearn.feature_extraction.text import TfidfVectorizer

import warnings
warnings.simplefilter("ignore")

In [None]:
def load_dataframes(data_dir="data"):
    """Load the train/valid/test splits from pickled DataFrames.

    Args:
        data_dir: Directory containing ``train.pkl``, ``valid.pkl`` and
            ``test.pkl``. Defaults to ``"data"`` (the original hard-coded
            location).

    Returns:
        dict mapping ``"train"``, ``"valid"`` and ``"test"`` (in that order)
        to whatever ``pd.read_pickle`` returns for the corresponding file.
    """
    # NOTE(review): unpickling can execute arbitrary code — only point this at
    # files from a trusted source.
    return {
        split: pd.read_pickle(os.path.join(data_dir, f"{split}.pkl"))
        for split in ("train", "valid", "test")
    }


class ClaimData(Dataset):
    """Dataset that tokenizes (claim text, evidence) pairs for classification.

    Args:
        dataframe: Table providing the ``'text'``, ``'evidence_sents'`` and
            ``'rating'`` columns.
        tokenizer: Callable invoked as ``tokenizer(text, evidence, ...)`` that
            returns a mapping with ``'input_ids'``, ``'attention_mask'`` and
            ``'token_type_ids'``.
        max_len: Value passed to the tokenizer as ``max_length``; sequences are
            truncated and padded to it.
    """

    def __init__(self, dataframe, tokenizer, max_len):
        self.tokenizer = tokenizer
        self.data = dataframe
        self.text = dataframe['text']
        self.evidence = dataframe['evidence_sents']
        self.targets = dataframe['rating']
        self.max_len = max_len

    def __len__(self):
        """Return the number of examples (rows in the text column)."""
        return len(self.text)

    @staticmethod
    def _at(column, index):
        """Return the element at position ``index`` of ``column``.

        A pandas Series indexed with ``[]`` looks up by *label*, which breaks
        (KeyError or wrong row) whenever the frame's index is not 0..n-1, e.g.
        after filtering. ``.iloc`` is used when available so access is always
        positional; plain sequences fall back to normal indexing.
        """
        positional = getattr(column, "iloc", None)
        return column[index] if positional is None else positional[index]

    def __getitem__(self, index):
        """Tokenize the ``index``-th example.

        Returns:
            dict with ``'input_ids'``, ``'attention_mask'``,
            ``'token_type_ids'`` and ``'labels'``, each a ``torch.long`` tensor.
        """
        # Collapse every run of whitespace (including newlines) to one space.
        text = str(self._at(self.text, index))
        text = " ".join(text.split())

        evidence = str(self._at(self.evidence, index))
        evidence = " ".join(evidence.split())

        inputs = self.tokenizer(
            text,
            evidence,
            truncation="longest_first",
            add_special_tokens=True,
            max_length=self.max_len,
            # Replaces the deprecated pad_to_max_length=True.
            padding="max_length",
            return_token_type_ids=True
        )

        ids = inputs['input_ids']
        mask = inputs['attention_mask']
        token_type_ids = inputs["token_type_ids"]

        return {
            'input_ids': torch.tensor(ids, dtype=torch.long),
            'attention_mask': torch.tensor(mask, dtype=torch.long),
            'token_type_ids': torch.tensor(token_type_ids, dtype=torch.long),
            'labels': torch.tensor(self._at(self.targets, index), dtype=torch.long)
        }

def compute_metrics(eval_pred):
    """Compute accuracy and macro-averaged F1 over classes 0, 1 and 2.

    Args:
        eval_pred: Pair ``(predictions, labels)``; ``predictions`` holds
            per-class scores and is reduced with an argmax over axis 1.

    Returns:
        dict with the keys ``'accuracy'`` and ``'macro f1-score'``.
    """
    scores, gold = eval_pred
    predicted = np.argmax(scores, axis=1)

    class_names = [0, 1, 2]
    report = classification_report(
        gold,
        predicted,
        labels=range(3),
        target_names=class_names,
        output_dict=True,
    )

    return {
        'accuracy': accuracy_score(gold, predicted),
        'macro f1-score': report['macro avg']['f1-score'],
    }




In [None]:
# Run configuration: RNG seed, number of epochs and checkpoint directory.
rand_seed = 42
num_epochs = 3
torch.manual_seed(rand_seed)
output_dir = "output"
# exist_ok replaces the check-then-create pattern: no race between the test and
# the mkdir, and re-running the cell is a no-op.
os.makedirs(output_dir, exist_ok=True)

df = load_dataframes()

model_checkpoint = 'roberta-base'

tokenizer = AutoTokenizer.from_pretrained(model_checkpoint)

# Every example is truncated/padded to this many tokens by ClaimData.
MAX_SEQ_LEN = 512
print("creating data....")
training_set = ClaimData(df['train'], tokenizer, MAX_SEQ_LEN)
validation_set = ClaimData(df['valid'], tokenizer, MAX_SEQ_LEN)
testing_set = ClaimData(df['test'], tokenizer, MAX_SEQ_LEN)
print("setting model....")
# num_labels=3 matches the three classes scored in compute_metrics.
model = AutoModelForSequenceClassification.from_pretrained(model_checkpoint, num_labels=3)

args = TrainingArguments(
    output_dir=output_dir,
    # Evaluate and checkpoint once per epoch.
    evaluation_strategy="epoch",
    save_strategy='epoch',
    learning_rate=1e-6,
    per_device_train_batch_size=12,
    per_device_eval_batch_size=12,
    num_train_epochs=num_epochs,
    weight_decay=0.001,
    load_best_model_at_end=True,
    # Same key that compute_metrics puts in its result dict.
    metric_for_best_model='macro f1-score',
    seed=rand_seed,
)

trainer = Trainer(
    model=model,
    args=args,
    train_dataset=training_set,
    eval_dataset=validation_set,
    tokenizer=tokenizer,
    compute_metrics=compute_metrics,
)

Downloading:   0%|          | 0.00/482 [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/878k [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/446k [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/1.29M [00:00<?, ?B/s]

creating data....
setting model....


Downloading:   0%|          | 0.00/1.33G [00:00<?, ?B/s]

Some weights of the model checkpoint at roberta-large were not used when initializing RobertaForSequenceClassification: ['roberta.pooler.dense.weight', 'lm_head.layer_norm.weight', 'lm_head.decoder.weight', 'lm_head.layer_norm.bias', 'lm_head.dense.bias', 'roberta.pooler.dense.bias', 'lm_head.dense.weight', 'lm_head.bias']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at roberta-large and are newly initialized: ['classifier.out_proj.bias', 'classi

In [None]:
# Fine-tune the model with the configured trainer.
print("training....")
trainer.train()

# Score the trained model on the validation split; the metrics dict is kept
# in eval_results.
print("evaluating....")
eval_results = trainer.evaluate(eval_dataset=validation_set)

***** Running training *****
  Num examples = 16859
  Num Epochs = 3
  Instantaneous batch size per device = 12
  Total train batch size (w. parallel, distributed & accumulation) = 12
  Gradient Accumulation steps = 1
  Total optimization steps = 4215


training....


Epoch,Training Loss,Validation Loss,Accuracy,Macro f1-score
1,0.9859,0.924528,0.573611,0.414463
2,0.913,0.898621,0.586339,0.443739
3,0.884,0.891291,0.592703,0.470188


***** Running Evaluation *****
  Num examples = 2357
  Batch size = 12
Saving model checkpoint to output/checkpoint-1405
Configuration saved in output/checkpoint-1405/config.json
Model weights saved in output/checkpoint-1405/pytorch_model.bin
tokenizer config file saved in output/checkpoint-1405/tokenizer_config.json
Special tokens file saved in output/checkpoint-1405/special_tokens_map.json
***** Running Evaluation *****
  Num examples = 2357
  Batch size = 12
Saving model checkpoint to output/checkpoint-2810
Configuration saved in output/checkpoint-2810/config.json
Model weights saved in output/checkpoint-2810/pytorch_model.bin
tokenizer config file saved in output/checkpoint-2810/tokenizer_config.json
Special tokens file saved in output/checkpoint-2810/special_tokens_map.json
***** Running Evaluation *****
  Num examples = 2357
  Batch size = 12
Saving model checkpoint to output/checkpoint-4215
Configuration saved in output/checkpoint-4215/config.json
Model weights saved in output/c

evaluating....


In [None]:
# Destination for the per-claim predictions written by the export cell.
output_predict_file = os.path.join(output_dir, "predictions.csv")

# Run inference on the test split; unpacks into model outputs, labels and metrics.
predictions, labels, metrics = trainer.predict(testing_set)
metrics.update(predict_samples=len(testing_set))

# Report the metrics and persist them via the trainer.
trainer.log_metrics("predict", metrics)
trainer.save_metrics("predict", metrics)

# Reduce per-class scores to the index of the highest-scoring class.
predictions = np.argmax(predictions, axis=1)

***** Running Prediction *****
  Num examples = 2357
  Batch size = 12


***** predict metrics *****
  predict_samples         =       2357
  test_accuracy           =     0.4103
  test_loss               =     1.1116
  test_macro f1-score     =     0.1939
  test_runtime            = 0:02:54.27
  test_samples_per_second =     13.524
  test_steps_per_second   =       1.13


In [None]:
# Sanity check: there must be exactly one prediction per test claim id.
print(len(predictions))
test_ids = df["test"]["claim_id"].values
print(len(test_ids))
# The export step pairs ids with predictions using zip(), which silently drops
# the tail on a length mismatch — fail loudly here instead.
assert len(predictions) == len(test_ids), (
    f"{len(predictions)} predictions for {len(test_ids)} test ids"
)

2357
2357


In [None]:
import csv

# Write "id,rating" rows for the test set; only the main process writes.
if trainer.is_world_process_zero():
    # newline="" hands line-ending control to the csv module, as its
    # documentation requires (otherwise blank rows appear on some platforms).
    with open(output_predict_file, "w", newline="") as f:
        writer = csv.writer(f)
        writer.writerow(["id", "rating"])
        writer.writerows(zip(test_ids, predictions))
        # Extra ids emitted with a fixed rating of 0.
        # NOTE(review): presumably claims expected in the submission but absent
        # from the loaded test split — confirm and document their origin.
        for fallback_id in (30380, 31035, 31420):
            writer.writerow([fallback_id, 0])