In [1]:
import gc
import os

import numpy as np
import pandas as pd
import torch
from accelerate import Accelerator
from datasets import Dataset, DatasetDict
from sklearn.metrics import accuracy_score, precision_recall_fscore_support
from sklearn.preprocessing import LabelEncoder
from transformers import AutoModelForSequenceClassification, AutoConfig, AutoTokenizer, Trainer, TrainingArguments, ElectraTokenizer, ElectraForSequenceClassification, BertTokenizer

from performance import PerformanceSaver

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Checkpoint locations: the pretrained model that fine-tuning starts from, and
# the local directory that holds an already fine-tuned classifier.
model_name = "howey/electra-base-mnli"
saved_model_path = "models/electra_classifier"

# When True, the notebook loads the checkpoint under `saved_model_path`
# and skips the training cell.
LOAD_SAVED_MODEL = False

# Directory containing the train/dev/test CSV files.
data_dir = "data/"

In [3]:
# Choose the checkpoint once: the locally saved classifier when
# LOAD_SAVED_MODEL is set, otherwise the pretrained model.
checkpoint = saved_model_path if LOAD_SAVED_MODEL else model_name

tokenizer = ElectraTokenizer.from_pretrained(checkpoint)
# NOTE(review): `config` is built here but not passed to the model below.
config = AutoConfig.from_pretrained(checkpoint, num_labels=13)
# `ignore_mismatched_sizes=True` allows the 13-way classification head to be
# created even when the checkpoint's head has a different shape.
model = ElectraForSequenceClassification.from_pretrained(
    checkpoint,
    num_labels=13,
    ignore_mismatched_sizes=True,
    classifier_dropout=0.1,
)

# Move the model to the GPU whenever PyTorch can see one.
if torch.cuda.is_available():
    model = model.cuda()


Some weights of ElectraForSequenceClassification were not initialized from the model checkpoint at howey/electra-base-mnli and are newly initialized because the shapes did not match:
- classifier.out_proj.weight: found shape torch.Size([3, 768]) in the checkpoint and torch.Size([13, 768]) in the model instantiated
- classifier.out_proj.bias: found shape torch.Size([3]) in the checkpoint and torch.Size([13]) in the model instantiated
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [4]:
training_args = TrainingArguments(
    output_dir='./results',
    # Run both the training and the evaluation phase.
    do_train=True,
    do_eval=True,
    # Optimisation settings.
    num_train_epochs=6,
    learning_rate=8.5e-05,
    per_device_train_batch_size=12,
    per_device_eval_batch_size=12,
    # Logging, evaluation and checkpointing all happen every 50 steps.
    logging_strategy="steps",
    logging_steps=50,
    evaluation_strategy="steps",
    eval_steps=50,
    save_strategy="steps",
    save_steps=50,
    # Keep at most two checkpoints and reload the best one after training.
    save_total_limit=2,
    load_best_model_at_end=True,
)

def compute_metrics(pred):
    """Compute classification metrics for one evaluation pass.

    Args:
        pred: Evaluation output exposing `label_ids` (the reference labels)
            and `predictions` (per-class scores; the predicted class is the
            argmax over the last axis).

    Returns:
        dict with the keys "accuracy", "f1", "precision" and "recall".
        Precision, recall and F1 are weighted averages over the classes.
    """
    labels = pred.label_ids
    preds = pred.predictions.argmax(-1)
    # zero_division=0 scores a class that is never predicted (or never
    # present) as 0 explicitly, instead of relying on the default "warn"
    # mode, which yields the same 0 but emits a warning on each evaluation.
    precision, recall, f1, _ = precision_recall_fscore_support(
        labels, preds, average="weighted", zero_division=0
    )
    acc = accuracy_score(labels, preds)
    return {"accuracy": acc, "f1": f1, "precision": precision, "recall": recall}

In [5]:
# Read the three CSV splits from `data_dir`, in train / dev / test order.
train_df, dev_df, test_df = (
    pd.read_csv(os.path.join(data_dir, csv_name))
    for csv_name in ('train.csv', 'dev.csv', 'test.csv')
)

# Fit the label -> integer mapping on the training labels only, then apply
# that same mapping to every split.
label_encoder = LabelEncoder()
label_encoder.fit(train_df['label'])
print(label_encoder.classes_)
for split_df in (train_df, dev_df, test_df):
    split_df['label'] = label_encoder.transform(split_df['label'])


# Wrap the frames as datasets; the dev split is exposed as "validation".
dataset = DatasetDict({
    'train': Dataset.from_pandas(train_df),
    'validation': Dataset.from_pandas(dev_df),
    'test': Dataset.from_pandas(test_df)
})

['ad hominem' 'ad populum' 'appeal to emotion' 'circular reasoning'
 'equivocation' 'fallacy of credibility' 'fallacy of extension'
 'fallacy of logic' 'fallacy of relevance' 'false causality'
 'false dilemma' 'faulty generalization' 'intentional']


In [6]:
def process(batch):
    """Tokenize a batch of examples into model inputs.

    Args:
        batch: Mapping with a "text" entry (passed to the tokenizer) and a
            "label" entry (copied through unchanged).

    Returns:
        dict with "input_ids" and "attention_mask" from the tokenizer
        (truncated, padded with padding="max_length") and "labels".
    """
    encoded = tokenizer(batch["text"], truncation=True, padding="max_length")
    features = {key: encoded[key] for key in ("input_ids", "attention_mask")}
    features["labels"] = batch["label"]
    return features
    
# Tokenize every split in batches, dropping the columns of the train split
# so that only the fields returned by `process` remain.
tokenized_dataset = dataset.map(
    process,
    batched=True,
    remove_columns=dataset["train"].column_names,
)

Map: 100%|██████████| 3653/3653 [00:02<00:00, 1644.82 examples/s]
Map: 100%|██████████| 332/332 [00:00<00:00, 1650.60 examples/s]
Map: 100%|██████████| 331/331 [00:00<00:00, 1667.25 examples/s]


In [7]:
# Train on the "train" split and evaluate on "validation"; the "test" split
# is not given to the Trainer here.
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=tokenized_dataset["train"],
    eval_dataset=tokenized_dataset["validation"],
    tokenizer=tokenizer,
    compute_metrics=compute_metrics,
)

In [8]:
# Fine-tune only when no saved checkpoint was loaded, then report metrics on
# the test split.
if not LOAD_SAVED_MODEL:
    trainer.train()
    # Save to the configured path (not a repeated literal) so the directory
    # written here is the one read back when LOAD_SAVED_MODEL is True.
    trainer.save_model(saved_model_path)
    predictions = trainer.predict(tokenized_dataset["test"])
    print(predictions.metrics)

Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.
[34m[1mwandb[0m: Currently logged in as: [33mluca-mouchel[0m ([33mlia_epfl[0m). Use [1m`wandb login --relogin`[0m to force relogin




Step,Training Loss,Validation Loss,Accuracy,F1,Precision,Recall
50,1.7744,1.975606,0.433735,0.406432,0.509512,0.433735
100,0.8044,1.687549,0.539157,0.521926,0.587172,0.539157
150,0.5246,1.543121,0.533133,0.523196,0.570887,0.533133
200,0.2721,1.56171,0.569277,0.563421,0.569155,0.569277
250,0.157,1.626249,0.569277,0.567011,0.577199,0.569277
300,0.0868,1.732947,0.587349,0.580146,0.606282,0.587349
350,0.0508,1.831588,0.575301,0.57014,0.596121,0.575301
400,0.0306,1.827956,0.587349,0.584975,0.597094,0.587349
450,0.0154,1.8782,0.590361,0.583135,0.58725,0.590361


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


{'test_loss': 1.6786552667617798, 'test_accuracy': 0.5347432024169184, 'test_f1': 0.521081923073277, 'test_precision': 0.5742167657719635, 'test_recall': 0.5347432024169184, 'test_runtime': 3.0297, 'test_samples_per_second': 109.251, 'test_steps_per_second': 2.31}


In [None]:
# Example input classified at the end of this cell; edit it to try another sentence.
your_sentence = "Drinking vegetable juice, bitter gourd, can cure COVID-19"

def inference(text, classes):
    """Classify one piece of text and return the name of the predicted class.

    Args:
        text: Raw input text handed to the tokenizer.
        classes: Sequence indexed by the predicted class id; the entry at
            that index is returned.

    Returns:
        The element of `classes` at the index of the highest logit.

    Side effects:
        Switches the model to evaluation mode and prints the raw logits.
    """
    # Truncate the same way the training preprocessing does, so inputs longer
    # than the tokenizer's maximum length are cut rather than passed through
    # at full length.
    input_ids = tokenizer.encode(
        text,
        add_special_tokens=True,
        truncation=True,
        return_tensors="pt",
    ).to(model.device)
    model.eval()
    # No gradients are needed for a forward-only prediction.
    with torch.no_grad():
        logits = model(input_ids).logits
        print(logits)
    predicted_class = torch.argmax(logits, dim=1).item()
    return classes[predicted_class]

# Classify the example sentence; `label_encoder.classes_` turns the predicted
# index back into its label name.
inference(your_sentence, label_encoder.classes_)

tensor([[-1.1200, -0.9554, -0.2303, -1.2249,  1.7002, -0.5224, -1.5957,  1.1351,
          1.1680,  2.3626, -1.0502,  0.6686, -0.8711]], device='cuda:0')


'false causality'

In [None]:
# Drop the notebook's references to the model and the trainer, collect any
# reference cycles that still keep their tensors alive, and only then ask
# PyTorch to release its cached GPU memory.
del model
del trainer
gc.collect()
torch.cuda.empty_cache()

: 