In [4]:
from datasets import load_dataset, Dataset
from transformers import AutoTokenizer, AutoModelForSequenceClassification, pipeline
import torch
from transformers import DataCollatorWithPadding
import evaluate
import numpy as np
from evaluate import evaluator
import torch.cuda
import pandas as pd
from sklearn.metrics import classification_report

In [5]:
echr = load_dataset("ecthr_cases",  "violation-prediction")

In [6]:
tokenizer = AutoTokenizer.from_pretrained("albert-base-v2")

In [7]:
def encode(examples):
    return tokenizer( examples["text"],
                     truncation=True, 
                     padding=True)

In [8]:
test_dataset = echr['test']
test_dataset = test_dataset.map( lambda examples: {"text": "\n".join(examples["facts"])})
test_dataset = test_dataset.map(encode, batched=True)
test_dataset = test_dataset.map( lambda examples: {'labels' :list(1 if examples['labels'][i] else 0 for i in range(len(examples['labels'])))}, batched=True)

Map:   0%|          | 0/1000 [00:00<?, ? examples/s]

Map:   0%|          | 0/1000 [00:00<?, ? examples/s]

Map:   0%|          | 0/1000 [00:00<?, ? examples/s]

In [9]:
data_collator = DataCollatorWithPadding(tokenizer=tokenizer)

In [10]:
accuracy = evaluate.load("accuracy")
f1 = evaluate.load("f1")

In [11]:
device = "cuda" if torch.cuda.is_available() else "cpu"

In [12]:
id2label = {0: "NON_VIOLATED", 1: "VIOLATED"}
label2id = {"NON_VIOLATED": 0, "VIOLATED": 1}
model = AutoModelForSequenceClassification.from_pretrained("../models/albert_ecthr_model/checkpoint-192", num_labels=2, id2label=id2label, label2id=label2id)
model.to(device)

AlbertForSequenceClassification(
  (albert): AlbertModel(
    (embeddings): AlbertEmbeddings(
      (word_embeddings): Embedding(30000, 128, padding_idx=0)
      (position_embeddings): Embedding(512, 128)
      (token_type_embeddings): Embedding(2, 128)
      (LayerNorm): LayerNorm((128,), eps=1e-12, elementwise_affine=True)
      (dropout): Dropout(p=0, inplace=False)
    )
    (encoder): AlbertTransformer(
      (embedding_hidden_mapping_in): Linear(in_features=128, out_features=768, bias=True)
      (albert_layer_groups): ModuleList(
        (0): AlbertLayerGroup(
          (albert_layers): ModuleList(
            (0): AlbertLayer(
              (full_layer_layer_norm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
              (attention): AlbertAttention(
                (query): Linear(in_features=768, out_features=768, bias=True)
                (key): Linear(in_features=768, out_features=768, bias=True)
                (value): Linear(in_features=768, out_features=768,

In [13]:
pipe = pipeline("text-classification", model=model, tokenizer=tokenizer, device=device)

In [14]:
results = pipe(tokenizer.decode(case, clean_up_tokenization_spaces=True, skip_special_tokens=True) for case in test_dataset['input_ids'])

In [15]:
predictions = list(result['label'] for result in results)

In [16]:
report = classification_report(list('NON_VIOLATED' if outcome==0 else 'VIOLATED' for outcome in test_dataset['labels']), predictions)

In [17]:
print(report)

              precision    recall  f1-score   support

NON_VIOLATED       0.19      0.78      0.30       135
    VIOLATED       0.93      0.47      0.63       865

    accuracy                           0.51      1000
   macro avg       0.56      0.62      0.46      1000
weighted avg       0.83      0.51      0.58      1000

