In [1]:
pip install bert-for-sequence-classification

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting bert-for-sequence-classification
  Downloading bert_for_sequence_classification-0.0.4-py3-none-any.whl (14 kB)
Collecting transformers>=4.2.0
  Downloading transformers-4.23.1-py3-none-any.whl (5.3 MB)
[K     |████████████████████████████████| 5.3 MB 5.0 MB/s 
Collecting tokenizers!=0.11.3,<0.14,>=0.11.1
  Downloading tokenizers-0.13.1-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (7.6 MB)
[K     |████████████████████████████████| 7.6 MB 62.8 MB/s 
Collecting huggingface-hub<1.0,>=0.10.0
  Downloading huggingface_hub-0.10.1-py3-none-any.whl (163 kB)
[K     |████████████████████████████████| 163 kB 68.0 MB/s 
Installing collected packages: tokenizers, huggingface-hub, transformers, bert-for-sequence-classification
Successfully installed bert-for-sequence-classification-0.0.4 huggingface-hub-0.10.1 tokenizers-0.13.1 transformers-4.23.1


In [2]:
import pandas as pd
from sklearn.model_selection import train_test_split
import os
import pandas as pd
import torch
import torch.optim as optim
import torch.nn as nn
import json

from transformers import AutoModel, AutoTokenizer

from bert_clf import BertCLF, train_evaluate, predict_metrics, prepare_data_notebook, prepare_dataset
from bert_clf.utils import set_global_seed

In [None]:
df = pd.read_csv('arg_struc.csv')

In [None]:
df = df[['text', 'label']]

In [None]:
train, test = train_test_split(df, test_size=0.25, random_state=42)

In [None]:
train['label'].value_counts()

Claim      12103
Premise    10255
Name: label, dtype: int64

In [None]:
test['label'].value_counts()

Claim      4147
Premise    3306
Name: label, dtype: int64

### Transformer LM

In [None]:
config = dict(
    transformer_model = dict(
        model = "roberta-base",
        path_to_state_dict = False,
        device = 'cuda',
        dropout = 0.2,
        learning_rate = 1e-5,
        batch_size = 16,
        shuffle = True,
        maxlen = 128,
    ),
    data = dict(
        train_data_path = None,
        test_data_path = None,
        text_column = "text",
        target_column = "label",
        random_state = 42,
        test_size = 0.3,
        stratify=True
    ),
    training = dict (
    save_state_dict = False, # if False the model will be saved using torch.save(<model_class>)
        # and should be loaded like this: model = torch.load()
        # you will have to install the library to do so
    early_stopping = True,
    delta = 0.001,
    patience = 7,
    num_epochs = 2,
    average_f1 = 'macro',
    other_metrics = ['micro', 'weighted'],
    output_dir = "../results/",
    class_weight = True
    )
)

In [None]:
set_global_seed(seed=config['data']['random_state'])
os.makedirs(config['training']['output_dir'], exist_ok=True)

In [None]:
device = torch.device(config['transformer_model']['device'])
tokenizer = AutoTokenizer.from_pretrained(
        pretrained_model_name_or_path=config['transformer_model']["model"]
    )
model_bert = AutoModel.from_pretrained(
    pretrained_model_name_or_path=config['transformer_model']["model"]
).to(device)

Downloading:   0%|          | 0.00/481 [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/899k [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/456k [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/1.36M [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/501M [00:00<?, ?B/s]

Some weights of the model checkpoint at roberta-base were not used when initializing RobertaModel: ['lm_head.layer_norm.bias', 'lm_head.bias', 'lm_head.layer_norm.weight', 'lm_head.dense.weight', 'lm_head.decoder.weight', 'lm_head.dense.bias']
- This IS expected if you are initializing RobertaModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


In [None]:
id2label, train_texts, valid_texts, train_targets, valid_targets = prepare_data_notebook(
    config=config, train_df = train
)

In [None]:
model = BertCLF(
    pretrained_model=model_bert,
    tokenizer=tokenizer,
    id2label=id2label,
    dropout=config['transformer_model']['dropout'],
    device=device     
    )

In [None]:
model = model.to(device)

In [None]:
optimizer = optim.Adam(model.parameters(), lr=float(config['transformer_model']['learning_rate']))
criterion = nn.NLLLoss()

training_generator, valid_generator = prepare_dataset(
    tokenizer=tokenizer,
    train_texts=train_texts,
    train_targets=train_targets,
    valid_texts=valid_texts,
    valid_targets=valid_targets,
    config=config
)

In [None]:
model = train_evaluate(
    model=model,
    training_generator=training_generator,
    valid_generator=valid_generator,
    criterion=criterion,
    optimizer=optimizer,
    num_epochs=config['training']['num_epochs'],
    average=config['training']['average_f1'],
    config=config
)

==== Epoch 1 out of 2 ====


Training loop: 100%|██████████| 979/979 [05:35<00:00,  2.92it/s]
Evaluating loop: 100%|██████████| 420/420 [00:34<00:00, 12.34it/s]


Train F1: 0.6597056982206021
Eval F1: 0.7222126976695029

Train F1 micro: 0.6827757916241062
Eval F1 micro: 0.7380952380952381

Train F1 weighted: 0.67507315882524
Eval F1 weighted: 0.7374990258944538

==== Epoch 2 out of 2 ====


Training loop: 100%|██████████| 979/979 [05:35<00:00,  2.92it/s]
Evaluating loop: 100%|██████████| 420/420 [00:34<00:00, 12.33it/s]


Train F1: 0.7351587651599283
Eval F1: 0.7210331073669005

Train F1 micro: 0.75
Eval F1 micro: 0.7370535714285714

Train F1 weighted: 0.7493860421910783
Eval F1 weighted: 0.7374052594072859




Computing final metrics...: 100%|██████████| 420/420 [00:33<00:00, 12.39it/s]

              precision    recall  f1-score   support

       Claim       0.75      0.77      0.76      3631
     Premise       0.72      0.70      0.71      3077

    accuracy                           0.74      6708
   macro avg       0.74      0.74      0.74      6708
weighted avg       0.74      0.74      0.74      6708






In [None]:
model.to('cpu')

BertCLF(
  (pretrained_model): RobertaModel(
    (embeddings): RobertaEmbeddings(
      (word_embeddings): Embedding(50265, 768, padding_idx=1)
      (position_embeddings): Embedding(514, 768, padding_idx=1)
      (token_type_embeddings): Embedding(1, 768)
      (LayerNorm): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
      (dropout): Dropout(p=0.1, inplace=False)
    )
    (encoder): RobertaEncoder(
      (layer): ModuleList(
        (0): RobertaLayer(
          (attention): RobertaAttention(
            (self): RobertaSelfAttention(
              (query): Linear(in_features=768, out_features=768, bias=True)
              (key): Linear(in_features=768, out_features=768, bias=True)
              (value): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.1, inplace=False)
            )
            (output): RobertaSelfOutput(
              (dense): Linear(in_features=768, out_features=768, bias=True)
              (LayerNorm): LayerNorm(

In [None]:
preds = []
for i,j in zip(test['text'], test['label']):
    preds.append([model.predict(i), j, i])

In [None]:
pred = []
for i in preds:
    pred.append(i[0])

true = []
for m in preds:
    true.append(m[1])

In [None]:
from sklearn.metrics import classification_report
# roberta adu level task 2, USElecDeb corpus

target_names = ['class 0', 'class 1']
print(classification_report(true, pred, target_names=target_names, digits=3))

              precision    recall  f1-score   support

     class 0      0.751     0.765     0.758      4147
     class 1      0.698     0.681     0.690      3306

    accuracy                          0.728      7453
   macro avg      0.725     0.723     0.724      7453
weighted avg      0.727     0.728     0.728      7453



In [None]:
import numpy as np
dummy = np.ones(7453)

In [None]:
# majority baseline for USElecDEb corpus task 2 
target_names = ['class 0', 'class 1']
print(classification_report(true, dummy, target_names=target_names, digits=3))

              precision    recall  f1-score   support

     class 0      0.000     0.000     0.000      3306
     class 1      0.556     1.000     0.715      4147

    accuracy                          0.556      7453
   macro avg      0.278     0.500     0.357      7453
weighted avg      0.310     0.556     0.398      7453



  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


### Testing on UC-UNSC

In [None]:
ua = pd.read_csv('component_full.csv')

In [None]:
ua

Unnamed: 0,Component,Label
0,the situation regarding Ukraine has seriously ...,claim
1,"Following close to two weeks of relative calm,...",premise
2,The individuals involved called for secession ...,premise
3,"In Donetsk, some of these individuals declared...",premise
4,It has now been reported that there have been ...,premise
...,...,...
4098,We welcome Italy's decision to designate dialo...,claim
4099,China supports practical and effective coopera...,claim
4100,We welcome all the positive efforts being made...,claim
4101,We hope that all the parties concerned will wo...,claim


In [None]:
import string

def preproc(sentence):
    sentence = sentence.lower()
    sentence = ''.join([i for i in sentence if i not in string.punctuation])
    return sentence

In [None]:
ua['Component'] = ua['Component'].apply(preproc)

In [None]:
ua

Unnamed: 0,Component,Label
0,the situation regarding ukraine has seriously ...,claim
1,following close to two weeks of relative calm ...,premise
2,the individuals involved called for secession ...,premise
3,in donetsk some of these individuals declared ...,premise
4,it has now been reported that there have been ...,premise
...,...,...
4098,we welcome italys decision to designate dialog...,claim
4099,china supports practical and effective coopera...,claim
4100,we welcome all the positive efforts being made...,claim
4101,we hope that all the parties concerned will wo...,claim


In [None]:
ua['Label'] = ua['Label'].str.replace('claim','Claim')
ua['Label'] = ua['Label'].str.replace('premise','Premise')

In [None]:
preds = []
for i,j in zip(ua['Component'], ua['Label']):
    preds.append([model.predict(i), j, i])

In [None]:
pred = []
for i in preds:
    pred.append(i[0])

true = []
for m in preds:
    true.append(m[1])

In [None]:
# UC-UNSC roberta task 2 ADU
target_names = ['class 0', 'class 1']
print(classification_report(true, pred, target_names=target_names, digits=3))

              precision    recall  f1-score   support

     class 0      0.805     0.642     0.714      2239
     class 1      0.654     0.813     0.725      1864

    accuracy                          0.720      4103
   macro avg      0.730     0.728     0.720      4103
weighted avg      0.736     0.720     0.719      4103

