In [1]:
! pip install bert-for-sequence-classification

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting bert-for-sequence-classification
  Downloading bert_for_sequence_classification-0.0.4-py3-none-any.whl (14 kB)
Collecting transformers>=4.2.0
  Downloading transformers-4.23.1-py3-none-any.whl (5.3 MB)
[K     |████████████████████████████████| 5.3 MB 14.9 MB/s 
Collecting tokenizers!=0.11.3,<0.14,>=0.11.1
  Downloading tokenizers-0.13.1-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (7.6 MB)
[K     |████████████████████████████████| 7.6 MB 65.2 MB/s 
Collecting huggingface-hub<1.0,>=0.10.0
  Downloading huggingface_hub-0.10.1-py3-none-any.whl (163 kB)
[K     |████████████████████████████████| 163 kB 87.0 MB/s 
Installing collected packages: tokenizers, huggingface-hub, transformers, bert-for-sequence-classification
Successfully installed bert-for-sequence-classification-0.0.4 huggingface-hub-0.10.1 tokenizers-0.13.1 transformers-4.23.1


In [2]:
import os
import pandas as pd
import torch
import torch.optim as optim
import torch.nn as nn
import json

from transformers import AutoModel, AutoTokenizer

from bert_clf import BertCLF, train_evaluate, predict_metrics, prepare_data_notebook, prepare_dataset
from bert_clf.utils import set_global_seed



### Prepare UC-UNSC dataset for testing

In [3]:
df = pd.read_csv('sentence_full.csv', sep =',')

In [4]:
df

Unnamed: 0,Sentence,Label,Components
0,"Once again, since the last briefing to the Cou...",claim,{'the situation regarding Ukraine has seriousl...
1,This is now the tenth time that the Council ha...,none,{}
2,The General Assembly also took up the matter o...,none,{}
3,"Following close to two weeks of relative calm,...",premise,{'Following close to two weeks of relative cal...
4,The individuals involved called for secession ...,premise,{'The individuals involved called for secessio...
...,...,...,...
4746,We welcome Italy's decision to designate dialo...,claim,"{""We welcome Italy's decision to designate dia..."
4747,China supports practical and effective coopera...,claim,{'China supports practical and effective coope...
4748,We welcome all the positive efforts being made...,claim,{'We welcome all the positive efforts being ma...
4749,We hope that all the parties concerned will wo...,claim,{'We hope that all the parties concerned will ...


In [5]:
valid = ['claim', 'premise']
utest = df.loc[(df['Label'].isin(valid))]


In [6]:
utest

Unnamed: 0,Sentence,Label,Components
0,"Once again, since the last briefing to the Cou...",claim,{'the situation regarding Ukraine has seriousl...
3,"Following close to two weeks of relative calm,...",premise,{'Following close to two weeks of relative cal...
4,The individuals involved called for secession ...,premise,{'The individuals involved called for secessio...
5,"In Donetsk, some of these individuals declared...",premise,"{""In Donetsk, some of these individuals declar..."
6,It has now been reported that there have been ...,premise,{'It has now been reported that there have bee...
...,...,...,...
4746,We welcome Italy's decision to designate dialo...,claim,"{""We welcome Italy's decision to designate dia..."
4747,China supports practical and effective coopera...,claim,{'China supports practical and effective coope...
4748,We welcome all the positive efforts being made...,claim,{'We welcome all the positive efforts being ma...
4749,We hope that all the parties concerned will wo...,claim,{'We hope that all the parties concerned will ...


In [7]:
utest['Label'] = utest['Label'].str.replace('claim','Claim')
utest['Label'] = utest['Label'].str.replace('premise','Premise')

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  """Entry point for launching an IPython kernel.
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  


### USElecDeb dataset

In [8]:
df = pd.read_csv('sentence_db_candidate.csv', sep =',')

In [9]:
df.shape

(29621, 18)

In [10]:
valid = ['Claim', 'Premise']
df = df.loc[(df['Component'].isin(valid))]

In [11]:
#splitting as the authors did 
df_train = df[df['Set'] == 'TRAIN']
df_val = df[df['Set'] == 'VALIDATION']
df_test = df[df['Set'] == 'TEST']

df_train = df_train[['Speech', 'Component']]
df_val = df_val[['Speech', 'Component']]
df_test = df_test[['Speech', 'Component']]

In [12]:
print(df_train.shape, df_val.shape, df_test.shape)

(10464, 2) (5241, 2) (6575, 2)


### Transformer Language Model

In [13]:
config = dict(
    transformer_model = dict(
        model = "roberta-base",
        path_to_state_dict = False,
        device = 'cuda',
        dropout = 0.2,
        learning_rate = 2e-6,
        batch_size = 16,
        shuffle = True,
        maxlen = 128,
    ),
    data = dict(
        train_data_path = df_train,
        test_data_path = df_val,
        text_column = "Speech",
        target_column = "Component",
        random_state = 20,
        test_size = 0.3,
        stratify=True
    ),
    training = dict (
    save_state_dict = False, # if False the model will be saved using torch.save(<model_class>)
        # and should be loaded like this: model = torch.load()
        # you will have to install the library to do so
    early_stopping = True,
    delta = 0.001,
    patience = 7,
    num_epochs = 2,
    average_f1 = 'macro',
    other_metrics = ['micro', 'weighted'],
    output_dir = "../results/",
    class_weight = True
    )
)

In [14]:
set_global_seed(seed=config['data']['random_state'])
os.makedirs(config['training']['output_dir'], exist_ok=True)

In [15]:
device = torch.device(config['transformer_model']['device'])
tokenizer = AutoTokenizer.from_pretrained(
        pretrained_model_name_or_path=config['transformer_model']["model"]
    )
model_bert = AutoModel.from_pretrained(
    pretrained_model_name_or_path=config['transformer_model']["model"]
).to(device)

#for param in model_bert.parameters():
    #param.requires_grad = False

Downloading:   0%|          | 0.00/481 [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/899k [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/456k [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/1.36M [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/501M [00:00<?, ?B/s]

Some weights of the model checkpoint at roberta-base were not used when initializing RobertaModel: ['lm_head.layer_norm.weight', 'lm_head.dense.bias', 'lm_head.bias', 'lm_head.decoder.weight', 'lm_head.layer_norm.bias', 'lm_head.dense.weight']
- This IS expected if you are initializing RobertaModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


In [16]:
id2label, train_texts, valid_texts, train_targets, valid_targets = prepare_data_notebook(
    config=config, train_df = df_train, test_df = df_val
)

In [18]:
model = BertCLF(
    pretrained_model=model_bert,
    tokenizer=tokenizer,
    id2label=id2label,
    dropout=config['transformer_model']['dropout'],
    device=device     
    )

In [19]:
model = model.to(device)

In [20]:
optimizer = optim.Adam(model.parameters(), lr=float(config['transformer_model']['learning_rate']))
criterion = nn.NLLLoss()

training_generator, valid_generator = prepare_dataset(
    tokenizer=tokenizer,
    train_texts=train_texts,
    train_targets=train_targets,
    valid_texts=valid_texts,
    valid_targets=valid_targets,
    config=config
)

In [21]:
model = train_evaluate(
    model=model,
    training_generator=training_generator,
    valid_generator=valid_generator,
    criterion=criterion,
    optimizer=optimizer,
    num_epochs=config['training']['num_epochs'],
    average=config['training']['average_f1'],
    config=config
)

==== Epoch 1 out of 2 ====


Training loop: 100%|██████████| 654/654 [03:46<00:00,  2.89it/s]
Evaluating loop: 100%|██████████| 328/328 [00:36<00:00,  9.10it/s]


Train F1: 0.6039688702358419
Eval F1: 0.683730697850249

Train F1 micro: 0.6339831804281345
Eval F1 micro: 0.7064913617886179

Train F1 weighted: 0.6231790031073614
Eval F1 weighted: 0.7024163445992616

==== Epoch 2 out of 2 ====


Training loop: 100%|██████████| 654/654 [03:56<00:00,  2.77it/s]
Evaluating loop: 100%|██████████| 328/328 [00:36<00:00,  9.07it/s]


Train F1: 0.7215147547869576
Eval F1: 0.6870416123866058

Train F1 micro: 0.7388188073394495
Eval F1 micro: 0.7110221883468835

Train F1 weighted: 0.7385438616584554
Eval F1 weighted: 0.7051581432739427




Computing final metrics...: 100%|██████████| 328/328 [00:35<00:00,  9.12it/s]


              precision    recall  f1-score   support

       Claim       0.70      0.83      0.76      2837
     Premise       0.74      0.58      0.65      2404

    accuracy                           0.71      5241
   macro avg       0.72      0.70      0.70      5241
weighted avg       0.71      0.71      0.71      5241



In [22]:
model.to('cpu')

BertCLF(
  (pretrained_model): RobertaModel(
    (embeddings): RobertaEmbeddings(
      (word_embeddings): Embedding(50265, 768, padding_idx=1)
      (position_embeddings): Embedding(514, 768, padding_idx=1)
      (token_type_embeddings): Embedding(1, 768)
      (LayerNorm): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
      (dropout): Dropout(p=0.1, inplace=False)
    )
    (encoder): RobertaEncoder(
      (layer): ModuleList(
        (0): RobertaLayer(
          (attention): RobertaAttention(
            (self): RobertaSelfAttention(
              (query): Linear(in_features=768, out_features=768, bias=True)
              (key): Linear(in_features=768, out_features=768, bias=True)
              (value): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.1, inplace=False)
            )
            (output): RobertaSelfOutput(
              (dense): Linear(in_features=768, out_features=768, bias=True)
              (LayerNorm): LayerNorm(

In [24]:
preds = []
for i,j in zip(df_test['Speech'], df_test['Component']):
    preds.append([model.predict(i), j, i])

In [25]:
pred = []
for i in preds:
    pred.append(i[0])

true = []
for m in preds:
    true.append(m[1])

In [None]:
target_names = ['class 1', 'class 0']
print(classification_report(true, pred, target_names=target_names, digits=3))

### Testing on UC-UNSC

In [27]:
preds = []
for i,j in zip(utest['Sentence'], utest['Label']):
    preds.append([model.predict(i), j, i])

In [28]:
pred = []
for i in preds:
    pred.append(i[0])

true = []
for m in preds:
    true.append(m[1])

In [30]:
from sklearn.metrics import classification_report
target_names = ['class 1', 'class 0']
print(classification_report(true, pred, target_names=target_names, digits=3))

              precision    recall  f1-score   support

     class 1      0.741     0.804     0.771      2077
     class 0      0.739     0.664     0.700      1737

    accuracy                          0.740      3814
   macro avg      0.740     0.734     0.735      3814
weighted avg      0.740     0.740     0.739      3814



In [None]:
import numpy as np
dummy = np.ones(3814)

In [None]:
# baseline for task 2 on Ukraine data 
target_names = ['class 0', 'class 1']
print(classification_report(true, dummy, target_names=target_names, digits=3))

              precision    recall  f1-score   support

     class 0      0.000     0.000     0.000      1737
     class 1      0.545     1.000     0.705      2077

    accuracy                          0.545      3814
   macro avg      0.272     0.500     0.353      3814
weighted avg      0.297     0.545     0.384      3814



  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
