[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/drive/1tBNaK1av-ZOYcKIv1kJMjq90wUis8VtV?usp=sharing)

# base(base)(topic)(in-texts)

In [None]:
!pip install pytorch_pretrained_bert pytorch-nlp

Collecting pytorch_pretrained_bert
  Downloading pytorch_pretrained_bert-0.6.2-py3-none-any.whl (123 kB)
[K     |████████████████████████████████| 123 kB 28.4 MB/s 
[?25hCollecting pytorch-nlp
  Downloading pytorch_nlp-0.5.0-py3-none-any.whl (90 kB)
[K     |████████████████████████████████| 90 kB 11.9 MB/s 
Collecting boto3
  Downloading boto3-1.23.5-py3-none-any.whl (132 kB)
[K     |████████████████████████████████| 132 kB 68.2 MB/s 
Collecting jmespath<2.0.0,>=0.7.1
  Downloading jmespath-1.0.0-py3-none-any.whl (23 kB)
Collecting botocore<1.27.0,>=1.26.5
  Downloading botocore-1.26.5-py3-none-any.whl (8.8 MB)
[K     |████████████████████████████████| 8.8 MB 52.7 MB/s 
[?25hCollecting s3transfer<0.6.0,>=0.5.0
  Downloading s3transfer-0.5.2-py3-none-any.whl (79 kB)
[K     |████████████████████████████████| 79 kB 9.6 MB/s 
[?25hCollecting urllib3<1.27,>=1.25.4
  Downloading urllib3-1.26.9-py2.py3-none-any.whl (138 kB)
[K     |████████████████████████████████| 138 kB 75.3 MB/s 


In [None]:
import sys
import numpy as np
import random as rn
import torch
from pytorch_pretrained_bert import BertModel
from torch import nn
from pytorch_pretrained_bert import BertTokenizer

from keras.preprocessing.sequence import pad_sequences
from torch.utils.data import TensorDataset, DataLoader, RandomSampler, SequentialSampler
from torch.optim import Adam
from torch.nn.utils import clip_grad_norm_
from IPython.display import clear_output

In [None]:
# Seed every random number generator this notebook touches with the same value.
seed_val = 42
for _seed_fn in (rn.seed, np.random.seed, torch.manual_seed, torch.cuda.manual_seed):
    _seed_fn(seed_val)

In [None]:
# Load the pretrained 'bert-base-uncased' tokenizer with lower-casing enabled;
# the classifier defined further down loads its encoder under the same checkpoint name.
tokenizer = BertTokenizer.from_pretrained('bert-base-uncased', do_lower_case=True)

100%|██████████| 231508/231508 [00:00<00:00, 273414.20B/s]


In [None]:
import pandas as pd

# The three splits, each expected to provide the columns
# 'topic', 'argument', 'key_point', 'intermediary_text' and 'label'.
train = pd.read_csv("/content/drive/MyDrive/Keypoints/inter-text_from_T5-small_temp_1_full_train.csv")
dev = pd.read_csv("/content/drive/MyDrive/Keypoints/inter-text_from_T5-small_temp_1_full_dev.csv")
test = pd.read_csv("/content/drive/MyDrive/Keypoints/inter-text_from_T5-small_temp_1_full_test.csv")


def build_pair(topic, arg, key, in_text):
    """Join topic, argument, key point and intermediary text into one input string.

    A '. ' separator follows the argument unless the argument already ends
    with a period, in which case a single space is used. An empty argument
    is treated as not ending with a period instead of raising.
    """
    arg_sep = ' ' if arg.endswith('.') else '. '
    return topic + '. ' + arg + arg_sep + key + '.' + in_text


for split in [train, dev, test]:
    # FIX: the original used the whole 'intermediary_text' column instead of the
    # current row's value, so the concatenation produced a Series and storing it
    # in a single cell raised the ValueError recorded in this cell's output.
    split['pair'] = [
        build_pair(topic, arg, key, in_text)
        for topic, arg, key, in_text in zip(
            split['topic'], split['argument'], split['key_point'], split['intermediary_text']
        )
    ]

ValueError: ignored

In [None]:
# Preview the first rows only; displaying the whole frame bloats the saved notebook.
train.head()

In [None]:
# Inspect the concatenated input text of the row labelled 0.
train.loc[0, 'pair']

In [None]:
# Pull the input texts ('pair') and the targets ('label') out of each split.
pairs_train, labels_train = train["pair"].values, train["label"].values
pairs_dev, labels_dev = dev["pair"].values, dev["label"].values
pairs_test, labels_test = test["pair"].values, test["label"].values

In [None]:
def _wrap_tokens(text):
    """Tokenize ``text``, keep at most 79 tokens and frame them with [CLS] / [SEP]."""
    return ['[CLS]'] + tokenizer.tokenize(text)[:79] + ['[SEP]']


def _to_padded_ids(token_lists):
    """Convert token lists to ids, post-padded / post-truncated to length 81."""
    id_lists = [tokenizer.convert_tokens_to_ids(tokens) for tokens in token_lists]
    return pad_sequences(id_lists, maxlen=81, truncating="post", padding="post", dtype="int")


train_tokens = [_wrap_tokens(text) for text in pairs_train]
dev_tokens = [_wrap_tokens(text) for text in pairs_dev]
test_tokens = [_wrap_tokens(text) for text in pairs_test]

train_tokens_ids = _to_padded_ids(train_tokens)
dev_tokens_ids = _to_padded_ids(dev_tokens)
test_tokens_ids = _to_padded_ids(test_tokens)



In [None]:
# Attention masks: 1.0 where the token id is positive, 0.0 elsewhere
# (presumably the padded positions -- confirm against the padding value used above).
train_masks = (train_tokens_ids > 0).astype(float).tolist()
dev_masks = (dev_tokens_ids > 0).astype(float).tolist()
test_masks = (test_tokens_ids > 0).astype(float).tolist()

### BERT model

In [None]:
class BertBinaryClassifier(nn.Module):
    """Pretrained BERT encoder followed by dropout, a 768 -> 1 linear layer and a sigmoid."""

    def __init__(self, dropout=0.1):
        """Load the 'bert-base-uncased' encoder and create the classification head.

        Args:
            dropout: dropout probability applied to the pooled encoder output.
        """
        super().__init__()

        self.bert = BertModel.from_pretrained('bert-base-uncased')

        self.dropout = nn.Dropout(dropout)
        self.linear = nn.Linear(768, 1)
        self.sigmoid = nn.Sigmoid()

    def forward(self, tokens, masks=None):
        """Return the sigmoid output of the head for a batch.

        Args:
            tokens: token-id tensor handed to the encoder.
            masks: optional attention mask handed to the encoder.

        Returns:
            The sigmoid of the linear layer applied to the (dropped-out) pooled output.
        """
        # Only the second element of the encoder's return value (the pooled output) is used.
        encoder_outputs = self.bert(tokens, attention_mask=masks, output_all_encoded_layers=False)
        pooled = encoder_outputs[1]
        return self.sigmoid(self.linear(self.dropout(pooled)))

In [None]:
# Use the GPU when PyTorch reports one as available; otherwise stay on the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")
device

device(type='cuda')

In [None]:
# Build the classifier and move it to the device selected earlier.
# FIX: the original called .cuda() unconditionally, which ignores `device`
# and fails on a CPU-only runtime.
bert_clf = BertBinaryClassifier()
bert_clf = bert_clf.to(device)

100%|██████████| 407873900/407873900 [00:10<00:00, 37503455.61B/s]


## Finetune

In [None]:
# Fine-tuning hyper-parameters: examples per batch and passes over the training set.
BATCH_SIZE, EPOCHS = 32, 3

In [None]:
def _label_column(labels):
    """Reshape a label array to a single column and convert it to a float tensor."""
    return torch.tensor(labels.reshape(-1, 1)).float()


# Token ids, one tensor per split.
train_tokens_tensor = torch.tensor(train_tokens_ids)
dev_tokens_tensor = torch.tensor(dev_tokens_ids)
test_tokens_tensor = torch.tensor(test_tokens_ids)

# Targets, one float column per split.
train_labels_tensor = _label_column(labels_train)
dev_labels_tensor = _label_column(labels_dev)
test_labels_tensor = _label_column(labels_test)

# Attention masks, one tensor per split.
train_masks_tensor = torch.tensor(train_masks)
dev_masks_tensor = torch.tensor(dev_masks)
test_masks_tensor = torch.tensor(test_masks)

'439.065088M'

In [None]:
def _make_loader(tokens, masks, labels, sampler_cls):
    """Bundle three tensors into a dataset and wrap it in a loader batching BATCH_SIZE rows.

    Returns the dataset, the sampler built by ``sampler_cls`` and the loader.
    """
    dataset = TensorDataset(tokens, masks, labels)
    sampler = sampler_cls(dataset)
    loader = DataLoader(dataset, sampler=sampler, batch_size=BATCH_SIZE)
    return dataset, sampler, loader


# Training uses a random sampler; dev and test use sequential samplers.
train_dataset, train_sampler, train_dataloader = _make_loader(
    train_tokens_tensor, train_masks_tensor, train_labels_tensor, RandomSampler)

dev_dataset, dev_sampler, dev_dataloader = _make_loader(
    dev_tokens_tensor, dev_masks_tensor, dev_labels_tensor, SequentialSampler)

test_dataset, test_sampler, test_dataloader = _make_loader(
    test_tokens_tensor, test_masks_tensor, test_labels_tensor, SequentialSampler)

In [None]:
# Adam over every parameter of the classifier with a learning rate of 2e-5.
# The former `param_optimizer` / `optimizer_grouped_parameters` lists were dropped:
# they were built from the sigmoid layer, which has no parameters, and were
# never handed to the optimizer.
optimizer = Adam(bert_clf.parameters(), lr=2e-5)

In [None]:
import time
import datetime

def format_time(elapsed):
    """Render a duration given in seconds as text.

    The value is rounded to whole seconds and formatted through
    ``datetime.timedelta``, e.g. ``855.2`` becomes ``'0:14:15'``.
    """
    whole_seconds = int(round(elapsed))
    return str(datetime.timedelta(seconds=whole_seconds))

In [None]:
# Ask PyTorch to empty its CUDA memory cache before the training loop below starts.
torch.cuda.empty_cache()

In [None]:
# Fine-tune the classifier for EPOCHS passes over the training batches.
# The criterion is built once instead of once per batch.
loss_func = nn.BCELoss()
num_batches = len(train_dataloader)

t0 = time.time()
for epoch_num in range(EPOCHS):

  bert_clf.train()
  train_loss = 0
  for step_num, batch_data in enumerate(train_dataloader):
    token_ids, masks, labels = tuple(t.to(device) for t in batch_data)

    # The model ends in a sigmoid, so these are probabilities, matching BCELoss.
    logits = bert_clf(token_ids, masks)

    batch_loss = loss_func(logits, labels)
    train_loss += batch_loss.item()

    bert_clf.zero_grad()
    batch_loss.backward()

    # Clip the global gradient norm to 1.0 before the update.
    clip_grad_norm_(parameters=bert_clf.parameters(), max_norm=1.0)
    optimizer.step()

    clear_output(wait=True)
    print('Epoch: ', epoch_num + 1)
    # FIX: report a 1-based step over the real number of batches; the original
    # printed a 0-based step over the float len(train) / BATCH_SIZE.
    print("\r" + "{0}/{1} train loss: {2} ".format(step_num + 1, num_batches, train_loss / (step_num + 1)))

print("Total training took {:} (h:mm:ss)".format(format_time(time.time()-t0)))

Epoch:  3
531/531.84375 train loss: 0.15895379212845986 
Total training took 0:14:15 (h:mm:ss)


## Evaluation

In [None]:
# Score the development split with the fine-tuned model.
bert_clf.eval()

dev_all_logits = []

with torch.no_grad():
    # FIX: iterate the dev loader; the original looped over test_dataloader,
    # so the "dev" scores were really test scores.
    for step_num, batch_data in enumerate(dev_dataloader):

        token_ids, masks, labels = tuple(t.to(device) for t in batch_data)

        # Sigmoid outputs of the model for this batch (the loss is not needed here).
        logits = bert_clf(token_ids, masks)

        numpy_logits = logits.cpu().detach().numpy()

        # Keep the single output column as a flat list of scores.
        dev_all_logits += list(numpy_logits[:, 0])
        # bert_predicted += list(numpy_logits[:, 0] > threshold) 
        
        

In [None]:
# Score the test split with the fine-tuned model.
bert_clf.eval()

all_logits = []

with torch.no_grad():
    for step_num, batch_data in enumerate(test_dataloader):

        token_ids, masks, labels = tuple(t.to(device) for t in batch_data)

        # Sigmoid outputs of the model for this batch. The per-batch loss the
        # original computed here was never read, so it is no longer evaluated.
        logits = bert_clf(token_ids, masks)

        numpy_logits = logits.cpu().detach().numpy()

        # Keep the single output column as a flat list of scores.
        all_logits += list(numpy_logits[:, 0])
        # bert_predicted += list(numpy_logits[:, 0] > threshold) 

        

### threshold

In [None]:
from sklearn.metrics import classification_report,accuracy_score,f1_score

# FIX: the original assigned to the misspelled name `theshold`, leaving
# `threshold` undefined on a fresh kernel.
threshold = 0.5
# FIX: use the scores collected for the dev split (`dev_all_logits`);
# `all_logits` holds the test-split scores.
dev["prob"] = dev_all_logits

# Predict 1 where the score exceeds the threshold, 0 otherwise.
dev["prediction"] = (dev["prob"] > threshold).astype(int)

dev_true = dev["label"]
dev_prediction = dev["prediction"]
print("threshold")
print("f1:",f1_score(dev_true, dev_prediction))

In [None]:
#find f1
# Sweep candidate thresholds on the dev split, plot F1 against the threshold
# and keep the best one in `threshold` for the cells that follow.
import matplotlib.pyplot as plt
import numpy as np
from sklearn.metrics import classification_report,accuracy_score,f1_score

# NOTE(review): the original cell left these three values blank (a syntax
# error). The numbers below are placeholders -- set the range you want to sweep.
start,stop,step = 0.05, 1.0, 0.05
x = np.arange(start,stop,step)

# FIX: use the dev-split scores; `all_logits` holds the test-split scores.
dev["prob"] = dev_all_logits
dev_true = dev["label"]

f1 = []
for candidate in x:
  dev["prediction"] = (dev["prob"] > candidate).astype(int)
  f1.append(round(f1_score(dev_true, dev["prediction"]), 6))
dev_prediction = dev["prediction"]

plt.xlabel("threshold range")
plt.ylabel("f1 score")
plt.plot(x,f1,'r-')

# Annotate every point with its F1 value.
for a,b in zip(x,f1):
  plt.text(a,b,b)

plt.show()

# FIX: the original left `threshold` at the last candidate of the sweep;
# expose the best-scoring one instead.
threshold = float(x[f1.index(max(f1))])
print(f'threshold:{threshold}','\n',f'f1:{f1}','\n',f'f1 max:{max(f1)}')


In [None]:
# Attach the test scores and threshold them in one vectorised step
# (replaces the row-by-row `.at` loop): 1 where the score exceeds `threshold`, else 0.
test["prob"] = all_logits
test["prediction"] = (test["prob"] > threshold).astype(int)

In [None]:
from sklearn.metrics import classification_report,accuracy_score,f1_score

# Compare the stored predictions with the labels of the test split.
true, prediction = test["label"], test["prediction"]
print("base-bs")
for tag, metric in (("f1:", f1_score), ("acc:", accuracy_score)):
  print(tag, metric(true, prediction))
print(classification_report(true, prediction, digits=3))

base-bs
f1: 0.5413929040735873
acc: 0.8326540397986094
              precision    recall  f1-score   support

           0      0.898     0.897     0.898      3411
           1      0.541     0.542     0.541       760

    accuracy                          0.833      4171
   macro avg      0.719     0.720     0.720      4171
weighted avg      0.833     0.833     0.833      4171



In [None]:
import numpy as np
bert_clf.eval()

# Run the model over the test loader ONCE; the scores do not depend on the
# threshold, so the original's re-inference for every candidate was wasted work.
all_logits = []
true_labels = []
with torch.no_grad():
    for step_num, batch_data in enumerate(test_dataloader):

        token_ids, masks, labels = tuple(t.to(device) for t in batch_data)

        logits = bert_clf(token_ids, masks)

        # Move scores and labels to the CPU as NumPy arrays.
        numpy_logits = logits.cpu().detach().numpy()
        label_ids = labels.to('cpu').numpy()

        true_labels.append(label_ids)
        all_logits += list(numpy_logits[:, 0])

flat_true_labels = np.concatenate(true_labels, axis=0)
test_scores = np.asarray(all_logits)

# NOTE(review): this sweep scores thresholds against the test loader, while the
# notebook text says thresholds are learned on the development set -- confirm intent.
f1 = []
for threshold in np.arange(0.836,0.844,0.001):
  bert_predicted = list(test_scores > threshold)
  f1.append(f1_score(flat_true_labels, bert_predicted))

print(f1)

[0.5670916818457802, 0.5677811550151975, 0.5681265206812652, 0.5667276051188299, 0.5670731707317073, 0.5670731707317073, 0.5677655677655677, 0.5663608562691131, 0.5670545009185547]


In [None]:
# Show the swept thresholds, then the position of the first best (rounded) F1 among them.
print(np.arange(0.836,0.844,0.001))
f1 = list(map(lambda score: round(score, 6), f1))
print(max(range(len(f1)), key=f1.__getitem__))

# 0.4-0.9[0.5503999999999999, 0.5551289083927592, 0.5566778900112234, 0.5585689555683785, 0.5645933014354068, 0.5603557814485387]
# 0.75,0.85[0.5615474794841734, 0.5670356703567037]
# 0.8-0.9[0.564593, 0.565947, 0.566606, 0.566929, 0.567073, 0.567036, 0.563625, 0.56125, 0.561558, 0.560606]
# 0.830-0.855 [0.566929, 0.566748, 0.567073, 0.567055, 0.567036, 0.564007]
# 0.835-0.845 [0.566748, 0.567092, 0.567781, 0.568127, 0.566728, 0.567073, 0.567073, 0.567766, 0.566361, 0.567055, 0.567055]

[0.836 0.837 0.838 0.839 0.84  0.841 0.842 0.843 0.844]
2


threshold = 0.838

f1 score max = 0.568127

Thresholds are learned from the **development** set for supervised match scoring methods.

### BM

In [None]:
# Store the scores currently held in `all_logits` as the 'prob' column of the
# test frame; the best-match selection in the next cell reads this column.
test['prob'] = all_logits

In [None]:
#unique_args:1174  total_args: 4171 matching:760 prob>0.5:762
# Best match: within each argument, predict 1 for the row(s) holding the
# highest score and 0 for every other row.
# FIX: the group maximum is computed per argument with groupby, so the result
# no longer relies on each argument's rows being contiguous and in
# first-appearance order (the original indexed rows as i + num_past), and the
# maximum is no longer recomputed for every row.
unique_arg = list(test["argument"].unique())
best_prob = test.groupby("argument")["prob"].transform("max")
test["prediction"] = (test["prob"] == best_prob).astype(int)

In [None]:
# Number of rows currently predicted as 1.
int((test["prediction"] == 1).sum())

1174

In [None]:
from sklearn.metrics import classification_report,accuracy_score,f1_score

# Compare the stored predictions with the labels of the test split.
true, prediction = test["label"], test["prediction"]
print("base-bs")
for tag, metric in (("f1:", f1_score), ("acc:", accuracy_score)):
  print(tag, metric(true, prediction))
print(classification_report(true, prediction, digits=3))

base-bs
f1: 0.532574974146846
acc: 0.783265403979861
              precision    recall  f1-score   support

           0      0.918     0.807     0.859      3411
           1      0.439     0.678     0.533       760

    accuracy                          0.783      4171
   macro avg      0.678     0.742     0.696      4171
weighted avg      0.831     0.783     0.799      4171



### threshold+BM

In [None]:
# Store the scores currently held in `all_logits` as the 'prob' column of the
# test frame; the threshold + best-match selection in the next cell reads this column.
test['prob'] = all_logits

In [None]:
#unique_args:1174  total_args: 4171 matching:760 prob>0.5:762
unique_arg = list(test["argument"].unique()) 
num_past = 0
theshold = 0.5
for arg in unique_arg:
  prob = test["prob"].loc[test["argument"]==arg].tolist()
  for i,j in enumerate(prob):
    if j == max(prob) and j > theshold:  
      test.at[i+num_past,"prediction"] = 1
    else:
      test.at[i+num_past,"prediction"] = 0
  num_past += len(prob)

In [None]:
from sklearn.metrics import classification_report,accuracy_score,f1_score

# Compare the stored predictions with the labels of the test split.
true, prediction = test["label"], test["prediction"]
print("base-bs")
for tag, metric in (("f1:", f1_score), ("acc:", accuracy_score)):
  print(tag, metric(true, prediction))
print(classification_report(true, prediction, digits=3))

base-bs
f1: 0.5741324921135647
acc: 0.8705346439702709
              precision    recall  f1-score   support

           0      0.892     0.958     0.924      3411
           1      0.717     0.479     0.574       760

    accuracy                          0.871      4171
   macro avg      0.804     0.718     0.749      4171
weighted avg      0.860     0.871     0.860      4171



### Dual threshold

In [None]:
# Store the scores currently held in `all_logits` as the 'prob' column of the
# test frame; the dual-threshold selection in the next cell reads this column.
test['prob'] = all_logits

In [None]:
#unique_args:1174  total_args: 4171 matching:760 prob>0.5:762
unique_arg = list(test["argument"].unique()) 
num_past = 0
threshold_low = 0.5
threshold_high = 0.7
good = []
for arg in unique_arg:
  prob = test["prob"].loc[test["argument"]==arg].tolist()
  exceed = [p for p in prob if p > threshold_high]
  for i,j in enumerate(prob):
    if len(exceed) >=1 :
      # for e in exceed:
      #   test.at[prob.index(exceed)+num_past,"prediction"] = 1
      if j > threshold_low:
        test.at[i+num_past,"prediction"] = 1
      else:
        test.at[i+num_past,"prediction"] = 0

    elif j == max(prob) and j > threshold_low:
      test.at[i+num_past,"prediction"] = 1
    else:
      test.at[i+num_past,"prediction"] = 0
  good.append(exceed)
  num_past += len(prob)

In [None]:
# Rows at positions 10 through 19 of the test frame.
test.iloc[10:20]

In [None]:
# Largest number of above-threshold_high scores found for a single argument.
max(map(len, good))
  

In [None]:
from sklearn.metrics import classification_report,accuracy_score,f1_score

# Compare the stored predictions with the labels of the test split.
true, prediction = test["label"], test["prediction"]
print("base-bs")
for tag, metric in (("f1:", f1_score), ("acc:", accuracy_score)):
  print(tag, metric(true, prediction))
print(classification_report(true, prediction, digits=3))

base-bs
f1: 0.5424621461487822
acc: 0.8333732917765524
              precision    recall  f1-score   support

           0      0.898     0.898     0.898      3411
           1      0.543     0.542     0.542       760

    accuracy                          0.833      4171
   macro avg      0.720     0.720     0.720      4171
weighted avg      0.833     0.833     0.833      4171



### Save and load

In [None]:
# Persist the last epoch index, the model and optimizer state, and the last batch loss.
torch.save({
            'epoch': epoch_num,
            'model_state_dict': bert_clf.state_dict(),
            'optimizer_state_dict': optimizer.state_dict(),
            # Store the loss as a plain float rather than a tensor, so reading
            # this field back does not depend on the device it was computed on.
            'loss': batch_loss.item(),
            }, "/content/drive/MyDrive/Keypoints/bert1-base-bs-tp-in.pth")

In [None]:
# Rebuild the classifier and restore the fine-tuned weights from the checkpoint.
bert_clf = BertBinaryClassifier()
# map_location lets a checkpoint written on a GPU runtime load on whatever
# device this session selected.
checkpoint = torch.load("/content/drive/MyDrive/Keypoints/bert1-base-bs-tp-in.pth", map_location=device)
# Strict loading (the default): a missing or unexpected key now raises instead
# of silently leaving part of the model at its freshly constructed weights.
bert_clf.load_state_dict(checkpoint['model_state_dict'])
bert_clf.to(device)
bert_clf.eval()