# **BERT CUSTOM 2**

## **ENVIRONMENT SETUP**

In [1]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [2]:
!pip install -q transformers

[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m6.8/6.8 MB[0m [31m94.3 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m7.6/7.6 MB[0m [31m104.9 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m199.8/199.8 KB[0m [31m24.7 MB/s[0m eta [36m0:00:00[0m
[?25h

In [3]:
# Download the dataset

!rm arg_quality_rank_30k.csv
!wget "https://www.research.ibm.com/haifa/dept/vst/files/IBM_Debater_(R)_arg_quality_rank_30k.zip"
!unzip *.zip
!rm *.zip
!rm readme.txt

rm: cannot remove 'arg_quality_rank_30k.csv': No such file or directory
--2023-04-04 11:43:34--  https://www.research.ibm.com/haifa/dept/vst/files/IBM_Debater_(R)_arg_quality_rank_30k.zip
Resolving www.research.ibm.com (www.research.ibm.com)... 52.116.220.135
Connecting to www.research.ibm.com (www.research.ibm.com)|52.116.220.135|:443... connected.
HTTP request sent, awaiting response... 301 Moved Permanently
Location: https://research.ibm.com/haifa/dept/vst/files/IBM_Debater_(R)_arg_quality_rank_30k.zip [following]
--2023-04-04 11:43:35--  https://research.ibm.com/haifa/dept/vst/files/IBM_Debater_(R)_arg_quality_rank_30k.zip
Resolving research.ibm.com (research.ibm.com)... 52.116.220.135
Connecting to research.ibm.com (research.ibm.com)|52.116.220.135|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 1524714 (1.5M) [application/zip]
Saving to: ‘IBM_Debater_(R)_arg_quality_rank_30k.zip’


2023-04-04 11:43:41 (301 KB/s) - ‘IBM_Debater_(R)_arg_quality_rank_30k.zi

## **IMPORT DATASET**

In [4]:
import pandas as pd

df = pd.read_csv("./arg_quality_rank_30k.csv")
df.head()

Unnamed: 0,argument,topic,set,WA,MACE-P,stance_WA,stance_WA_conf
0,"""marriage"" isn't keeping up with the times. a...",We should abandon marriage,train,0.846165,0.297659,1,1.0
1,.a multi-party system would be too confusing a...,We should adopt a multi-party system,train,0.891271,0.726133,-1,1.0
2,\ero-tolerance policy in schools should not be...,We should adopt a zero-tolerance policy in sch...,dev,0.721192,0.396953,-1,1.0
3,`people reach their limit when it comes to the...,Assisted suicide should be a criminal offence,train,0.730395,0.225212,-1,1.0
4,"100% agree, should they do that, it would be a...",We should abolish safe spaces,train,0.236686,0.004104,1,0.805517


### Split into Train and Test Sets

In [5]:
df_train = df[df["set"] != "test"].reset_index(drop=True) # Combine train and dev into train set
df_train = df_train.drop(["set"], axis=1)

df_test = df[df["set"] == "test"].reset_index(drop=True)
df_test = df_test.drop(["set"], axis=1)

### Display Dataset Metrics

In [6]:
import numpy as np

In [7]:
print(f"Length of dataset = {len(df)}")
print(f"Number of training data = {len(df_train)}")
print(f"Number of testing data = {len(df_test)}")

Length of dataset = 30497
Number of training data = 24182
Number of testing data = 6315


In [8]:
print(f"Number of Topics = {len(np.unique(df.topic))}")
print(f"Number of Topics in training data = {len(np.unique(df_train.topic))}")
print(f"Number of Topics in testing data = {len(np.unique(df_test.topic))}")

Number of Topics = 71
Number of Topics in training data = 56
Number of Topics in testing data = 15


## **TEXT CLEANING**

In [9]:
import re

In [10]:
def clean_text_bert(text):
    text = text.lower() # Convert the text into lowercase
    text = text.replace('</br>', '') # Remove </br>
    text = text.replace('\n', '') # Remove \n
    
    # Remove quotes
    text = re.sub(r"\'", "", text) 
    text = re.sub(r"\"", "", text) 
    
    text = re.sub(r"[^\w]", " ", text) # Remove all symbols

    text = re.sub(r'[ ]{2,}', ' ', text) # Remove extra spaces
    text = re.sub(r'[ \t]+$', '', text) # Remove trailing white spaces
    
    return text

In [11]:
df_train["argument"] = df_train["argument"].apply(clean_text_bert)
df_test["argument"] = df_test["argument"].apply(clean_text_bert)

In [12]:
df_train["topic"] = df_train["topic"].apply(clean_text_bert)
df_test["topic"] = df_test["topic"].apply(clean_text_bert)

In [13]:
df_train["argument"]

0        marriage isnt keeping up with the times abando...
1         a multi party system would be too confusing a...
2         ero tolerance policy in schools should not be...
3         people reach their limit when it comes to the...
4        100 agree should they do that it would be a go...
                               ...                        
24177    zoos trap animals into a meaningless life only...
24178    zoos treat animals badly they should be closed...
24179    zoos unfairly imprison animals and cause them ...
24180    zoos work as educational centers and are not t...
24181           zoos work to help breed endangered species
Name: argument, Length: 24182, dtype: object

## **TEXT AUGMENTATION**

In [14]:
def augment(argument, topic):
    text = topic + " " + "SEP" + " " + argument
    return text

In [15]:
df_train["argument"] = [ augment(x, y) for (x, y) in zip(df_train["argument"], df_train["topic"]) ]
df_test["argument"] = [ augment(x, y) for (x, y) in zip(df_test["argument"], df_test["topic"]) ]

In [16]:
df_train["argument"]

0        we should abandon marriage SEP marriage isnt k...
1        we should adopt a multi party system SEP  a mu...
2        we should adopt a zero tolerance policy in sch...
3        assisted suicide should be a criminal offence ...
4        we should abolish safe spaces SEP 100 agree sh...
                               ...                        
24177    we should abolish zoos SEP zoos trap animals i...
24178    we should abolish zoos SEP zoos treat animals ...
24179    we should abolish zoos SEP zoos unfairly impri...
24180    we should abolish zoos SEP zoos work as educat...
24181    we should abolish zoos SEP zoos work to help b...
Name: argument, Length: 24182, dtype: object

## **PRE-CONFIG FOR BERT**

In [17]:
import torch

device = 'cuda' if torch.cuda.is_available() else 'cpu'

In [18]:
MAX_LEN = np.max([
    np.max([len(x) for x in df_train["argument"]]), 
    np.max([len(x) for x in df_test["argument"]])
])

BATCH_SIZE = 32
LEARNING_RATE = 2e-5

## **BUILD DATASET FOR BERT**

In [19]:
from transformers import AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained('bert-base-uncased')

Downloading (…)okenizer_config.json:   0%|          | 0.00/28.0 [00:00<?, ?B/s]

Downloading (…)lve/main/config.json:   0%|          | 0.00/570 [00:00<?, ?B/s]

Downloading (…)solve/main/vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

Downloading (…)/main/tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]

In [20]:
from torch.utils.data import Dataset

class ModelDataset(Dataset):
    def __init__(self, df, tokenizer, max_len):
        self.df = df
        self.max_len = max_len
        self.text = df["argument"].values
        self.tokenizer = tokenizer
        self.targets = df["WA"].values
        
    def __len__(self):
        return len(self.targets)
    
    def __getitem__(self, index):
        text = self.text[index]
        inputs = self.tokenizer.encode_plus(
            text,
            truncation=True,
            add_special_tokens=True,
            max_length=self.max_len,
            padding='max_length',
            return_token_type_ids=True
        )
        ids = inputs['input_ids']
        mask = inputs['attention_mask']
        token_type_ids = inputs["token_type_ids"]
        
        return {
            'ids': torch.tensor(ids, dtype=torch.long).to(device),
            'mask': torch.tensor(mask, dtype=torch.long).to(device),
            'token_type_ids': torch.tensor(token_type_ids, dtype=torch.long).to(device),
            'targets': torch.tensor(self.targets[index], dtype=torch.float).to(device)
        }

In [21]:
train_data = ModelDataset(df_train, tokenizer, MAX_LEN)
test_data = ModelDataset(df_test, tokenizer, MAX_LEN)

In [22]:
from torch.utils.data import DataLoader

train_loader = DataLoader(train_data, batch_size=BATCH_SIZE, shuffle=True)
test_loader = DataLoader(test_data, batch_size=BATCH_SIZE)

## **BUILD CUSTOM MODEL**

In [23]:
from torch import nn
from transformers import AutoModel

class CustomModel(nn.Module):
    
    def __init__(self):
        super(CustomModel, self).__init__()
        self.bert = AutoModel.from_pretrained('bert-base-uncased')
        self.stack = nn.Sequential(
            nn.Linear(768, 512),
            nn.ReLU(),
            nn.Linear(512, 32),
            nn.ReLU(),
            nn.Linear(32, 1),
        )

    def forward(self, ids, mask, token_type_ids):
        _, features = self.bert(
            ids, token_type_ids=token_type_ids,
            attention_mask=mask, return_dict=False
        )
        output = self.stack(features)
        return output

In [24]:
model = CustomModel().to(device)

Downloading pytorch_model.bin:   0%|          | 0.00/440M [00:00<?, ?B/s]

Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.weight', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias', 'cls.predictions.decoder.weight']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


## **TRAIN MODEL**

In [25]:
loss_fn = nn.MSELoss() # Loss function
optimizer = torch.optim.AdamW(params=model.parameters(), lr=LEARNING_RATE) # Optimizer

In [26]:
# Define function to train the model

from tqdm import tqdm

def train(epoch):
    
    model.train()
    
    for batch in tqdm(train_loader):
        
        optimizer.zero_grad()
        
        ids = batch['ids'].to(device, dtype = torch.long)
        mask = batch['mask'].to(device, dtype = torch.long)
        token_type_ids = batch['token_type_ids'].to(device, dtype = torch.long)
        targets = batch['targets'].to(device, dtype = torch.float)

        outputs = model(ids, mask, token_type_ids)

        loss = loss_fn(outputs.view(-1), targets)
        
        loss.backward()
        optimizer.step()
        
        
    print(f'Epoch: {epoch + 1}, Loss:  {loss.item()}')

In [27]:
# Define function for model evaluation

def validation(data_loader):
    
    model.eval()
    targets = []
    outputs = []
    
    with torch.no_grad():
        
        for batch in data_loader:
            
            ids = batch['ids'].to(device, dtype = torch.long)
            mask = batch['mask'].to(device, dtype = torch.long)
            token_type_ids = batch['token_type_ids'].to(device, dtype = torch.long)
            batch_targets = batch['targets'].to(device, dtype = torch.float)
            
            batch_outputs = model(ids, mask, token_type_ids)
            
            targets.extend(batch_targets.cpu().numpy().tolist())
            outputs.extend(batch_outputs.cpu().numpy().tolist())
            
    return outputs, targets

In [28]:
# Perform model training

from copy import deepcopy
from scipy.stats import pearsonr, spearmanr

best_corr = -np.inf
best_weights = None
history = []

EPOCHS = 5

for epoch in range(EPOCHS):
    train(epoch)

    outputs, targets = validation(test_loader)
    outputs, targets = torch.tensor(outputs).view(-1), torch.tensor(targets)

    mse = loss_fn(outputs, targets)
    history.append(mse.item())
    print(f"Validation loss: {mse:>.4f}")

    corr, _ = pearsonr(outputs, targets)
    print(f"Corr: {corr:>.4f}")

    if corr > best_corr:
        best_corr = corr
        best_weights = deepcopy(model.state_dict())

100%|██████████| 756/756 [19:32<00:00,  1.55s/it]


Epoch: 1, Loss:  0.03005087375640869
Validation loss: 0.0324
Corr: 0.5235


100%|██████████| 756/756 [19:35<00:00,  1.55s/it]


Epoch: 2, Loss:  0.021719494834542274
Validation loss: 0.0293
Corr: 0.5198


100%|██████████| 756/756 [19:35<00:00,  1.55s/it]


Epoch: 3, Loss:  0.010608688928186893
Validation loss: 0.0304
Corr: 0.4942


100%|██████████| 756/756 [19:35<00:00,  1.56s/it]


Epoch: 4, Loss:  0.005205237772315741
Validation loss: 0.0334
Corr: 0.4989


100%|██████████| 756/756 [19:35<00:00,  1.55s/it]


Epoch: 5, Loss:  0.009249918162822723
Validation loss: 0.0344
Corr: 0.4902


## **EVALUATE CUSTOM MODEL**

In [29]:
from sklearn.metrics import r2_score, mean_absolute_error, mean_squared_error

In [30]:
model.load_state_dict(best_weights)
outputs, targets = validation(test_loader)

In [31]:
outputs =  np.array(outputs).reshape(-1)
targets =  np.array(targets).reshape(-1)

In [32]:
print("METRICS\tSCORE")

r2_val = r2_score(targets, outputs)
print("R2:", end="\t")
print(f"{r2_val:>.4f}")

mae_val = mean_absolute_error(targets, outputs)
print("MAE:", end="\t")
print(f"{mae_val:>.4f}")

mse_val = mean_squared_error(targets, outputs, squared=True)
print("MSE:", end="\t")
print(f"{mse_val:>.4f}")

rmse_val = mean_squared_error(targets, outputs, squared=False)
print("RMSE:", end="\t")
print(f"{rmse_val:>.4f}")

pearson_corr, _ = pearsonr(outputs, targets)
print("Pcorr:", end="\t")
print(f"{pearson_corr:>.4f}")

spearman_corr, _ = spearmanr(outputs, targets)
print("Scorr:", end="\t")
print(f"{spearman_corr:>.4f}")

METRICS	SCORE
R2:	0.1818
MAE:	0.1345
MSE:	0.0324
RMSE:	0.1799
Pcorr:	0.5235
Scorr:	0.4698


## **SAVE MODEL**

In [33]:
torch.save(best_weights, "./bert_custom_2.pt")
torch.save(best_weights, "/content/drive/MyDrive/bert_custom_2.pt")

In [34]:
saved_model = CustomModel().to(device)
saved_model.load_state_dict(torch.load("./bert_custom_2.pt"))

Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.weight', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias', 'cls.predictions.decoder.weight']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


<All keys matched successfully>

In [35]:
saved_model.eval()

targets = []
outputs = []

with torch.no_grad():
    for batch in test_loader:
        ids = batch['ids'].to(device, dtype = torch.long)
        mask = batch['mask'].to(device, dtype = torch.long)
        token_type_ids = batch['token_type_ids'].to(device, dtype = torch.long)
        batch_targets = batch['targets'].to(device, dtype = torch.float)
        
        batch_outputs = saved_model(ids, mask, token_type_ids)
        
        targets.extend(batch_targets.cpu().numpy().tolist())
        outputs.extend(batch_outputs.cpu().numpy().tolist())

In [36]:
outputs =  np.array(outputs).reshape(-1)
targets =  np.array(targets).reshape(-1)

In [37]:
print("METRICS\tSCORE")

r2_val = r2_score(targets, outputs)
print("R2:", end="\t")
print(f"{r2_val:>.4f}")

mae_val = mean_absolute_error(targets, outputs)
print("MAE:", end="\t")
print(f"{mae_val:>.4f}")

mse_val = mean_squared_error(targets, outputs, squared=True)
print("MSE:", end="\t")
print(f"{mse_val:>.4f}")

rmse_val = mean_squared_error(targets, outputs, squared=False)
print("RMSE:", end="\t")
print(f"{rmse_val:>.4f}")

pearson_corr, _ = pearsonr(outputs, targets)
print("Pcorr:", end="\t")
print(f"{pearson_corr:>.4f}")

spearman_corr, _ = spearmanr(outputs, targets)
print("Scorr:", end="\t")
print(f"{spearman_corr:>.4f}")

METRICS	SCORE
R2:	0.1818
MAE:	0.1345
MSE:	0.0324
RMSE:	0.1799
Pcorr:	0.5235
Scorr:	0.4698
