# Track 2: Key Point Matching [Bert-Base]


### Settings & Initialization

⋅ Install tools and dependencies. 

⋅ Set Environemnt Variables. 

⋅ Define Constants.

In [1]:
!pip install transformers
!pip install optuna



In [2]:
from transformers import DataCollatorWithPadding,TrainingArguments,default_data_collator,EvalPrediction,AutoModelForSequenceClassification,AutoTokenizer,Trainer
import pandas as pd
import os
import numpy as np
import evaluate
import pandas as pd
from sklearn.metrics import average_precision_score


  from .autonotebook import tqdm as notebook_tqdm


In [3]:
os.environ['TOKENIZERS_PARALLELISM'] = 'true'
tokenizer = AutoTokenizer.from_pretrained('distilbert-base-uncased')
accuracy = evaluate.load("accuracy")
precision_metric = evaluate.load("precision")
recall_metric = evaluate.load('recall')
f1_metric = evaluate.load("f1")
ID2LABEL = {0: "NEGATIVE", 1: "POSITIVE"}
LABEL2ID = {"NEGATIVE": 0, "POSITIVE": 1}
GOLD_DATA_DIR = './../kpm_data/'

### Preprocessing & PostProcessing Methods
 

⋅ Load Raw data by using the protocol defined on paper ArgMining KPA 2021 Shared task.

⋅ Tokenize function to apply before training by using pre-trained tokenizer *distilbert-base-uncased*

⋅ Merge labels,topic and arguments in the same dataframe

⋅ Data Cleaning


In [4]:
def load_kpm_data(subset, submitted_kp_file=None,nrows=None):
    print("\nֿ** loading task data:")
    arguments_file = os.path.join(GOLD_DATA_DIR, f"arguments_{subset}.csv")
    if not submitted_kp_file:
        key_points_file = os.path.join(GOLD_DATA_DIR, f"key_points_{subset}.csv")
    else:
        key_points_file=submitted_kp_file
    labels_file = os.path.join(GOLD_DATA_DIR, f"labels_{subset}.csv")
    arguments_df = pd.read_csv(arguments_file,nrows=nrows)
    key_points_df = pd.read_csv(key_points_file,nrows=nrows)
    labels_file_df = pd.read_csv(labels_file,nrows=nrows)

    return arguments_df, key_points_df, labels_file_df

def preprocess_function(examples):
    token = tokenizer(examples["key_point"],examples["argument"],
     truncation=True)
    token["labels"] = examples["label"]
    return token

def merge_df(arg_df, kp_df, labels_df):
    labels_df=labels_df.merge(kp_df,on="key_point_id",how="left")
    labels_df=labels_df.merge(arg_df,on='arg_id',how="left")
    labels_df.drop(["stance_x", "topic_x"], axis=1, inplace=True)
    labels_df=labels_df.rename(columns={"stance_y":"stance", "topic_y":"topic"})
    return labels_df

def remove_nan(labels_df):
    labels_df = labels_df[labels_df['argument'].notna()]
    labels_df = labels_df[labels_df['topic'].notna()]
    labels_df = labels_df[labels_df['stance'].notna()]
    return labels_df


### Grid Search

⋅ Set configuration of hyperparameter search with [optuna](https://www.example.com)

⋅ Initialize model for the grid search

⋅ Retrieve evaluations metrics to select best model

In [5]:
def optuna_hp_space(trial):
    return {
        "weight_decay": trial.suggest_float("weight_decay", 0.0, 0.1),
        "adam_epsilon": trial.suggest_float("adam_epsilon", 1e-8, 1e-6, log=True),
        "num_train_epochs": trial.suggest_categorical("num_train_epochs", [2,4,8]),
        "batch_size": trial.suggest_categorical("batch_size", [16, 32, 64, 128]),
        "learning_rate": trial.suggest_float("learning_rate", 1e-5, 1e-3, log=True),
    }

def model_init(trial):
    return AutoModelForSequenceClassification.from_pretrained(
    "distilbert-base-uncased",num_labels=2, id2label=ID2LABEL, label2id=LABEL2ID)

def compute_objective(metrics):
    return metrics["eval_loss"]

def compute_metrics(eval_pred):
    predictions, labels = eval_pred
    predictions = predictions[:, 0]
    predictions = np.where(predictions < 0.5, 0, 1)
    _precision = precision_metric.compute(predictions=predictions, references=labels)
    _map = average_precision_score(labels, predictions)
    _recall = recall_metric.compute(predictions=predictions, references=labels)
    _f1 = f1_metric.compute(predictions=predictions, references=labels)
    _accuracy= accuracy.compute(predictions=predictions, references=labels)
    return {
                "precision": _precision,
                "recall": _recall,
                "f1": _f1,
                "accuracy": _accuracy,
                "map": _map,
            }

In [6]:
if __name__ == "__main__":
    
    
    model = AutoModelForSequenceClassification.from_pretrained(
    "distilbert-base-uncased",num_labels=2, id2label=ID2LABEL, label2id=LABEL2ID)
    
    # Merge Labels with arguments and key_points for training dataset
    arg_df_dev, kp_df_dev, labels_df_dev = load_kpm_data( subset="train")
    labels_df_dev=merge_df(arg_df_dev, kp_df_dev, labels_df_dev)
    # Merge Labels with arguments and key_points for test dataset
    arg_df_test, kp_df_test, labels_df_test = load_kpm_data(subset="dev")
    labels_df_test=merge_df(arg_df_test, kp_df_test, labels_df_test)
    # labels_df_test=labels_df_test.merge(kp_df_test,on="key_point_id",how="left")
    # labels_df_test=labels_df_test.merge(arg_df_test,on='arg_id',how="left")
    # labels_df_test.drop(["stance_x", "topic_x"], axis=1, inplace=True)
    # labels_df_test=labels_df_test.rename(columns={"stance_y":"stance", "topic_y":"topic"})
    #encode each sentence and append to dictionary
    # filter where labels are not nan
    # labels_df_dev = labels_df_dev[labels_df_dev['argument'].notna()]
    # labels_df_dev = labels_df_dev[labels_df_dev['topic'].notna()]
    # labels_df_dev = labels_df_dev[labels_df_dev['stance'].notna()]
    labels_df_dev = remove_nan(labels_df_dev)
    labels_df_test = remove_nan(labels_df_test)
    # # labels_df_test = labels_df_test[labels_df_test['argument'].notna()]
    # # labels_df_test = labels_df_test[labels_df_test['topic'].notna()]
    # # labels_df_test = labels_df_test[labels_df_test['stance'].notna()]
    result_train = labels_df_dev.apply(preprocess_function,axis=1)    
    result_test = labels_df_test.apply(preprocess_function,axis=1)
  


    training_args = TrainingArguments(
    output_dir="my_awesome_model",
    learning_rate=2e-5,
    per_device_train_batch_size=16,
    per_device_eval_batch_size=16,
    num_train_epochs=2,
    weight_decay=0.01,
    evaluation_strategy="epoch",
    save_strategy="epoch",
    load_best_model_at_end=True,
)
    data_collator = DataCollatorWithPadding(tokenizer=tokenizer)
    trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=result_train,
    eval_dataset=result_test,
    tokenizer=tokenizer,
    data_collator=data_collator,
    compute_metrics=compute_metrics,
)
#     trainer = Trainer(
#     model=None,
#     args=training_args,
#     train_dataset=result_train,
#     eval_dataset=result_test,
#     compute_metrics=compute_metrics,
#     tokenizer=tokenizer,
#     model_init=model_init,
#     data_collator=data_collator,
# )
#     train_result = trainer.hyperparameter_search(
#     direction="maximize",
#     backend="optuna",
#     hp_space=optuna_hp_space,
#     n_trials=20,
#     compute_objective=compute_objective,
# )
    train_result = trainer.train()
    metrics = train_result.metrics
    max_train_samples = (len(result_train))
    metrics["train_samples"] = min(max_train_samples, len(result_train))
    trainer.log_metrics("train", metrics)
    

Some weights of the model checkpoint at distilbert-base-uncased were not used when initializing DistilBertForSequenceClassification: ['vocab_projector.weight', 'vocab_transform.bias', 'vocab_layer_norm.bias', 'vocab_transform.weight', 'vocab_projector.bias', 'vocab_layer_norm.weight']
- This IS expected if you are initializing DistilBertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing DistilBertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of DistilBertForSequenceClassification were not initialized from the model checkpoint at distilbert-base-uncased and are newly initialized: ['classifier.weight', 'classifier.bias', 'pre_classifier


ֿ** loading task data:

ֿ** loading task data:


***** Running training *****
  Num examples = 20635
  Num Epochs = 2
  Instantaneous batch size per device = 16
  Total train batch size (w. parallel, distributed & accumulation) = 16
  Gradient Accumulation steps = 1
  Total optimization steps = 2580
  Number of trainable parameters = 66955010
  0%|          | 0/2580 [00:00<?, ?it/s]You're using a DistilBertTokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.
 19%|█▉        | 500/2580 [03:50<16:14,  2.14it/s]

{'loss': 0.4191, 'learning_rate': 1.612403100775194e-05, 'epoch': 0.39}


 39%|███▉      | 1000/2580 [07:42<12:39,  2.08it/s]

{'loss': 0.3302, 'learning_rate': 1.2248062015503876e-05, 'epoch': 0.78}


 50%|█████     | 1290/2580 [09:57<09:11,  2.34it/s]***** Running Evaluation *****
  Num examples = 3458
  Batch size = 16
Trainer is attempting to log a value of "{'precision': 0.0888797023563456}" of type <class 'dict'> for key "eval/precision" as a scalar. This invocation of Tensorboard's writer.add_scalar() is incorrect so we dropped this attribute.
Trainer is attempting to log a value of "{'recall': 0.29132791327913277}" of type <class 'dict'> for key "eval/recall" as a scalar. This invocation of Tensorboard's writer.add_scalar() is incorrect so we dropped this attribute.
Trainer is attempting to log a value of "{'f1': 0.1362052581564777}" of type <class 'dict'> for key "eval/f1" as a scalar. This invocation of Tensorboard's writer.add_scalar() is incorrect so we dropped this attribute.
Trainer is attempting to log a value of "{'accuracy': 0.21139386928860612}" of type <class 'dict'> for key "eval/accuracy" as a scalar. This invocation of Tensorboard's writer.add_scalar() is incorr

{'eval_loss': 0.375709593296051, 'eval_precision': {'precision': 0.0888797023563456}, 'eval_recall': {'recall': 0.29132791327913277}, 'eval_f1': {'f1': 0.1362052581564777}, 'eval_accuracy': {'accuracy': 0.21139386928860612}, 'eval_map': 0.17713663156910112, 'eval_runtime': 24.328, 'eval_samples_per_second': 142.141, 'eval_steps_per_second': 8.92, 'epoch': 1.0}


Model weights saved in my_awesome_model/checkpoint-1290/pytorch_model.bin
tokenizer config file saved in my_awesome_model/checkpoint-1290/tokenizer_config.json
Special tokens file saved in my_awesome_model/checkpoint-1290/special_tokens_map.json
 58%|█████▊    | 1499/2580 [16:19<08:49,  2.04it/s]   

KeyboardInterrupt: 