In [None]:
from google.colab import drive
drive.mount('/content/drive')
!pip install transformers
!pip install "ray[tune]"

import torch
from sklearn.metrics import accuracy_score, recall_score, precision_score, f1_score, precision_recall_fscore_support
from sklearn.model_selection import train_test_split
from transformers import (AutoTokenizer, AutoModelForSequenceClassification, TrainingArguments, Trainer)
import pandas as pd
import numpy as np
import os
from sklearn.metrics import confusion_matrix
os.environ["WANDB_DISABLED"] = "true"

Mounted at /content/drive
Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting transformers
  Downloading transformers-4.24.0-py3-none-any.whl (5.5 MB)
[K     |████████████████████████████████| 5.5 MB 13.5 MB/s 
[?25hCollecting huggingface-hub<1.0,>=0.10.0
  Downloading huggingface_hub-0.11.0-py3-none-any.whl (182 kB)
[K     |████████████████████████████████| 182 kB 69.7 MB/s 
Collecting tokenizers!=0.11.3,<0.14,>=0.11.1
  Downloading tokenizers-0.13.2-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (7.6 MB)
[K     |████████████████████████████████| 7.6 MB 57.7 MB/s 
Installing collected packages: tokenizers, huggingface-hub, transformers
Successfully installed huggingface-hub-0.11.0 tokenizers-0.13.2 transformers-4.24.0
Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting ray[tune]
  Downloading ray-2.1.0-cp37-cp37m-manylinux2014_x86_64.whl (59.1 MB)
[K     

In [None]:
# Training hyper-parameters.
no_train_epochs = 4       # forwarded to TrainingArguments(num_train_epochs=...)
freeze_layer_count = 4    # number of leading encoder layers whose weights are frozen
pretrained_model_tokenizer_path = r"distilroberta-base"

# Directory the fine-tuned model is saved to after training and reloaded from
# for testing. It is defined here, with the rest of the configuration, so the
# training cell works on a fresh kernel instead of relying on a later cell
# having been run first. Left blank like the CSV paths below: fill in before running.
model_path = r""

# Train / validation / test splits. Each CSV is read for its 'text' and
# 'label' columns further down.
df_input = pd.read_csv(r"")
df_input_val = pd.read_csv(r"")
df_test = pd.read_csv(r"")


In [None]:
# Tokenizer loaded from the same checkpoint name as the model; used below to
# encode the train, validation and test texts.
tokenizer = AutoTokenizer.from_pretrained(pretrained_model_tokenizer_path)

# Create torch dataset
class Dataset(torch.utils.data.Dataset):
    """Torch dataset over tokenizer encodings with optional labels.

    Args:
        encodings: Mapping from feature name to a per-example sequence of
            values. Must contain an "input_ids" entry, which determines the
            dataset length.
        labels: Optional per-example labels (list, numpy array, ...). When
            omitted or empty, items carry no "labels" entry.
    """

    def __init__(self, encodings, labels=None):
        self.encodings = encodings
        self.labels = labels

    def __getitem__(self, idx):
        """Return the features (and label, if available) of example ``idx`` as tensors."""
        item = {key: torch.tensor(val[idx]) for key, val in self.encodings.items()}
        # Test for presence explicitly: the truth value of a numpy array or a
        # pandas Series is ambiguous and raises ValueError. The length check
        # keeps an empty label list meaning "no labels".
        if self.labels is not None and len(self.labels) > 0:
            item["labels"] = torch.tensor(self.labels[idx])
        return item

    def __len__(self):
        """Number of examples, taken from the "input_ids" encoding."""
        return len(self.encodings["input_ids"])


def compute_metrics(p):
    """Score one evaluation pass.

    Args:
        p: Pair ``(predictions, labels)``. The predicted class of each example
            is taken as the argmax of ``predictions`` along axis 1.

    Returns:
        dict with keys "accuracy", "precision", "recall" and "f1"; the last
        three are macro-averaged.

    Side effect: prints the confusion matrix of labels vs. predicted classes.
    """
    scores, y_true = p
    y_pred = np.argmax(scores, axis=1)

    print(confusion_matrix(y_true, y_pred))

    # Shared keyword arguments for the three macro-averaged metrics.
    macro_kwargs = {"y_true": y_true, "y_pred": y_pred, "average": 'macro'}
    return {
        "accuracy": accuracy_score(y_true=y_true, y_pred=y_pred),
        "precision": precision_score(**macro_kwargs),
        "recall": recall_score(**macro_kwargs),
        "f1": f1_score(**macro_kwargs),
    }

# Two-label sequence classifier initialised from the pretrained checkpoint.
model = AutoModelForSequenceClassification.from_pretrained(
    pretrained_model_tokenizer_path,
    num_labels=2,
)

# Freeze the first `freeze_layer_count` encoder layers: their parameters stop
# receiving gradients. All other parameters are left as loaded.
frozen_layers = model.roberta.encoder.layer[:freeze_layer_count]
for param in (p for layer in frozen_layers for p in layer.parameters()):
    param.requires_grad = False

# Seed for the row shuffles below; same value as the Trainer seed. Without it
# the example order (and therefore batch composition) changes on every run,
# even though training itself is seeded.
shuffle_seed = 200

# Shuffle and unpack the training split.
df_input = df_input.sample(frac=1, random_state=shuffle_seed)
text_train = list(df_input['text'])
label_train = list(df_input['label'])

# Shuffle and unpack the validation split.
df_input_val = df_input_val.sample(frac=1, random_state=shuffle_seed)
text_val = list(df_input_val['text'])
label_val = list(df_input_val['label'])

# Encode both splits: pad to the longest sequence in the split and truncate
# anything longer than 100 tokens.
text_train_tokenized = tokenizer(text_train, padding=True, truncation=True, max_length=100)
text_val_tokenized = tokenizer(text_val, padding=True, truncation=True, max_length=100)

train_dataset = Dataset(text_train_tokenized, label_train)
val_dataset = Dataset(text_val_tokenized, label_val)

def model_init():
    """Build a fresh classifier, e.g. for each hyper-parameter search trial.

    Mirrors the module-level ``model`` so that a search tunes the same
    configuration that is trained afterwards: a two-label sequence classifier
    loaded from ``pretrained_model_tokenizer_path`` with the first
    ``freeze_layer_count`` encoder layers frozen.

    Returns:
        The newly loaded model.
    """
    fresh_model = AutoModelForSequenceClassification.from_pretrained(
        pretrained_model_tokenizer_path, num_labels=2, return_dict=True)
    # Same freezing scheme as applied to `model`.
    for layer in fresh_model.roberta.encoder.layer[:freeze_layer_count]:
        for param in layer.parameters():
            param.requires_grad = False
    return fresh_model
    
# Training configuration: evaluate and log once per epoch.
# (`eval_steps` is intentionally not set: it only applies when
# evaluation_strategy="steps" and is ignored with "epoch".)
args = TrainingArguments(
    output_dir="output",
    evaluation_strategy="epoch",
    logging_strategy="epoch",
    per_device_train_batch_size=8,
    per_device_eval_batch_size=8,
    num_train_epochs=no_train_epochs,
    seed=200,
)

# Trainer over the partially frozen `model`; validation metrics come from
# `compute_metrics`. To run a hyper-parameter search, pass
# model_init=model_init instead of model=model.
trainer = Trainer(
    model=model,
    args=args,
    train_dataset=train_dataset,
    eval_dataset=val_dataset,
    compute_metrics=compute_metrics,
)

# Hyper-parameter search, currently disabled: the call is wrapped in a bare
# string literal, so it is only a no-op expression and never executes.
'''best_trial = trainer.hyperparameter_search(
            backend="ray",
            direction='maximize',
            n_trials=10,
       )'''

# NOTE(review): the next line looks like a row pasted from a Ray Tune status
# table, presumably a past trial; the column meanings are not recorded here.
#RUNNING    | 172.28.0.2:9174 |     1.12076e-05 |                  4 |                     16 |  1.89943 
# Fine-tune the model, then write it to `model_path` (which must be defined
# before this point).
trainer.train()
trainer.save_model(model_path)

# --- Evaluation on the test split ---
# Shuffle with a fixed seed (same value as the Trainer seed) so the printed
# prediction vector is identical on every run.
df_test = df_test.sample(frac=1, random_state=200)

text_test = list(df_test['text'])
label_test = list(df_test['label'])

text_test_tokenized = tokenizer(text_test, padding=True, truncation=True, max_length=100)
# No labels are attached: predictions are scored against label_test below.
test_dataset = Dataset(text_test_tokenized)

# Reload the model from the directory it was saved to after training.
model = AutoModelForSequenceClassification.from_pretrained(model_path, num_labels=2)

# Trainer with default arguments, used only for batched prediction.
test_trainer = Trainer(model)

# Make prediction
raw_pred, _, _ = test_trainer.predict(test_dataset)
# If the model returns more than the logits (e.g. hidden states), the
# predictions arrive as a tuple whose first element is the logits.
if isinstance(raw_pred, tuple):
    raw_pred = raw_pred[0]

# Predicted class = index of the largest logit per example.
y_pred = np.argmax(raw_pred, axis=1)

print("Testing done")
print(y_pred)
print("Confusion Matrix:")
print(confusion_matrix(label_test, y_pred))

test_f1 = f1_score(y_true=label_test, y_pred=y_pred, average='macro')
test_accuracy = accuracy_score(y_true=label_test, y_pred=y_pred)
test_recall = recall_score(y_true=label_test, y_pred=y_pred, average='macro')
test_precision = precision_score(y_true=label_test, y_pred=y_pred, average='macro')

print("Test scores")
print("Accuracy: {}\nF1: {}\nPrecision: {}\nRecall: {}\n".format(test_accuracy, test_f1, test_precision, test_recall))

Could not locate the tokenizer configuration file, will try to use the model config instead.
loading configuration file config.json from cache at /root/.cache/huggingface/hub/models--distilroberta-base/snapshots/c1149320821601524a8d373726ed95bbd2bc0dc2/config.json
Model config RobertaConfig {
  "_name_or_path": "distilroberta-base",
  "architectures": [
    "RobertaForMaskedLM"
  ],
  "attention_probs_dropout_prob": 0.1,
  "bos_token_id": 0,
  "classifier_dropout": null,
  "eos_token_id": 2,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "layer_norm_eps": 1e-05,
  "max_position_embeddings": 514,
  "model_type": "roberta",
  "num_attention_heads": 12,
  "num_hidden_layers": 6,
  "pad_token_id": 1,
  "position_embedding_type": "absolute",
  "transformers_version": "4.23.1",
  "type_vocab_size": 1,
  "use_cache": true,
  "vocab_size": 50265
}

loading file vocab.json from cache at /root/.cache/hugging

Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,0.627,0.392029,0.776,0.797155,0.795597,0.775986
2,0.3866,0.284311,0.888,0.885351,0.885351,0.885351
3,0.3029,0.376759,0.904,0.90173,0.90173,0.90173
4,0.2527,0.400775,0.904,0.903683,0.89924,0.901212
5,0.2156,0.42184,0.904,0.903683,0.89924,0.901212
6,0.1801,0.46829,0.896,0.894362,0.892296,0.893268


***** Running Evaluation *****
  Num examples = 125
  Batch size = 8


[[48 24]
 [ 4 49]]


***** Running Evaluation *****
  Num examples = 125
  Batch size = 8


[[65  7]
 [ 7 46]]


***** Running Evaluation *****
  Num examples = 125
  Batch size = 8


[[66  6]
 [ 6 47]]


***** Running Evaluation *****
  Num examples = 125
  Batch size = 8


[[67  5]
 [ 7 46]]


***** Running Evaluation *****
  Num examples = 125
  Batch size = 8


[[67  5]
 [ 7 46]]


***** Running Evaluation *****
  Num examples = 125
  Batch size = 8


[[66  6]
 [ 7 46]]




Training completed. Do not forget to share your model on huggingface.co/models =)


Saving model checkpoint to /content/drive/MyDrive/Thesis/finetuned_dr_ksdt_baseline_20_t6_epochs
Configuration saved in /content/drive/MyDrive/Thesis/finetuned_dr_ksdt_baseline_20_t6_epochs/config.json
Model weights saved in /content/drive/MyDrive/Thesis/finetuned_dr_ksdt_baseline_20_t6_epochs/pytorch_model.bin
loading configuration file /content/drive/MyDrive/Thesis/finetuned_dr_ksdt_baseline_20_t6_epochs/config.json
Model config RobertaConfig {
  "_name_or_path": "/content/drive/MyDrive/Thesis/finetuned_dr_ksdt_baseline_20_t6_epochs",
  "architectures": [
    "RobertaForSequenceClassification"
  ],
  "attention_probs_dropout_prob": 0.1,
  "bos_token_id": 0,
  "classifier_dropout": null,
  "eos_token_id": 2,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "layer_norm_eps": 1e-05,
  "max_position_embeddings": 514,


Testing done
[1 0 0 0 0 1 1 1 0 1 0 1 0 0 0 0 0 1 0 0 0 0 0 0 1 1 0 0 1 1 1 0 0 0 0 1 0
 1 0 0 1 0 0 0 1 0 0 1 0 0 0 0 0 1 0 1 1 0 0 1 0 0 1 0 0 1 0 0 0 1 1 1 0 0
 0 0 0 1 0 0 0 1 0 0 1 0 0 0 0 0 0 0 0 0 0 0 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0
 0 1 1 1 1 0 0 1 0 1 0 0 0 1]
Confusion Matrix:
[[75  5]
 [11 34]]
Test scores
Accuracy: 0.872
F1: 0.8565691336775674
Precision: 0.8719439475253429
Recall: 0.8465277777777778



In [None]:
# Same checkpoint name and tokenizer construction as in the earlier cells.
# NOTE(review): presumably repeated so this evaluation cell can be run without
# the training cells; confirm, otherwise this duplication can go.
pretrained_model_tokenizer_path = r"distilroberta-base"
tokenizer = AutoTokenizer.from_pretrained(pretrained_model_tokenizer_path)

# Create torch dataset
class Dataset(torch.utils.data.Dataset):
    """Torch dataset over tokenizer encodings with optional labels.

    Args:
        encodings: Mapping from feature name to a per-example sequence of
            values. Must contain an "input_ids" entry, which determines the
            dataset length.
        labels: Optional per-example labels (list, numpy array, ...). When
            omitted or empty, items carry no "labels" entry.
    """

    def __init__(self, encodings, labels=None):
        self.encodings = encodings
        self.labels = labels

    def __getitem__(self, idx):
        """Return the features (and label, if available) of example ``idx`` as tensors."""
        item = {key: torch.tensor(val[idx]) for key, val in self.encodings.items()}
        # Test for presence explicitly: the truth value of a numpy array or a
        # pandas Series is ambiguous and raises ValueError. The length check
        # keeps an empty label list meaning "no labels".
        if self.labels is not None and len(self.labels) > 0:
            item["labels"] = torch.tensor(self.labels[idx])
        return item

    def __len__(self):
        """Number of examples, taken from the "input_ids" encoding."""
        return len(self.encodings["input_ids"])

# --- Evaluation on the negation test set ---
# Directory of the fine-tuned model to evaluate (left blank here).
model_path = ""

#_perturbed_negation_baseline_100
df_test_adv_neg = pd.read_csv(r"")

text_test_adv_neg = list(df_test_adv_neg['text'])
label_test_adv_neg = list(df_test_adv_neg['label'])

text_test_adv_neg_tokenized = tokenizer(text_test_adv_neg, padding=True, truncation=True, max_length=100)
# No labels are attached: predictions are scored against label_test_adv_neg below.
test_dataset_adv_neg = Dataset(text_test_adv_neg_tokenized)

model = AutoModelForSequenceClassification.from_pretrained(model_path, num_labels=2)

# Trainer with default arguments, used only for batched prediction.
test_adv_neg_trainer = Trainer(model)

# Make prediction
raw_pred_adv_neg, _, _ = test_adv_neg_trainer.predict(test_dataset_adv_neg)

# A model that also returns hidden states yields a tuple whose first element
# is the logits; a logits-only model yields the logits array directly.
# Indexing [0] unconditionally would take the first *row* in the latter case.
if isinstance(raw_pred_adv_neg, tuple):
    raw_pred_adv_neg = raw_pred_adv_neg[0]

# Predicted class = index of the largest logit per example.
y_pred_adv_neg = np.argmax(raw_pred_adv_neg, axis=1)

# Show only the first few logit rows rather than dumping the whole array.
print(raw_pred_adv_neg[:5])
print("Testing done")
print(y_pred_adv_neg)
print("Confusion Matrix:")
print(confusion_matrix(label_test_adv_neg, y_pred_adv_neg))


test_adv_neg_f1 = f1_score(y_true=label_test_adv_neg, y_pred=y_pred_adv_neg, average='macro')
test_adv_neg_accuracy = accuracy_score(y_true=label_test_adv_neg, y_pred=y_pred_adv_neg)
test_adv_neg_recall = recall_score(y_true=label_test_adv_neg, y_pred=y_pred_adv_neg, average='macro')
test_adv_neg_precision = precision_score(y_true=label_test_adv_neg, y_pred=y_pred_adv_neg, average='macro')

print("Test scores For Negation")
print("Accuracy: {}\nF1: {}\nPrecision: {}\nRecall: {}\n".format(test_adv_neg_accuracy, test_adv_neg_f1, test_adv_neg_precision, test_adv_neg_recall))

loading configuration file /content/drive/MyDrive/Thesis/model_dr_df_gayrights_het_20e/config.json
Model config RobertaConfig {
  "_name_or_path": "/content/drive/MyDrive/Thesis/model_dr_df_gayrights_het_20e",
  "architectures": [
    "RobertaForMaskedLM"
  ],
  "attention_probs_dropout_prob": 0.1,
  "bos_token_id": 0,
  "classifier_dropout": null,
  "eos_token_id": 2,
  "gradient_checkpointing": false,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "layer_norm_eps": 1e-05,
  "max_position_embeddings": 514,
  "model_type": "roberta",
  "num_attention_heads": 12,
  "num_hidden_layers": 6,
  "output_hidden_states": true,
  "pad_token_id": 1,
  "position_embedding_type": "absolute",
  "transformers_version": "4.24.0",
  "type_vocab_size": 1,
  "use_cache": true,
  "vocab_size": 50265
}

loading file vocab.json
loading file merges.txt
loading file tokenizer.json
loading file added_tokens.json
loading f

[[ 0.5767654  -0.7248151 ]
 [-0.66268396  0.6675228 ]
 [-0.01091881 -0.03802142]
 [ 1.313789   -1.466486  ]
 [-0.593473    0.6783388 ]
 [-1.435955    1.5116082 ]
 [-0.7051036   0.679502  ]
 [ 0.33301216 -0.24528293]
 [-0.50969166  0.6779821 ]
 [-0.688125    0.84002566]
 [ 0.8571471  -1.0969044 ]
 [ 0.32382756 -0.43421003]
 [-0.3754681   0.3760809 ]
 [-1.6378511   1.7588665 ]
 [ 0.527103   -0.5236211 ]
 [-0.8370453   0.9310567 ]
 [ 1.2860839  -1.582164  ]
 [-2.3191764   2.445303  ]
 [-1.8710716   1.9750698 ]
 [ 1.1299139  -1.3155646 ]
 [-0.3562359   0.33243665]
 [ 0.6632202  -0.71466273]
 [-2.069438    2.1197567 ]
 [-2.1473763   2.2932649 ]
 [-1.779853    1.9149762 ]
 [-1.8105823   1.823574  ]
 [-1.5573834   1.5718364 ]
 [-0.3731844   0.41250715]
 [ 1.064401   -1.4343277 ]
 [-2.012571    2.1962478 ]
 [ 0.6146445  -0.764507  ]
 [-2.6920815   2.9445598 ]
 [-0.5509793   0.4730823 ]
 [-1.4928645   1.4528519 ]
 [-1.1093079   1.1370183 ]
 [-2.0570986   2.1974428 ]
 [ 1.626082   -1.8582761 ]
 

In [None]:
# --- Evaluation on the spelling test set ---
# Directory of the fine-tuned model to evaluate (left blank here).
model_path = ""

# Loaded under its own name so the negation frame `df_test_adv_neg` from the
# previous cell is not overwritten with spelling data.
df_test_adv_spell = pd.read_csv(r"")

text_test_adv_spell = list(df_test_adv_spell['text'])
label_test_adv_spell = list(df_test_adv_spell['label'])

text_test_adv_spell_tokenized = tokenizer(text_test_adv_spell, padding=True, truncation=True, max_length=100)
# No labels are attached: predictions are scored against label_test_adv_spell below.
test_dataset_adv_spell = Dataset(text_test_adv_spell_tokenized)

model = AutoModelForSequenceClassification.from_pretrained(model_path, num_labels=2)

# Trainer with default arguments, used only for batched prediction.
test_adv_spell_trainer = Trainer(model)

# Make prediction
raw_pred_adv_spell, _, _ = test_adv_spell_trainer.predict(test_dataset_adv_spell)
# If the model returns more than the logits (e.g. hidden states), the
# predictions arrive as a tuple whose first element is the logits.
if isinstance(raw_pred_adv_spell, tuple):
    raw_pred_adv_spell = raw_pred_adv_spell[0]

# Predicted class = index of the largest logit per example.
y_pred_adv_spell = np.argmax(raw_pred_adv_spell, axis=1)

print("Testing done")
print(y_pred_adv_spell)
print("Confusion Matrix:")
print(confusion_matrix(label_test_adv_spell, y_pred_adv_spell))

test_adv_spell_f1 = f1_score(y_true=label_test_adv_spell, y_pred=y_pred_adv_spell, average='macro')
test_adv_spell_accuracy = accuracy_score(y_true=label_test_adv_spell, y_pred=y_pred_adv_spell)
test_adv_spell_recall = recall_score(y_true=label_test_adv_spell, y_pred=y_pred_adv_spell, average='macro')
test_adv_spell_precision = precision_score(y_true=label_test_adv_spell, y_pred=y_pred_adv_spell, average='macro')

print("Test scores for Spelling")
print("Accuracy: {}\nF1: {}\nPrecision: {}\nRecall: {}\n".format(test_adv_spell_accuracy, test_adv_spell_f1, test_adv_spell_precision, test_adv_spell_recall))


loading configuration file /content/drive/MyDrive/Thesis/finetuned_dr_ksdt_baseline_20_t6_epochs/config.json
Model config RobertaConfig {
  "_name_or_path": "/content/drive/MyDrive/Thesis/finetuned_dr_ksdt_baseline_20_t6_epochs",
  "architectures": [
    "RobertaForSequenceClassification"
  ],
  "attention_probs_dropout_prob": 0.1,
  "bos_token_id": 0,
  "classifier_dropout": null,
  "eos_token_id": 2,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "layer_norm_eps": 1e-05,
  "max_position_embeddings": 514,
  "model_type": "roberta",
  "num_attention_heads": 12,
  "num_hidden_layers": 6,
  "pad_token_id": 1,
  "position_embedding_type": "absolute",
  "problem_type": "single_label_classification",
  "torch_dtype": "float32",
  "transformers_version": "4.23.1",
  "type_vocab_size": 1,
  "use_cache": true,
  "vocab_size": 50265
}

loading weights file /content/drive/MyDrive/Thesis/finetuned_dr_ksdt_bas

Testing done
[0 0 1 1 0 0 0 1 0 0 0 1 0 0 0 0 0 0 1 0 0 0 1 0 1 0 0 1 0 0 0 1 1 0 0 0 1
 1 0 0 1 1 0 0 0 1 1 0 0 0 0 0 0 0 1 0 0 0 1 0 0 1 0 0 0 0 0 1 1 1 0 0 1 1
 0 0 0 0 1 0 1 0 0 0 0 0 1 1 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 1 0 1 0 0 1 1 1
 0 0 1 0 1 0 0 0 0 0 0 0 1 1]
Confusion Matrix:
[[76  4]
 [11 34]]
Test scores for Spelling
Accuracy: 0.88
F1: 0.8647283745761489
Precision: 0.8841500302480338
Recall: 0.8527777777777777

