In [None]:
!pip install datasets
!pip install transformers
!pip install evaluate
!pip install accelerate -U
!pip install numpy

In [None]:
from trainer_compatible.bert_prefix import BertForSequenceClassification_Prefix
from trainer_compatible.bert_prefix_gated import BertForSequenceClassification_Prefix_Gated
from trainer_compatible.roberta_prefix import RobertaForSequenceClassification_Prefix
from trainer_compatible.roberta_prefix_gated import RobertaForSequenceClassification_Prefix_Gated

In [None]:
from datasets import load_dataset
from transformers import AutoTokenizer, AutoConfig
from transformers import DataCollatorWithPadding
from transformers import AutoModelForSequenceClassification, TrainingArguments, Trainer
import evaluate
import numpy as np
import random

In [1]:
# Fetch the "cola" configuration of the "glue" dataset and expose its two
# splits under the names used by the rest of the notebook. The "validation"
# split plays the role of the evaluation set (`test_ds`).
dataset = load_dataset("glue", "cola")
train_ds, test_ds = dataset["train"], dataset["validation"]

# Metric object consumed by compute_metrics.
accuracy = evaluate.load("accuracy")

In [None]:
def compute_metrics(eval_pred):
  """Score model outputs with the notebook-level `accuracy` metric.

  `eval_pred` is unpacked into two items: the raw model outputs and the
  reference labels. The predicted class of each row is the argmax along
  axis 1. Returns whatever `accuracy.compute` returns.
  """
  logits, references = eval_pred
  predicted_classes = np.argmax(logits, axis=1)
  return accuracy.compute(predictions=predicted_classes, references=references)

In [None]:
# Number of target classes written into every prefix-model config below.
num_labels = 2
# Prefix lengths iterated over by the prefix-tuning cells.
prefix_lengths = [1, 4, 8, 16, 64]
# Empty dict set aside for collected results (nothing is stored by this cell).
results = dict()

# BERT Architecture Models

In [None]:
# Checkpoint identifier shared by every BERT cell below (tokenizer, config and
# model loading all read `model_name`).
model_name = "google-bert/bert-base-uncased"
# Tokenizer for that checkpoint; used by preprocess_function, the data
# collator and the Trainer.
tokenizer = AutoTokenizer.from_pretrained(model_name)

In [None]:
def preprocess_function(sample):
  """Tokenize the "sentence" field of `sample` with the global `tokenizer`.

  Truncation is enabled with a maximum length of 256; padding is not
  requested here and is left to the data collator.
  """
  encoded = tokenizer(sample["sentence"], truncation=True, max_length=256)
  return encoded

# Collator that pads each batch with the same tokenizer.
data_collator = DataCollatorWithPadding(tokenizer=tokenizer)

# Tokenize the training split first, then the evaluation split, in batched mode.
tokenized_train_ds, tokenized_test_ds = (
    split.map(preprocess_function, batched=True)
    for split in (train_ds, test_ds)
)

#### Fully Parameterized BERT

In [None]:
# Load the checkpoint named by `model_name` for sequence classification.
# `num_labels` is not passed here, so the checkpoint/library default applies.
model = AutoModelForSequenceClassification.from_pretrained(model_name)

In [None]:
# Full fine-tuning: mark every parameter of the model as trainable.
for weight in model.parameters():
    weight.requires_grad_(True)

In [None]:
# Full fine-tuning run for BERT on CoLA. Evaluation, checkpointing and logging
# all happen once per epoch; `save_total_limit=1` caps how many checkpoints
# are retained and the best checkpoint is reloaded when training ends.
training_args = TrainingArguments(
    output_dir="./results/cola/bert",  # plain string: there is nothing to interpolate
    learning_rate=2e-5,
    per_device_train_batch_size=16,
    per_device_eval_batch_size=16,
    num_train_epochs=10,
    weight_decay=0.01,
    evaluation_strategy="epoch",
    save_strategy="epoch",
    logging_strategy="epoch",
    save_total_limit=1,
    load_best_model_at_end=True
)

# The Trainer evaluates on the tokenized validation split and scores it with
# compute_metrics; batches are padded by data_collator.
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=tokenized_train_ds,
    eval_dataset=tokenized_test_ds,
    tokenizer=tokenizer,
    data_collator=data_collator,
    compute_metrics=compute_metrics,
)

trainer.train()

In [None]:
# Evaluate the current `trainer` (no dataset override is passed) and print the
# metrics it returns.
eval_result = trainer.evaluate()
print(eval_result)

#### BERT Prefix

In [None]:
# Names of the parameters that stay trainable for BERT prefix tuning; every
# other parameter of the model is frozen inside the loop.
trainable_param_names = (
    "bert.encoder.prefix", "bert.pooler.dense.weight",
    "bert.pooler.dense.bias", "classifier.weight", "classifier.bias",
)

# Train and evaluate one model per prefix length. The Trainer is built and run
# inside the loop so that every length is actually trained and scored; built
# after the loop it would only ever see the model of the last prefix length,
# and without trainer.train() the evaluation would score untrained weights.
for prefix_len in prefix_lengths:
  print(f"Training with prefix length: {prefix_len} for BERT Prefix.")
  config = AutoConfig.from_pretrained(model_name)
  config.num_labels = num_labels
  # presumably read by the prefix model to size its prefix — verify in
  # trainer_compatible.bert_prefix
  config.prefix_len = prefix_len
  model = BertForSequenceClassification_Prefix.from_pretrained(model_name,
                                                               config=config)

  # Freeze everything that is not explicitly listed as trainable.
  for name, param in model.named_parameters():
    if name not in trainable_param_names:
      param.requires_grad = False
  total_params = sum(p.numel() for p in model.parameters() if p.requires_grad)
  print(f"Number of parameters: {total_params}")

  # One output directory per prefix length so runs do not overwrite each other.
  training_args = TrainingArguments(
      output_dir=f"./results/cola/bert_prefix_{prefix_len}",
      learning_rate=2e-5,
      per_device_train_batch_size=16,
      per_device_eval_batch_size=16,
      num_train_epochs=50,
      weight_decay=0.01,
      evaluation_strategy="epoch",
      save_strategy="epoch",
      logging_strategy="epoch",
      save_total_limit=1,
      load_best_model_at_end=True)

  trainer = Trainer(
      model=model,
      args=training_args,
      train_dataset=tokenized_train_ds,
      eval_dataset=tokenized_test_ds,
      tokenizer=tokenizer,
      data_collator=data_collator,
      compute_metrics=compute_metrics,
  )
  trainer.train()

  # Score the trained model and keep the metrics under a per-run key.
  eval_result = trainer.evaluate()
  print(eval_result)
  results[f"bert_prefix_{prefix_len}"] = eval_result

#### BERT Gated Prefix

In [None]:
# Names of the parameters that stay trainable for gated BERT prefix tuning.
# Any parameter whose name contains "gate_mlps" is kept trainable as well.
trainable_param_names = (
    "bert.encoder.prefix", "bert.pooler.dense.weight",
    "bert.pooler.dense.bias", "classifier.weight", "classifier.bias",
)

# Train and evaluate one gated-prefix model per prefix length. The Trainer is
# built and run inside the loop so that every length is actually trained and
# scored; built after the loop it would only ever see the model of the last
# prefix length, and without trainer.train() it would score untrained weights.
for prefix_len in prefix_lengths:
  print(f"Training with prefix length: {prefix_len} for BERT Prefix Gated.")
  config = AutoConfig.from_pretrained(model_name)
  config.num_labels = num_labels
  # presumably read by the gated prefix model to size its prefix — verify in
  # trainer_compatible.bert_prefix_gated
  config.prefix_len = prefix_len
  model = BertForSequenceClassification_Prefix_Gated.from_pretrained(
      model_name, config=config)

  # Freeze everything except the listed parameters and the gate MLPs.
  for name, param in model.named_parameters():
    if name not in trainable_param_names and "gate_mlps" not in name:
      param.requires_grad = False

  total_params = sum(p.numel() for p in model.parameters() if p.requires_grad)
  print(f"Number of parameters: {total_params}")

  # Checkpoints go under results/cola like every other run in this notebook
  # (the dataset loaded above is GLUE CoLA, not MultiNLI).
  training_args = TrainingArguments(
      output_dir=f"./results/cola/bert_prefix_gated_{prefix_len}",
      learning_rate=2e-5,
      per_device_train_batch_size=16,
      per_device_eval_batch_size=16,
      num_train_epochs=30,
      weight_decay=0.01,
      evaluation_strategy="epoch",
      save_strategy="epoch",
      logging_strategy="epoch",
      save_total_limit=1,
      load_best_model_at_end=True)

  trainer = Trainer(
      model=model,
      args=training_args,
      train_dataset=tokenized_train_ds,
      eval_dataset=tokenized_test_ds,
      tokenizer=tokenizer,
      data_collator=data_collator,
      compute_metrics=compute_metrics,
  )
  trainer.train()

  # Score the trained model and keep the metrics under a per-run key.
  eval_result = trainer.evaluate()
  print(eval_result)
  results[f"bert_prefix_gated_{prefix_len}"] = eval_result

# RoBERTa Architecture Models

#### Fully Parameterized RoBERTa

In [None]:
# Switch the shared `model_name` / `tokenizer` globals over to RoBERTa; every
# cell from here on that reads them works with this checkpoint.
model_name = "FacebookAI/roberta-base"
tokenizer = AutoTokenizer.from_pretrained(model_name)

In [None]:
def preprocess_function(sample):
  """Tokenize the "sentence" field of `sample` with the global `tokenizer`.

  Truncation is enabled with a maximum length of 256. The tokenizer is looked
  up when the function is called, so it follows the current global value.
  """
  encoded = tokenizer(sample["sentence"], truncation=True, max_length=256)
  return encoded

In [None]:
# Re-tokenize both splits with the RoBERTa tokenizer. preprocess_function is
# defined in the cell directly above and reads the global `tokenizer` when it
# is called, so it is not defined a second time here.
data_collator = DataCollatorWithPadding(tokenizer=tokenizer)
tokenized_train_ds = train_ds.map(preprocess_function, batched=True)
tokenized_test_ds = test_ds.map(preprocess_function, batched=True)

# RoBERTa classifier for the full fine-tuning baseline; `num_labels` is not
# passed, so the checkpoint/library default applies.
model = AutoModelForSequenceClassification.from_pretrained(model_name)

In [None]:
# Full fine-tuning: mark every parameter of the model as trainable.
for weight in model.parameters():
    weight.requires_grad_(True)

In [None]:
# Add up the element counts of every parameter that currently requires
# gradients, then report the total.
total_params = 0
for tensor in model.parameters():
  if tensor.requires_grad:
    total_params += tensor.numel()
print(f"Number of parameters: {total_params}")

In [None]:
# Full fine-tuning run for RoBERTa on CoLA. Evaluation, checkpointing and
# logging all happen once per epoch; `save_total_limit=1` caps how many
# checkpoints are retained and the best checkpoint is reloaded when training
# ends.
training_args = TrainingArguments(
    output_dir="./results/cola/roberta",  # plain string: there is nothing to interpolate
    learning_rate=2e-5,
    per_device_train_batch_size=16,
    per_device_eval_batch_size=16,
    num_train_epochs=10,
    weight_decay=0.01,
    evaluation_strategy="epoch",
    save_strategy="epoch",
    logging_strategy="epoch",
    save_total_limit=1,
    load_best_model_at_end=True
)

# The Trainer evaluates on the tokenized validation split and scores it with
# compute_metrics; batches are padded by data_collator.
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=tokenized_train_ds,
    eval_dataset=tokenized_test_ds,
    tokenizer=tokenizer,
    data_collator=data_collator,
    compute_metrics=compute_metrics,
)

trainer.train()

In [None]:
# Evaluate the current `trainer` (no dataset override is passed) and print the
# metrics it returns.
eval_result = trainer.evaluate()
print(eval_result)

#### RoBERTa Prefix

In [None]:
# Names of the parameters that stay trainable for RoBERTa prefix tuning; every
# other parameter of the model is frozen inside the loop.
trainable_param_names = (
    "roberta.encoder.prefix", "roberta.pooler.dense.weight",
    "roberta.pooler.dense.bias", "classifier.dense.weight",
    "classifier.dense.bias", "classifier.out_proj.weight",
    "classifier.out_proj.bias",
)

# Train and evaluate one model per prefix length. The Trainer is built and run
# inside the loop so that every length is actually trained and scored; built
# after the loop it would only ever see the model of the last prefix length,
# and without trainer.train() the evaluation would score untrained weights.
for prefix_len in prefix_lengths:
  print(f"Training with prefix length: {prefix_len} for RoBERTa Prefix.")
  config = AutoConfig.from_pretrained(model_name)
  config.num_labels = num_labels
  # presumably read by the prefix model to size its prefix — verify in
  # trainer_compatible.roberta_prefix
  config.prefix_len = prefix_len
  model = RobertaForSequenceClassification_Prefix.from_pretrained(
      model_name, config=config)

  # Freeze everything that is not explicitly listed as trainable.
  for name, param in model.named_parameters():
    if name not in trainable_param_names:
      param.requires_grad = False

  total_params = sum(p.numel() for p in model.parameters() if p.requires_grad)
  print(f"Number of parameters: {total_params}")

  # One output directory per prefix length so runs do not overwrite each other.
  training_args = TrainingArguments(
      output_dir=f"./results/cola/roberta_prefix_{prefix_len}",
      learning_rate=2e-5,
      per_device_train_batch_size=16,
      per_device_eval_batch_size=16,
      num_train_epochs=30,
      weight_decay=0.01,
      evaluation_strategy="epoch",
      save_strategy="epoch",
      logging_strategy="epoch",
      save_total_limit=1,
      load_best_model_at_end=True)

  trainer = Trainer(
      model=model,
      args=training_args,
      train_dataset=tokenized_train_ds,
      eval_dataset=tokenized_test_ds,
      tokenizer=tokenizer,
      data_collator=data_collator,
      compute_metrics=compute_metrics,
  )
  trainer.train()

  # Score the trained model and keep the metrics under a per-run key.
  eval_result = trainer.evaluate()
  print(eval_result)
  results[f"roberta_prefix_{prefix_len}"] = eval_result

#### RoBERTa Gated Prefix

In [None]:
# Names of the parameters that stay trainable for gated RoBERTa prefix tuning.
# Any parameter whose name contains "gate_mlps" is kept trainable as well.
trainable_param_names = (
    "roberta.encoder.prefix", "roberta.pooler.dense.weight",
    "roberta.pooler.dense.bias", "classifier.dense.weight",
    "classifier.dense.bias", "classifier.out_proj.weight",
    "classifier.out_proj.bias",
)

# Train and evaluate one gated-prefix model per prefix length. The Trainer is
# built and run inside the loop so that every length is actually trained and
# scored; built after the loop it would only ever see the model of the last
# prefix length, and without trainer.train() it would score untrained weights.
for prefix_len in prefix_lengths:
  print(f"Training with prefix length: {prefix_len} for RoBERTa Prefix Gated.")
  config = AutoConfig.from_pretrained(model_name)
  config.num_labels = num_labels
  # presumably read by the gated prefix model to size its prefix — verify in
  # trainer_compatible.roberta_prefix_gated
  config.prefix_len = prefix_len
  model = RobertaForSequenceClassification_Prefix_Gated.from_pretrained(
      model_name, config=config)

  # Freeze everything except the listed parameters and the gate MLPs.
  for name, param in model.named_parameters():
    if name not in trainable_param_names and "gate_mlps" not in name:
      param.requires_grad = False

  total_params = sum(p.numel() for p in model.parameters() if p.requires_grad)
  print(f"Number of parameters: {total_params}")

  # One output directory per prefix length so runs do not overwrite each other.
  training_args = TrainingArguments(
      output_dir=f"./results/cola/roberta_prefix_gated_{prefix_len}",
      learning_rate=2e-5,
      per_device_train_batch_size=16,
      per_device_eval_batch_size=16,
      num_train_epochs=30,
      weight_decay=0.01,
      evaluation_strategy="epoch",
      save_strategy="epoch",
      logging_strategy="epoch",
      save_total_limit=1,
      load_best_model_at_end=True)

  trainer = Trainer(
      model=model,
      args=training_args,
      train_dataset=tokenized_train_ds,
      eval_dataset=tokenized_test_ds,
      tokenizer=tokenizer,
      data_collator=data_collator,
      compute_metrics=compute_metrics,
  )
  trainer.train()

  # Score the trained model and keep the metrics under a per-run key.
  eval_result = trainer.evaluate()
  print(eval_result)
  results[f"roberta_prefix_gated_{prefix_len}"] = eval_result