### This notebook has been run multiple times to ensure the results in the paper

In [1]:
from datasets import load_dataset


dataset = load_dataset("csv", data_files="challenge_data/atis/full_atis_filt_gpt.csv", split='train', keep_default_na=False)

dct_dataset = dataset.train_test_split(test_size=0.4) # this changes based on the dataset

dataset_train = dct_dataset['train']
dataset_test = dct_dataset['test']

  from .autonotebook import tqdm as notebook_tqdm
Found cached dataset csv (/home/jan_cegin/.cache/huggingface/datasets/csv/default-039df806f9f61e43/0.0.0/6954658bab30a358235fa864b05cf819af0e179325c740e4bc853bcc7ec513e1)


In [3]:
from transformers import AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("bert-large-uncased")


def tokenize_function(examples):
    return tokenizer(examples["text"], padding="max_length", truncation=True)


tokenized_datasets = dataset_train.map(tokenize_function, batched=True)
tokenized_test_datasets = dataset_test.map(tokenize_function, batched=True)

                                                                                                                                                                  

In [4]:
from transformers import AutoModelForSequenceClassification

model = AutoModelForSequenceClassification.from_pretrained("bert-large-uncased", num_labels=6)

Some weights of the model checkpoint at bert-large-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.transform.dense.bias', 'cls.seq_relationship.bias', 'cls.predictions.decoder.weight', 'cls.seq_relationship.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.bias', 'cls.predictions.transform.LayerNorm.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint a

In [5]:
from transformers import TrainingArguments

training_args = TrainingArguments(output_dir="bert_taboo")

import numpy as np
import evaluate

metric = evaluate.load("accuracy")

def compute_metrics(eval_pred):
    logits, labels = eval_pred
    predictions = np.argmax(logits, axis=-1)
    return metric.compute(predictions=predictions, references=labels)

from transformers import TrainingArguments, Trainer

training_args = TrainingArguments(output_dir="bert_taboo", evaluation_strategy="no", save_strategy="no", per_device_eval_batch_size=32, per_device_train_batch_size=4, learning_rate=1e-5, num_train_epochs=5, save_steps=3000)

In [6]:
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=tokenized_datasets,
    compute_metrics=compute_metrics
)

#### evaluating on different data

different datasets are used based on the data, e.g. for GPT train we dont evaluate on GPT filt (without taboo samples) separately, because it does not make sense

In [7]:
trainer.train()

The following columns in the training set don't have a corresponding argument in `BertForSequenceClassification.forward` and have been ignored: text. If text are not expected by `BertForSequenceClassification.forward`,  you can safely ignore this message.
***** Running training *****
  Num examples = 2652
  Num Epochs = 5
  Instantaneous batch size per device = 4
  Total train batch size (w. parallel, distributed & accumulation) = 4
  Gradient Accumulation steps = 1
  Total optimization steps = 3315
  Number of trainable parameters = 335148038


Step,Training Loss
500,0.6994
1000,0.1298
1500,0.0699
2000,0.0234
2500,0.0086
3000,0.0022




Training completed. Do not forget to share your model on huggingface.co/models =)




TrainOutput(global_step=3315, training_loss=0.14081988006066054, metrics={'train_runtime': 1998.6648, 'train_samples_per_second': 6.634, 'train_steps_per_second': 1.659, 'total_flos': 1.235757689008128e+16, 'train_loss': 0.14081988006066054, 'epoch': 5.0})

In [8]:
trainer.evaluate(tokenized_test_datasets)

The following columns in the evaluation set don't have a corresponding argument in `BertForSequenceClassification.forward` and have been ignored: text. If text are not expected by `BertForSequenceClassification.forward`,  you can safely ignore this message.
***** Running Evaluation *****
  Num examples = 1768
  Batch size = 32


{'eval_loss': 0.06197355315089226,
 'eval_accuracy': 0.9903846153846154,
 'eval_runtime': 77.4275,
 'eval_samples_per_second': 22.834,
 'eval_steps_per_second': 0.723,
 'epoch': 5.0}

In [9]:
dataset_orig_test = load_dataset("csv", data_files="challenge_data/atis/full_atis_orig_test.csv", split='train', keep_default_na=False)


dataset = load_dataset("csv", data_files="challenge_data/atis/full_atis_human.csv", split='train', keep_default_na=False)

dct_dataset = dataset.train_test_split(test_size=0.2)

dataset_human_test = dct_dataset['test']


dataset = load_dataset("csv", data_files="challenge_data/atis/full_atis_gpt.csv", split='train', keep_default_na=False)

dct_dataset = dataset.train_test_split(test_size=0.2)

dataset_gpt_test = dct_dataset['test']



dataset = load_dataset("csv", data_files="challenge_data/atis/full_atis_filt_gpt.csv", split='train', keep_default_na=False)

dct_dataset = dataset.train_test_split(test_size=0.2)

dataset_gpt_test_filt = dct_dataset['test']

Found cached dataset csv (/home/jan_cegin/.cache/huggingface/datasets/csv/default-9c4e2671d2ed7fe2/0.0.0/6954658bab30a358235fa864b05cf819af0e179325c740e4bc853bcc7ec513e1)
Found cached dataset csv (/home/jan_cegin/.cache/huggingface/datasets/csv/default-79efc8bf879faff6/0.0.0/6954658bab30a358235fa864b05cf819af0e179325c740e4bc853bcc7ec513e1)
Loading cached split indices for dataset at /home/jan_cegin/.cache/huggingface/datasets/csv/default-79efc8bf879faff6/0.0.0/6954658bab30a358235fa864b05cf819af0e179325c740e4bc853bcc7ec513e1/cache-ad72a84db2bdf0c5.arrow and /home/jan_cegin/.cache/huggingface/datasets/csv/default-79efc8bf879faff6/0.0.0/6954658bab30a358235fa864b05cf819af0e179325c740e4bc853bcc7ec513e1/cache-2d0f07840078757d.arrow
Found cached dataset csv (/home/jan_cegin/.cache/huggingface/datasets/csv/default-706f538ba598c911/0.0.0/6954658bab30a358235fa864b05cf819af0e179325c740e4bc853bcc7ec513e1)
Loading cached split indices for dataset at /home/jan_cegin/.cache/huggingface/datasets/csv/d

In [10]:
tokenized_dataset_orig_t = dataset_orig_test.map(tokenize_function, batched=True)
tokenized_dataset_h_t= dataset_human_test.map(tokenize_function, batched=True)
tokenized_dataset_gpt = dataset_gpt_test.map(tokenize_function, batched=True)
tokenized_dataset_gpt_filt = dataset_gpt_test_filt.map(tokenize_function, batched=True)

Loading cached processed dataset at /home/jan_cegin/.cache/huggingface/datasets/csv/default-9c4e2671d2ed7fe2/0.0.0/6954658bab30a358235fa864b05cf819af0e179325c740e4bc853bcc7ec513e1/cache-d1020f3c55d4f794.arrow
Loading cached processed dataset at /home/jan_cegin/.cache/huggingface/datasets/csv/default-79efc8bf879faff6/0.0.0/6954658bab30a358235fa864b05cf819af0e179325c740e4bc853bcc7ec513e1/cache-6c808c704a6a4214.arrow
Loading cached processed dataset at /home/jan_cegin/.cache/huggingface/datasets/csv/default-706f538ba598c911/0.0.0/6954658bab30a358235fa864b05cf819af0e179325c740e4bc853bcc7ec513e1/cache-c38e38d3027acb7a.arrow
Loading cached processed dataset at /home/jan_cegin/.cache/huggingface/datasets/csv/default-039df806f9f61e43/0.0.0/6954658bab30a358235fa864b05cf819af0e179325c740e4bc853bcc7ec513e1/cache-89359e08b9dd5bba.arrow


In [11]:
trainer.evaluate(tokenized_dataset_orig_t)

The following columns in the evaluation set don't have a corresponding argument in `BertForSequenceClassification.forward` and have been ignored: text. If text are not expected by `BertForSequenceClassification.forward`,  you can safely ignore this message.
***** Running Evaluation *****
  Num examples = 759
  Batch size = 32


{'eval_loss': 0.6873182654380798,
 'eval_accuracy': 0.9011857707509882,
 'eval_runtime': 33.1917,
 'eval_samples_per_second': 22.867,
 'eval_steps_per_second': 0.723,
 'epoch': 5.0}

In [12]:
trainer.evaluate(tokenized_dataset_h_t)

The following columns in the evaluation set don't have a corresponding argument in `BertForSequenceClassification.forward` and have been ignored: text. If text are not expected by `BertForSequenceClassification.forward`,  you can safely ignore this message.
***** Running Evaluation *****
  Num examples = 461
  Batch size = 32


{'eval_loss': 0.3780781328678131,
 'eval_accuracy': 0.9544468546637744,
 'eval_runtime': 20.226,
 'eval_samples_per_second': 22.792,
 'eval_steps_per_second': 0.742,
 'epoch': 5.0}

In [13]:
#trainer.evaluate(tokenized_dataset_gpt)

In [14]:
#trainer.evaluate(tokenized_dataset_gpt_filt)