In [1]:
import os
# These two variables are written before the tokenizer / model libraries below
# are imported, so they are already present in the process environment when
# those libraries load.
# NOTE(review): presumably this silences the tokenizers fork warning and pins
# the run to the first GPU — confirm on the target machine.
os.environ["TOKENIZERS_PARALLELISM"] = "false"
os.environ["CUDA_VISIBLE_DEVICES"] = "0"

import numpy as np
import pandas as pd
from datasets import load_dataset
import time
from transformers import AutoTokenizer, AutoModelForSequenceClassification
import datasets
from transformers import Trainer, TrainingArguments

# Raise the pandas column display width to 400 characters.
pd.set_option('display.max_colwidth', 400)

2022-06-19 23:23:31.801021: I tensorflow/core/util/util.cc:169] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.


In [2]:
def load_action_norm_split(path):
    """Load the train/dev/test JSON-lines files of one action+norm split.

    Args:
        path: Prefix that is concatenated directly with ``train.jsonl``,
            ``dev.jsonl`` and ``test.jsonl`` (no separator is inserted, so a
            directory prefix has to end with one).

    Returns:
        A ``(train, dev, test)`` tuple of DataFrames, each reduced to the
        columns ``norm``, ``action`` and ``label``. ``action`` is taken from
        ``moral_action`` for rows whose ``label`` equals 1 and from
        ``immoral_action`` for every other row.
    """
    def pick_action(row):
        # Only the column selected by the label is read for a given row.
        if row["label"] == 1:
            return row["moral_action"]
        return row["immoral_action"]

    wanted_columns = ["norm", "action", "label"]

    # Read all three files first, then derive the `action` column per frame.
    raw_frames = [
        pd.read_json(f"{path}{split_name}.jsonl", lines=True)
        for split_name in ["train", "dev", "test"]
    ]

    prepared = []
    for frame in raw_frames:
        frame["action"] = frame.apply(pick_action, axis=1)
        prepared.append(frame[wanted_columns])

    return tuple(prepared)


# Location of the action+norm / norm_distance split for each dataset variant.
variant_dirs = {
    "original_ms": "data/contrastive_moral_stories/original_ms/action+norm/norm_distance/",
    "optional_ms": "data/contrastive_moral_stories/optional_ms/action+norm/norm_distance/",
    "anti_ms": "data/contrastive_moral_stories/anti_ms/action+norm/norm_distance/",
    "contra_ms": "data/contrastive_moral_stories/contra_ms/action+norm/norm_distance/",
}

# One (train, dev, test) triple per variant, loaded in the order listed above.
train, dev, test = load_action_norm_split(variant_dirs["original_ms"])
opt_train, opt_dev, opt_test = load_action_norm_split(variant_dirs["optional_ms"])
anti_train, anti_dev, anti_test = load_action_norm_split(variant_dirs["anti_ms"])
contra_train, contra_dev, contra_test = load_action_norm_split(variant_dirs["contra_ms"])


In [3]:
# Evaluation sets, one entry per dataset variant, all built from the test frames.
# The shorthand in the comments appears to read (norm, action, label) — e.g.
# (N, A_M, 1) is an unmodified norm with a moral action and label 1.
dataset = datasets.DatasetDict({
    # original_ms: normal norms -> (N, A_M, 1) and (N, A_I, 0)
    "original_ms": datasets.Dataset.from_pandas(test),
    # optional_ms: optional norms, normal actions -> (ON, A_M, 1) and (ON, A_I, 1)
    "optional_ms": datasets.Dataset.from_pandas(opt_test),
    # anti_ms: anti-norms with negated labels -> (~N, A_M, 0) and (~N, A_I, 1)
    "anti_ms": datasets.Dataset.from_pandas(anti_test),
    # contra_ms: all of the variants above stacked together
    "contra_ms": datasets.Dataset.from_pandas(contra_test),
})

In [None]:
from datasets import load_metric
# Module-level accuracy metric object obtained from the `datasets` metric loader.
# NOTE(review): `load_metric` is reported as deprecated in newer `datasets`
# releases — confirm against the installed version before upgrading.
metric = load_metric('accuracy')

def compute_metrics(eval_pred):
    """Compute classification accuracy for one evaluation pass.

    Accuracy is computed directly with numpy, so the function does not depend
    on an externally loaded metric object.

    Args:
        eval_pred: A ``(predictions, labels)`` pair. ``predictions`` is an
            array of per-class scores with the classes on axis 1, or a tuple
            whose first element is that array. ``labels`` holds the reference
            class ids, one per row of ``predictions``.

    Returns:
        ``{"accuracy": value}`` where ``value`` is a Python float: the
        fraction of rows whose highest-scoring class equals the label.
    """
    predictions, labels = eval_pred
    # Some models hand back several output arrays; the class scores come first.
    if isinstance(predictions, tuple):
        predictions = predictions[0]
    predicted_ids = np.argmax(predictions, axis=1)
    hits = predicted_ids == np.asarray(labels)
    return {"accuracy": float(np.mean(hits))}

# Tokenizer used by `tokenize`; loaded from the "roberta-large" identifier.
tokenizer = AutoTokenizer.from_pretrained("roberta-large")


def tokenize(samples):
    """Encode a batch of (action, norm) text pairs.

    Args:
        samples: Batch mapping that provides the ``"action"`` and ``"norm"``
            entries; ``action`` is passed as the first sequence and ``norm``
            as the second.

    Returns:
        Whatever the module-level ``tokenizer`` returns for the pair, with
        truncation enabled and padding to a fixed length of 128.
    """
    first_sequence = samples["action"]
    second_sequence = samples["norm"]
    encoding_options = dict(truncation=True, padding="max_length", max_length=128)
    return tokenizer(first_sequence, second_sequence, **encoding_options)


# Tokenize every evaluation set, processing up to 10000 rows per call.
tokenized_data = dataset.map(tokenize, batched=True, batch_size=10000)

In [None]:
# Trainer output and logging are both pointed at the same folder.
scratch_dir = "data/trash"

training_args = TrainingArguments(
    per_device_eval_batch_size=128,
    output_dir=scratch_dir,
    logging_dir=scratch_dir,
)

In [9]:
# Checkpoint directory to evaluate, keyed by the variant name that also appears
# in each checkpoint path.
models = {
    "original_ms":"data/models/roberta-large/original_ms/bs16_lr_1e-05/checkpoint-1250/",
    "optional_ms":"data/models/roberta-large/optional_ms/bs16_lr_1e-05/checkpoint-1249/",
    "anti_ms":    "data/models/roberta-large/anti_ms/bs16_lr_1e-05/checkpoint-4996/",
    "contra_ms":  "data/models/roberta-large/contra_ms/bs16_lr_1e-05/checkpoint-14992/",
}

# Accuracy table: one row per checkpoint, one column per evaluation set.
results = pd.DataFrame(index=models.keys(), columns=tokenized_data.keys())

for model_name, model_path in models.items():
    model = AutoModelForSequenceClassification.from_pretrained(model_path)

    # The Trainer is used for evaluation only; no training data is passed.
    trainer = Trainer(
        model=model,
        args=training_args,
        compute_metrics=compute_metrics,
    )
    for split, data in tokenized_data.items():
        eval_metrics = trainer.evaluate(data)
        # Address the cell with a single (row, column) .loc call. The chained
        # form `results.loc[row][col] = ...` assigns into an intermediate
        # Series and leaves `results` untouched when that Series is a copy.
        results.loc[model_name, split] = eval_metrics["eval_accuracy"]

loading configuration file data/models/roberta-large/original_ms/bs16_lr_1e-05/checkpoint-1250/config.json
Model config RobertaConfig {
  "_name_or_path": "data/models/roberta-large/original_ms/bs16_lr_1e-05/checkpoint-1250/",
  "architectures": [
    "RobertaForSequenceClassification"
  ],
  "attention_probs_dropout_prob": 0.1,
  "bos_token_id": 0,
  "classifier_dropout": null,
  "eos_token_id": 2,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 1024,
  "initializer_range": 0.02,
  "intermediate_size": 4096,
  "layer_norm_eps": 1e-05,
  "max_position_embeddings": 514,
  "model_type": "roberta",
  "num_attention_heads": 16,
  "num_hidden_layers": 24,
  "pad_token_id": 1,
  "position_embedding_type": "absolute",
  "problem_type": "single_label_classification",
  "torch_dtype": "float16",
  "transformers_version": "4.19.2",
  "type_vocab_size": 1,
  "use_cache": true,
  "vocab_size": 50265
}

loading weights file data/models/roberta-large/original_ms/bs16_lr_1e-05/

The following columns in the evaluation set don't have a corresponding argument in `RobertaForSequenceClassification.forward` and have been ignored: action, norm. If action, norm are not expected by `RobertaForSequenceClassification.forward`,  you can safely ignore this message.
***** Running Evaluation *****
  Num examples = 2000
  Batch size = 128
The following columns in the evaluation set don't have a corresponding argument in `RobertaForSequenceClassification.forward` and have been ignored: action, norm. If action, norm are not expected by `RobertaForSequenceClassification.forward`,  you can safely ignore this message.
***** Running Evaluation *****
  Num examples = 2000
  Batch size = 128
The following columns in the evaluation set don't have a corresponding argument in `RobertaForSequenceClassification.forward` and have been ignored: action, norm. If action, norm are not expected by `RobertaForSequenceClassification.forward`,  you can safely ignore this message.
***** Running Ev

The following columns in the evaluation set don't have a corresponding argument in `RobertaForSequenceClassification.forward` and have been ignored: action, norm. If action, norm are not expected by `RobertaForSequenceClassification.forward`,  you can safely ignore this message.
***** Running Evaluation *****
  Num examples = 2000
  Batch size = 128
The following columns in the evaluation set don't have a corresponding argument in `RobertaForSequenceClassification.forward` and have been ignored: action, norm. If action, norm are not expected by `RobertaForSequenceClassification.forward`,  you can safely ignore this message.
***** Running Evaluation *****
  Num examples = 2000
  Batch size = 128
The following columns in the evaluation set don't have a corresponding argument in `RobertaForSequenceClassification.forward` and have been ignored: action, norm. If action, norm are not expected by `RobertaForSequenceClassification.forward`,  you can safely ignore this message.
***** Running Ev

The following columns in the evaluation set don't have a corresponding argument in `RobertaForSequenceClassification.forward` and have been ignored: action, norm. If action, norm are not expected by `RobertaForSequenceClassification.forward`,  you can safely ignore this message.
***** Running Evaluation *****
  Num examples = 2000
  Batch size = 128
The following columns in the evaluation set don't have a corresponding argument in `RobertaForSequenceClassification.forward` and have been ignored: action, norm. If action, norm are not expected by `RobertaForSequenceClassification.forward`,  you can safely ignore this message.
***** Running Evaluation *****
  Num examples = 2000
  Batch size = 128
The following columns in the evaluation set don't have a corresponding argument in `RobertaForSequenceClassification.forward` and have been ignored: action, norm. If action, norm are not expected by `RobertaForSequenceClassification.forward`,  you can safely ignore this message.
***** Running Ev

The following columns in the evaluation set don't have a corresponding argument in `RobertaForSequenceClassification.forward` and have been ignored: action, norm. If action, norm are not expected by `RobertaForSequenceClassification.forward`,  you can safely ignore this message.
***** Running Evaluation *****
  Num examples = 2000
  Batch size = 128
The following columns in the evaluation set don't have a corresponding argument in `RobertaForSequenceClassification.forward` and have been ignored: action, norm. If action, norm are not expected by `RobertaForSequenceClassification.forward`,  you can safely ignore this message.
***** Running Evaluation *****
  Num examples = 2000
  Batch size = 128
The following columns in the evaluation set don't have a corresponding argument in `RobertaForSequenceClassification.forward` and have been ignored: action, norm. If action, norm are not expected by `RobertaForSequenceClassification.forward`,  you can safely ignore this message.
***** Running Ev

In [10]:
results

Unnamed: 0,original_ms,optional_ms,anti_ms,contra_ms
original_ms,0.9055,0.521,0.421,0.615833
optional_ms,0.5,1.0,0.5,0.666667
anti_ms,0.274,0.52,0.9105,0.568167
contra_ms,0.8875,0.998,0.8965,0.927333
