# Prepare Experiment & DeepSpeed config (**MANDATORY**)
***

In [1]:
# DeepSpeed configuration.
# NOTE(review): the "auto" placeholders are presumably resolved by the Hugging Face
# Trainer's DeepSpeed integration from `training_args` -- confirm against the launcher.
ds_config = {
    # Mixed-precision settings; "loss_scale": 0 is kept from the original config.
    "fp16": {
        "enabled": "auto",
        "loss_scale": 0,
        "loss_scale_window": 1000,
        "initial_scale_power": 16,
        "hysteresis": 2,
        "min_loss_scale": 1
    },
    "optimizer": {
        "type": "AdamW",
        "params": {
            "lr": "auto",
            "betas": "auto",
            "eps": "auto",
            "weight_decay": "auto"
        }
    },

    "zero_optimization": {
        "stage": 2,
        "allgather_partitions": True,
        "allgather_bucket_size": 5e8,
        "overlap_comm": True,
        "reduce_scatter": True,
        "reduce_bucket_size": 5e8,
        "contiguous_gradients": True,
        # Both offload targets are disabled ("none").
        "offload_optimizer": {
            "device": "none",
        },
        # The DeepSpeed option is spelled "offload_param" (singular); the previous
        # "offload_params" key is not a documented ZeRO setting.
        "offload_param": {
            "device": "none"
        },
    },
    "gradient_accumulation_steps": "auto",
    "gradient_clipping": "auto",
    "steps_per_print": 200,
    "train_batch_size": "auto",
    "train_micro_batch_size_per_gpu": "auto",
    "wall_clock_breakdown": False
}

# Keyword arguments for the training run, grouped by concern.
training_args = dict(
    # schedule and batch sizes
    num_train_epochs=4,
    gradient_accumulation_steps=1,
    per_device_train_batch_size=128,
    per_device_eval_batch_size=256,
    # numerics and optimisation
    fp16=True,
    weight_decay=0.0,
    warmup_steps=0,
    learning_rate=1e-5,
    # logging, evaluation and checkpointing all happen once per epoch
    logging_strategy="epoch",
    evaluation_strategy="epoch",
    save_strategy="epoch",
    save_total_limit=1,
    # model selection: higher eval accuracy is better
    load_best_model_at_end=True,
    metric_for_best_model="eval_accuracy",
    greater_is_better=True,
)

# Defaults for this experiment; usually overridden by an external config.

# hardware
num_gpus = 1

# checkpoint to evaluate and the tokenizer that goes with it
tokenizer_name = "bert-base-uncased"
model_name = "data/models/polarity/bert-base-uncased/bs128_lr_1e-05/checkpoint-678/"
block_size = 128

# output location
override_logdir = True
logdir = "data/models/polarity/bert-base-uncased/bs128_lr_1e-05/"

In [None]:
import os
os.environ["TOKENIZERS_PARALLELISM"] = "false"

import numpy as np
import pandas as pd
from datasets import load_dataset
import time
from transformers import AutoTokenizer, AutoModelForSequenceClassification
import datasets
from transformers import Trainer, TrainingArguments

2022-06-24 21:13:14.902592: I tensorflow/core/util/util.cc:169] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.


# Tokenize the dataset
***

In [None]:
# Model names containing "Eleuther" get explicit bos/eos/pad tokens;
# every other model uses the tokenizer's own defaults.
tokenizer = AutoTokenizer.from_pretrained(
    tokenizer_name,
    **(
        {
            "bos_token": "<|startoftext|>",
            "eos_token": "<|endoftext|>",
            "pad_token": "<|pad|>",
        }
        if "Eleuther" in model_name
        else {}
    ),
)

In [None]:
def load_social_chem101(path="data/social-chem-101/social-chem-101.v1.0.tsv"):
    """Read the Social-Chem-101 table into a DataFrame.

    Args:
        path: location of the tab-separated file. Defaults to the path this
            notebook has always used, so existing zero-argument calls behave
            exactly as before.

    Returns:
        pandas.DataFrame with one row per line of the TSV file.
    """
    return pd.read_csv(path, sep="\t")

# Narrow Social-Chem-101 down to the rows used for labelling norms.
required_columns = ["rot-categorization", "rot-judgment", "action", "rot-agree", "action-moral-judgment"]


def _is_moral_or_social(categorization):
    """True when the categorization mentions morality-ethics or social-norms."""
    return "morality-ethics" in categorization or "social-norms" in categorization


def _has_no_brace(judgment):
    """True when the judgment text contains no "{" character."""
    return "{" not in judgment


social_chem = load_social_chem101()

# Training split only, and only rows where every required column is present.
social_chem = social_chem[social_chem["split"] == "train"].dropna(subset=required_columns)

# Keep rows with rot-agree of at least 3.0 whose rot-bad flag equals 0.
agreed_and_not_bad = (social_chem["rot-agree"] >= 3.0) & (social_chem["rot-bad"] == 0)
social_chem = social_chem[agreed_and_not_bad]

# Keep the morality/social-norm categories, then drop judgments containing "{"
# (presumably unfilled templates -- TODO confirm).
social_chem = social_chem[social_chem["rot-categorization"].apply(_is_moral_or_social)]
social_chem = social_chem[social_chem["rot-judgment"].apply(_has_no_brace)]

In [None]:
def load_action_norm_split(path):
    """Load the train/dev/test JSON-lines files of one Moral Stories variant.

    Args:
        path: directory holding ``train.jsonl``, ``dev.jsonl`` and ``test.jsonl``.
            A trailing separator is optional. A non-directory value is still
            treated as a plain filename prefix, as in the original code.

    Returns:
        Tuple ``(train, dev, test)`` of DataFrames. Each gains an ``action``
        column holding ``moral_action`` for rows with ``label == 1`` and
        ``immoral_action`` for all other rows.
    """

    def split_file(split_name):
        # A directory given without a trailing "/" used to yield "dirtrain.jsonl".
        if os.path.isdir(path):
            return os.path.join(path, f"{split_name}.jsonl")
        return f"{path}{split_name}.jsonl"

    def pick_action(row):
        # Choose the action that matches the row's label.
        return row["moral_action"] if row["label"] == 1 else row["immoral_action"]

    frames = []
    for split_name in ("train", "dev", "test"):
        frame = pd.read_json(split_file(split_name), lines=True)
        frame["action"] = frame.apply(pick_action, axis=1)
        frames.append(frame)

    train, dev, test = frames
    return train, dev, test

# Moral Stories variants used for testing (only the test frames are used by the
# labelling step that follows).
original_ms_dir = "data/contrastive_moral_stories/original_ms/action+norm/norm_distance/"
optional_ms_dir = "data/contrastive_moral_stories/optional_ms/action+norm/norm_distance/"
anti_ms_dir = "data/contrastive_moral_stories/anti_ms/action+norm/norm_distance/"

train, dev, test = load_action_norm_split(original_ms_dir)
opt_train, opt_dev, opt_test = load_action_norm_split(optional_ms_dir)
anti_train, anti_dev, anti_test = load_action_norm_split(anti_ms_dir)
# The contra_ms variant is not loaded from disk:
#contra_train, contra_dev, contra_test = load_action_norm_split("data/contrastive_moral_stories/contra_ms/action+norm/norm_distance/")

In [None]:
# Attach gold labels to the norms of each test split:
#   obligatory    -> 1
#   impermissible -> 0
#   neutral       -> 2

# Original norms: look up each unique norm among the Social-Chem rules of thumb and
# label it 1 when the action's moral judgment is positive, else 0.
# `.assign` builds a new frame, so no column is written onto a slice.
test = (
    test.drop_duplicates("norm")
    .merge(social_chem, left_on="norm", right_on="rot")
    .drop_duplicates("norm")[["ID", "norm", "action-moral-judgment"]]
    .assign(label=lambda frame: (frame["action-moral-judgment"] > 0).astype("int"))
)

# Anti norms: reuse the judgment of the original norm with the same ID, sign flipped.
anti_test = anti_test.drop_duplicates("norm").merge(test[["ID", "action-moral-judgment"]], on="ID")
anti_test["action-moral-judgment"] = -anti_test["action-moral-judgment"]
# positive judgment implies obligatory norms...
anti_test = anti_test[["ID", "norm", "action-moral-judgment"]].assign(
    label=lambda frame: (frame["action-moral-judgment"] > 0).astype("int")
)

# Optional norms: keep only IDs that also appear in anti_test; all are neutral.
t = set(anti_test["ID"])
opt_test = opt_test.loc[opt_test["ID"].isin(t), ["ID", "norm"]].assign(label=2)

# Combined evaluation set over the three variants.
contra_test = pd.concat([test, anti_test, opt_test])

In [7]:
# One evaluation split per Moral Stories variant, plus the combined "contra_ms" split.
dataset = datasets.DatasetDict(
    {
        "original_ms": datasets.Dataset.from_pandas(test),
        "anti_ms": datasets.Dataset.from_pandas(anti_test),
        "optional_ms": datasets.Dataset.from_pandas(opt_test),
        "contra_ms": datasets.Dataset.from_pandas(contra_test),
    }
)

# Load the model

In [8]:
# Load the classifier checkpoint and move it to the GPU.
model = AutoModelForSequenceClassification.from_pretrained(model_name)
model = model.cuda()

# Size the embedding matrix to the tokenizer's vocabulary.
model.resize_token_embeddings(len(tokenizer))

# Fall back to the tokenizer's pad token when the model config does not define one.
pad_token_missing = model.config.pad_token_id is None
if pad_token_missing:
    model.config.pad_token_id = tokenizer.pad_token_id

# Switch to evaluation mode.
model = model.eval()

In [9]:
# First ten rows of the original split, kept as a small sample batch.
batch = dataset["original_ms"].select(indices=range(10))

In [10]:
import torch

def run_batch(batch):
    """Predict a class index for every norm in a batch.

    Relies on the module-level ``tokenizer`` and ``model``.

    Args:
        batch: mapping whose ``"norm"`` entry holds the texts to classify
            (the shape passed by ``dataset.map(..., batched=True)``).

    Returns:
        dict with a single key ``"y_pred"``: a list containing the argmax class
        index of the model's logits for each input text.
    """
    # truncation=True keeps over-long texts within the tokenizer's maximum length
    # instead of handing the model a sequence longer than it was padded for.
    encoded = tokenizer(batch["norm"], padding="max_length", truncation=True, return_tensors="pt")

    # Follow the model's device rather than assuming CUDA.
    device = next(model.parameters()).device
    encoded = {name: tensor.to(device) for name, tensor in encoded.items()}

    # Inference only: skip autograd bookkeeping.
    with torch.no_grad():
        out = model(**encoded)

    y_pred = torch.argmax(out.logits, dim=1).cpu().tolist()
    return {"y_pred": y_pred}

In [11]:
# Run the classifier over every split, 32 rows at a time; adds a "y_pred" column.
results = dataset.map(
    function=run_batch,
    batched=True,
    batch_size=32,
)

  0%|          | 0/32 [00:00<?, ?ba/s]

  0%|          | 0/32 [00:00<?, ?ba/s]

  0%|          | 0/32 [00:00<?, ?ba/s]

  0%|          | 0/94 [00:00<?, ?ba/s]

In [21]:
# Accuracy per split: share of rows whose prediction equals the gold label.
for split_name in dataset:
    gold = np.array(results[split_name]["label"])
    predicted = np.array(results[split_name]["y_pred"])
    print("Acc for split", split_name, ":", (gold == predicted).mean())

Acc for split original_ms : 0.978
Acc for split anti_ms : 0.961
Acc for split optional_ms : 0.992
Acc for split contra_ms : 0.977


In [26]:
# Write one CSV of predictions per split, named after the split, under `logdir`.
for split in dataset:
    split_frame = results[split].to_pandas()
    split_frame.to_csv(f"{logdir}{split}.csv")