# Flipping the whole Moral Stories dataset
***

In [2]:
import pandas as pd
import numpy as np
from tqdm import tqdm

pd.set_option('display.max_colwidth', 400)

import os

def save_splits(dataframe, folder):
    """Write the train/dev/test partitions of ``dataframe`` as JSON-lines files.

    The frame is grouped by its ``split`` column and the groups ``train``,
    ``dev`` and ``test`` are written to ``<folder>/<split>.jsonl`` with one
    JSON record per line.

    Args:
        dataframe: DataFrame with a ``split`` column containing the values
            ``"train"``, ``"dev"`` and ``"test"``.
        folder: Target directory; it is created when missing. A trailing
            path separator is optional.

    Raises:
        KeyError: If one of the three splits does not occur in ``dataframe``.
    """
    by_split = dataframe.groupby("split")
    # Resolve all three groups first so a missing split fails before any
    # directory or file is created.
    parts = {name: by_split.get_group(name) for name in ("train", "dev", "test")}
    os.makedirs(folder, exist_ok=True)

    for name, part in parts.items():
        # os.path.join keeps the files inside `folder` even when the caller
        # omits the trailing separator.
        with open(os.path.join(folder, f"{name}.jsonl"), "w") as f:
            f.write(part.to_json(orient="records", lines=True))


def load_action_norm_split(path):
    """Load the ``train``/``dev``/``test`` JSON-lines files of one data split.

    Args:
        path: Directory containing ``train.jsonl``, ``dev.jsonl`` and
            ``test.jsonl``. A trailing path separator is optional.

    Returns:
        Tuple ``(train, dev, test)`` of DataFrames; each frame gets a
        ``split`` column holding the name of the file it was read from.
    """
    frames = []
    for split_name in ("train", "dev", "test"):
        # Join instead of concatenating so `path` works with or without a
        # trailing separator.
        frame = pd.read_json(os.path.join(path, f"{split_name}.jsonl"), lines=True)
        frame["split"] = split_name
        frames.append(frame)
    train, dev, test = frames
    return train, dev, test

def load_social_chem101(path="data/social-chem-101/social-chem-101.v1.0.tsv"):
    """Read the Social Chemistry 101 table from a tab-separated file.

    Args:
        path: Location of the TSV file. Defaults to the path used throughout
            this notebook, so existing zero-argument calls behave as before.

    Returns:
        DataFrame with one row per line of the TSV file.
    """
    return pd.read_csv(path, sep="\t")

# Moral Stories: load the three splits and stack them into one frame.
train, dev, test = load_action_norm_split("data/contrastive_moral_stories/original_ms/action+norm/norm_distance/")
moral_stories = pd.concat([train, dev, test])

# Social Chemistry 101: keep training rows with complete annotations,
# rot-agree >= 3, rot-bad == 0, a morality/social-norm category, and a
# judgment string without "{". The filters run in the original order; the
# string checks rely on the preceding dropna.
social_chem = (
    load_social_chem101()
    .loc[lambda df: df["split"] == "train"]
    .dropna(subset=["rot-categorization", "rot-judgment", "action", "rot-agree", "action-moral-judgment"])
    .loc[lambda df: df["rot-agree"] >= 3.0]
    .loc[lambda df: df["rot-bad"] == 0]
    .loc[lambda df: df["rot-categorization"].apply(lambda x: "morality-ethics" in x or "social-norms" in x)]
    .loc[lambda df: df["rot-judgment"].apply(lambda x: "{" not in x)]
)
print(len(social_chem))

103520


In [3]:
# Distinct norm texts that occur in Moral Stories.
norms = moral_stories[["norm"]].drop_duplicates()

# Inner join: a norm is kept when a Social Chemistry rule of thumb has the
# exact same text.
mssc = pd.merge(
    norms,
    social_chem[["rot", "rot-judgment", "action", "action-moral-judgment"]],
    left_on="norm",
    right_on="rot",
)
print("After joining, we retain", len(mssc["norm"].unique()), f"norms from Moral Stories ({len(norms)})")

# A norm may match several Social Chemistry rows; keep the first match per
# norm and rename `action` to `rot-action`.
groups = mssc.groupby("norm", as_index=False)
mssc = groups.nth(0).rename(columns={"action": "rot-action"})
mssc.head(5)

After joining, we retain 11986 norms from Moral Stories (12000)


Unnamed: 0,norm,rot,rot-judgment,rot-action,action-moral-judgment
0,It is good to earn income to support your family,It is good to earn income to support your family,It is good,earning income to support your family,1.0
1,It's good to help out your family with the laundry.,It's good to help out your family with the laundry.,It's good,helping out your family with the laundry.,1.0
2,You should help out kids who have abusive parents.,You should help out kids who have abusive parents.,should,helping out kids who have abusive parents.,1.0
3,It's bad to draw on walls with crayons.,It's bad to draw on walls with crayons.,It's bad,drawing on walls with crayons.,-1.0
4,It is bad to sleep in a public bathroom.,It is bad to sleep in a public bathroom.,it's bad,sleeping in a public bathroom,-1.0


# Creating optional norms
***
Deontic logic dictates that opposites of obligatory (impermissible) norms are omissible (permissible) instead of impermissible (obligatory).
Since the permissible operator can be expressed as obligatory OR optional, we derive two statements per norm:
* Original, impermissible: "You must not kill"
* Optional: "You may kill"
* Obligatory: "You must kill"

And for obligatory statements:
* Obligatory: "You should be nice to people"
* Optional: "It is okay not to be nice to people"
* Impermissible: "You should not be nice to people"

For this purpose, we gather textual human judgments representing the respective operator:
* Obligatory: "You have to", "It is good", "It is necessary"
* Optional: "You may", "It is okay to", "It is acceptable to"
* Impermissible: "You should not", "It is bad", "It is unacceptable"

The judgments are extracted from Social Chemistry 101, where human raters were tasked to judge the severity of normative or norm-divergent actions w.r.t. a norm. For example:
* "It is bad to" --> -2 (very bad), -1 (bad)
* "You should" --> 2 (very good), 1 (good)
* "It is acceptable to" --> 0 (okay)

In [4]:
# Build the CSV of candidate "optional" judgment phrases for manual rating.
# Candidates are rows whose action was judged neutral (0) with rot-agree >= 4.
optional_sc101 = social_chem[
    (social_chem["action-moral-judgment"] == 0) & (social_chem["rot-agree"] >= 4.0)
].copy()  # own copy: a column is added below, which must not write into a slice of `social_chem`

# Normalised phrase: lower-cased, with a leading "it's" / "it is" / "is" removed.
# This key is what the human-rated file is joined on in the next cell, so the
# normalisation is kept exactly as it was.
optional_sc101["stripped"] = optional_sc101["rot-judgment"].apply(
    lambda x: x.lower().removeprefix("it's").removeprefix("it is").removeprefix("is").strip()
)
unique_optional = optional_sc101.drop_duplicates("stripped")
print("Found", len(unique_optional), "unique optional judgments")

# Make sure the output directory exists before writing the candidate list.
os.makedirs("data/operator_mappings", exist_ok=True)
unique_optional["stripped"].to_csv("data/operator_mappings/optional_candidates.csv")

Found 447 unique optional judgments


In [5]:
# Impermissible pool: negatively judged actions with rot-agree >= 4.
impermissible_sc101 = social_chem.loc[
    lambda df: (df["action-moral-judgment"] < 0) & (df["rot-agree"] >= 4.0)
]
impermissibles = impermissible_sc101["rot-judgment"]
print("Found", len(impermissibles), "non-unique impermissible judgments")

# Obligatory pool: positively judged actions with rot-agree >= 4.
obligatory_sc101 = social_chem.loc[
    lambda df: (df["action-moral-judgment"] > 0) & (df["rot-agree"] >= 4.0)
]
obligatories = obligatory_sc101["rot-judgment"]
print("Found", len(obligatories), "non-unique obligatories judgments")

# Optional pool: read the manually rated candidates, drop incomplete rows and
# join them back to the candidate rows via the normalised phrase; the original
# (un-normalised) judgment text of every matching row is kept.
optionals = (
    pd.read_csv("data/operator_mappings/optional_rated.csv", delimiter=";")
    .dropna()
    .set_index("index")
    .merge(optional_sc101, left_on="rot-judgment", right_on="stripped", suffixes=("_new", "_old"))["rot-judgment_old"]
    .rename("rot-judgment")
)
print("Found", len(optionals), "non-unique optional judgments")

# Restrict the candidate rows to judgments that survived the manual rating.
optional_sc101 = optional_sc101[optional_sc101["rot-judgment"].isin(set(optionals))]

Found 17262 non-unique impermissible judgments
Found 8358 non-unique obligatories judgments
Found 3288 non-unique optional judgments


In [6]:
# Persist the three judgment pools, one CSV per deontic operator (no index column).
for csv_path, judgment_frame in (
    ("data/operator_mappings/impermissible_sc101.csv", impermissible_sc101),
    ("data/operator_mappings/obligatory_sc101.csv", obligatory_sc101),
    ("data/operator_mappings/optional_sc101.csv", optional_sc101),
):
    judgment_frame.to_csv(csv_path, index=False)

In [7]:
# Number of distinct judgment phrases per operator (missing values, if any, count as a value).
print(f"Found {impermissibles.nunique(dropna=False)} unique impermissible judgments")
print(f"Found {obligatories.nunique(dropna=False)} unique obligatory judgments")
print(f"Found {optionals.nunique(dropna=False)} unique optional judgments")

Found 1181 unique impermissible judgments
Found 497 unique obligatory judgments
Found 51 unique optional judgments


### Flipping the judgment part into non-optional statements
***

In [8]:
def flip_non_optional(row, obligatory_pool=None, impermissible_pool=None, random_state=None):
    """Draw a judgment phrase of the opposite polarity for a non-optional norm.

    Args:
        row: Mapping or Series with an ``"action-moral-judgment"`` entry.
        obligatory_pool: Series of phrases to draw from when the row is
            judged negatively. Defaults to the notebook-level ``obligatories``.
        impermissible_pool: Series of phrases to draw from when the row is
            judged positively. Defaults to the notebook-level ``impermissibles``.
        random_state: Forwarded to ``Series.sample``; ``None`` keeps the
            previous unseeded behaviour.

    Returns:
        One randomly drawn judgment phrase from the opposite pool.

    Raises:
        ValueError: If the judgment is neither below nor above zero
            (i.e. 0 or NaN).
    """
    judgment = row["action-moral-judgment"]
    if judgment < 0:
        # The norm is impermissible, so its flip is an obligatory judgment.
        pool = obligatories if obligatory_pool is None else obligatory_pool
    elif judgment > 0:
        # The norm is obligatory, so its flip is an impermissible judgment.
        pool = impermissibles if impermissible_pool is None else impermissible_pool
    else:
        raise ValueError("Cannot flip optional judgments. Use 'flip_optional' instead")
    return pool.sample(1, random_state=random_state).iloc[0]

# `Series.sample` without `random_state` draws from NumPy's global RNG, so
# seed it here to make the flipped judgments reproducible across runs.
np.random.seed(42)

non_optional_flipped = mssc.copy()
non_optional_flipped["flipped-rot-judgment"] = mssc.apply(flip_non_optional, axis=1)

In [9]:
def flip_optional(row, optional_pool=None, random_state=None):
    """Draw one optional judgment phrase for a non-optional norm.

    Args:
        row: The norm row; unused, present so the function can be passed to
            ``DataFrame.apply(..., axis=1)``.
        optional_pool: Series of optional judgment phrases to draw from.
            Defaults to the notebook-level ``optionals``.
        random_state: Forwarded to ``Series.sample``; ``None`` keeps the
            previous unseeded behaviour.

    Returns:
        One randomly drawn optional judgment phrase.
    """
    pool = optionals if optional_pool is None else optional_pool
    return pool.sample(1, random_state=random_state).iloc[0]

# Assign every norm a randomly drawn optional judgment (with replacement).
# The fixed seed makes the assignment reproducible across runs.
optional_flipped = mssc.copy()
optional_flipped["flipped-rot-judgment"] = optionals.sample(
    len(mssc), replace=True, ignore_index=True, random_state=42
).to_list()

## Generating ROTs
***
Using the rot-generator models!

In [9]:
import numpy as np
import pandas as pd
from datasets import load_dataset
import time
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
import datasets
from tqdm import tqdm
import nltk

# Local checkpoint of the rot-generator model (the path suggests a fine-tuned
# facebook/bart-large) and the tokenizer's maximum sequence length.
model_name = "data/models/rot-generator/facebook/bart-large/bs16_lr_3e-05/checkpoint-30850/"
block_size = 128

tokenizer = AutoTokenizer.from_pretrained(model_name, model_max_length=block_size)

# Load the seq2seq weights, then move the model to the GPU.
model = AutoModelForSeq2SeqLM.from_pretrained(model_name)
model = model.cuda()

In [10]:
import torch
# Seed torch's RNG; this matters for any sampling-based generation later in
# the notebook. The beam search in `flip_rot` does not sample.
torch.manual_seed(42)

def flip_rot(sample):
    """Generate a flipped rule of thumb for every row of ``sample``.

    Args:
        sample: DataFrame slice with the columns ``"flipped-rot-judgment"``
            and ``"rot-action"``; they are passed to the tokenizer as the
            first and second text input respectively.

    Returns:
        List with one decoded string per row (special tokens removed).
    """
    encoded = tokenizer(sample["flipped-rot-judgment"].to_list(), sample["rot-action"].to_list(),
                        padding=True, return_tensors="pt")
    # Put the inputs on whatever device the model lives on instead of
    # assuming CUDA.
    encoded = {name: tensor.to(model.device) for name, tensor in encoded.items()}
    # Plain beam search. The former top_p / top_k / temperature arguments were
    # dropped: without `do_sample=True` they have no effect on the output and
    # only suggested that sampling takes place.
    generated = model.generate(**encoded, min_length=1, max_length=128, num_beams=10)
    return tokenizer.batch_decode(generated, skip_special_tokens=True)

In [11]:
# Generate the optional norms batch by batch.
# Original author's note: batch_size 100 requires ~35gb of vram on a single gpu.
batch_size = 32

optional_rots = []
for start in tqdm(range(0, len(optional_flipped), batch_size)):
    batch = optional_flipped.iloc[start:start + batch_size]
    optional_rots.extend(flip_rot(batch))

optional_flipped["flipped-rot"] = optional_rots

  0%|          | 0/375 [00:00<?, ?it/s]2022-06-23 00:08:40.993571: I tensorflow/core/util/util.cc:169] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
100%|██████████| 375/375 [06:19<00:00,  1.01s/it]


In [12]:
# Generate the non-optional (polarity-flipped) norms batch by batch, reusing
# the `batch_size` set for the optional norms.
non_optional_rots = []
for start in tqdm(range(0, len(non_optional_flipped), batch_size)):
    batch = non_optional_flipped.iloc[start:start + batch_size]
    non_optional_rots.extend(flip_rot(batch))

non_optional_flipped["flipped-rot"] = non_optional_rots

100%|██████████| 375/375 [06:33<00:00,  1.05s/it]


# Saving to disk
***

In [13]:
# Attach the flipped norms to the Moral Stories rows (joined on the columns
# both frames share). Then swap the roles: moral and immoral actions trade
# places, the generated norm becomes `norm`, and the original norm is kept
# under `flipped-rot`.
anti_ms = pd.merge(non_optional_flipped, moral_stories).rename(
    columns={
        "moral_action": "immoral_action",
        "immoral_action": "moral_action",
        "norm": "flipped-rot",
        "flipped-rot": "norm",
    }
)
# Invert the binary label to match the swapped actions.
anti_ms["label"] = anti_ms["label"].map(lambda x: int(not x))
save_splits(anti_ms.drop(columns=["flipped-rot", "rot"]), "data/contrastive_moral_stories/anti_ms_splits_only/action+norm/norm_distance/")

In [18]:
# Storing the optional norms:
# with respect to an optional norm every action is acceptable, so there is
# only one label. Per the original author's note, Moral Stories holds one
# action per row and NaN in the other action column. The same layout is
# used here, except that the action always goes into `moral_action`.
opt_flip_ms = optional_flipped.merge(moral_stories)
# Take the row's immoral action wherever the moral action is missing.
opt_flip_ms["moral_action"] = opt_flip_ms["moral_action"].fillna(opt_flip_ms["immoral_action"])
# `np.nan` instead of `np.NaN`: the upper-case alias was removed in NumPy 2.0.
opt_flip_ms["immoral_action"] = np.nan
opt_flip_ms["label"] = 1
opt_flip_ms["norm"] = opt_flip_ms["flipped-rot"]
# Reduce to the original Moral Stories schema before writing.
opt_flip_ms = opt_flip_ms[moral_stories.columns]
save_splits(opt_flip_ms, "data/contrastive_moral_stories/optional_ms/action+norm/norm_distance/")

In [19]:
# Non-optional flips: join the flipped norms onto the Moral Stories rows, let
# moral and immoral actions trade places, and make the generated norm the
# `norm` column (the original norm moves to `flipped-rot`).
anti_ms = pd.merge(non_optional_flipped, moral_stories).rename(
    columns={
        "moral_action": "immoral_action",
        "immoral_action": "moral_action",
        "norm": "flipped-rot",
        "flipped-rot": "norm",
    }
)
# Invert the binary label to match the swapped actions.
anti_ms["label"] = anti_ms["label"].map(lambda x: int(not x))

# First write the frame with all columns, then a version reduced to the
# Moral Stories schema.
save_splits(anti_ms, "data/contrastive_moral_stories/anti_ms_splits_only/action+norm/norm_distance/")
anti_ms = anti_ms[moral_stories.columns]
save_splits(anti_ms, "data/contrastive_moral_stories/anti_ms/action+norm/norm_distance/")

# Create contrastive/contradictory/paradox split
***
...by stacking moral stories + optional norms + non optional flips!

In [10]:
# Reload the three datasets: original, optional flips and non-optional flips.
train, dev, test = load_action_norm_split("data/contrastive_moral_stories/original_ms/action+norm/norm_distance/")
opt_train, opt_dev, opt_test = load_action_norm_split("data/contrastive_moral_stories/optional_ms/action+norm/norm_distance/")
non_opt_train, non_opt_dev, non_opt_test = load_action_norm_split("data/contrastive_moral_stories/anti_ms/action+norm/norm_distance/")

# Stack the datasets per split (original rows first, then optional, then
# non-optional), and finally stack the three splits.
contra_train, contra_dev, contra_test = (
    pd.concat(list(parts))
    for parts in zip(
        (train, dev, test),
        (opt_train, opt_dev, opt_test),
        (non_opt_train, non_opt_dev, non_opt_test),
    )
)
contra_ms = pd.concat([contra_train, contra_dev, contra_test])

folder = "data/contrastive_moral_stories/contra_ms/action+norm/norm_distance/"
save_splits(contra_ms[moral_stories.columns], folder)

# Create sample of flipped norms for crowd labeling
***

In [22]:
# Number of (original norm, flipped norm) pairs to hand to the crowd workers.
n = 500
train, dev, test = load_action_norm_split("data/contrastive_moral_stories/original_ms/action+norm/norm_distance/")
opt_train, opt_dev, opt_test = load_action_norm_split("data/contrastive_moral_stories/optional_ms/action+norm/norm_distance/")
non_opt_train, non_opt_dev, non_opt_test = load_action_norm_split("data/contrastive_moral_stories/anti_ms/action+norm/norm_distance/")

flipped_train = pd.concat([opt_train, non_opt_train])
flipped_dev = pd.concat([opt_dev, non_opt_dev])
flipped_test = pd.concat([opt_test, non_opt_test])

# Stack all flipped norms and keep one row per distinct flipped norm text.
flipped_norms = (
    pd.concat([flipped_train, flipped_dev, flipped_test])[["norm", "ID"]]
    .drop_duplicates("norm")
    .rename(columns={"norm": "flipped-norm"})
)

# One row per distinct original norm.
norms = pd.concat([train, dev, test])[["norm", "ID"]].drop_duplicates("norm")

# Pair every flipped norm with its original via the shared ID.
combined = flipped_norms.merge(norms, on="ID")[["ID", "norm", "flipped-norm"]]
# Fixed seed so the annotation sample can be regenerated exactly.
sample = combined.sample(n, random_state=42)

sample.to_csv("data/contrastive_moral_stories/flipped_sample.csv")

## Manual examination of hard samples
***


In [None]:
# Look up two norms whose judgment phrasing is hard to flip.
mssc[mssc["norm"].isin(["Cheating is frowned upon.", "It's expected that partners will be loyal to one another."])]

Unnamed: 0,norm,rot,rot-judgment,rot-action,action-moral-judgment,flipped-rot-judgment,flipped-rot
665,It's expected that partners will be loyal to one another.,It's expected that partners will be loyal to one another.,it's expected,being loyal to one another.,-1.0,it's important,It's important that partners are loyal to one another.
3732,Cheating is frowned upon.,Cheating is frowned upon.,is frowned upon,Cheating,-1.0,It's good,It's good to cheat.


In [24]:
# Norms picked for a closer manual look, one per line.
hard_samples = set('''It is mean to keep grandparents from seeing their grandchildren.
It's rude to come over without permission.
It's wrong to ignore someone after a one-night stand.
It's wrong to judge someone for having HIV.
Partners should be willing to do favors for each other.
You shouldn't make your friends jealous.
You shouldn't stop communicating with a friend.
'''.split("\n"))
hs = non_optional_flipped[non_optional_flipped["norm"].isin(hard_samples)].copy()

# Re-generate the flips for these rows, this time with sampling enabled
# (do_sample=True) on top of 5 beams, and inputs padded to the maximum length.
encoded = tokenizer(
    hs["flipped-rot-judgment"].to_list(),
    hs["rot-action"].to_list(),
    padding="max_length",
    return_tensors="pt",
)
encoded = {name: tensor.cuda() for name, tensor in encoded.items()}
generated = model.generate(
    **encoded,
    min_length=1,
    max_length=128,
    top_p=0.95,
    top_k=50,
    num_beams=5,
    temperature=1,
    do_sample=True,
)
hs["translated"] = tokenizer.batch_decode(generated, skip_special_tokens=True)
hs[["norm", "translated"]]

Unnamed: 0,norm,translated
860,Partners should be willing to do favors for each other.,It is wrong for partners to be willing to do favors for each other
4000,It's wrong to ignore someone after a one-night stand.,It's good to ignore someone after a one-night stand.
4193,You shouldn't stop communicating with a friend.,It's rude to communicate with a friend.
9831,You shouldn't make your friends jealous.,It's good to make your friends jealous.
10655,It's rude to come over without permission.,It's allowed to come over without permission.
12550,It is mean to keep grandparents from seeing their grandchildren.,You should keep grandparents from seeing their grandchildren.
12822,It's wrong to judge someone for having HIV.,It's good to judge someone for having HIV.
