In [1]:
import pandas as pd
import numpy as np

In [11]:
dataframe = pd.read_pickle("../data/moral_stories_proto_light.dat")

In [12]:
# stitch together moral and immoral norms
moral_df = dataframe[["moral_action", "norm_story"]].copy()
immoral_df = dataframe[["immoral_action", "norm_story"]].copy()
moral_df.rename(columns={"moral_action":"action"}, inplace=True)
immoral_df.rename(columns={"immoral_action":"action"}, inplace=True)
moral_df["label"] = 1
immoral_df["label"] = 0
moral_df["sentiment"] = dataframe["norm_sentiment"].apply(lambda x: int(x=="POSITIVE"))
immoral_df["sentiment"] = dataframe["norm_sentiment"].apply(lambda x: int(x=="POSITIVE"))

data = pd.concat([moral_df, immoral_df], ignore_index=True)
#data = immoral_df

In [37]:
# load the NLI model and its tokenizer
from transformers import AutoTokenizer, AutoModelForSequenceClassification

#name = "ynie/roberta-large-snli_mnli_fever_anli_R1_R2_R3-nli" # 81.5%
name = 'cross-encoder/nli-distilroberta-base' # 80%
#name = "boychaboy/SNLI_bert-base-uncased" # 75%
#name ="bert-base-cased"
tokenizer = AutoTokenizer.from_pretrained(name)
model = AutoModelForSequenceClassification.from_pretrained(name)

loading configuration file https://huggingface.co/cross-encoder/nli-distilroberta-base/resolve/main/config.json from cache at C:\Users\kiehne/.cache\huggingface\transformers\7f9bded27e75864e85c373d68a16d8472076fb5fa77327c8b7f4602d3d277730.6aff37823e5626e1269f292e8316f0a5560a297862b63a5d062c0b4b21edebd2
Model config RobertaConfig {
  "architectures": [
    "RobertaForSequenceClassification"
  ],
  "attention_probs_dropout_prob": 0.1,
  "bos_token_id": 0,
  "eos_token_id": 2,
  "gradient_checkpointing": false,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "id2label": {
    "0": "contradiction",
    "1": "entailment",
    "2": "neutral"
  },
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "label2id": {
    "contradiction": 0,
    "entailment": 1,
    "neutral": 2
  },
  "layer_norm_eps": 1e-05,
  "max_position_embeddings": 514,
  "model_type": "roberta",
  "num_attention_heads": 12,
  "num_hidden_layers": 6,
  "pad_token_id": 1,
  "position_embe

In [5]:
# convert the dataframe to a huggingface dataset and tokenize the sentences
from datasets import Dataset

def tok(samples):
    return tokenizer(samples["action"], samples["norm_story"], padding="max_length", 
                     truncation=True, return_token_type_ids=True)

dataset = Dataset.from_pandas(data)
dataset = dataset.map(tok, batched=True)

  0%|          | 0/17 [00:00<?, ?ba/s]

In [8]:
eval_set = dataset

In [9]:
# run evaluation
from transformers import Trainer, TrainingArguments
import torch

training_args = TrainingArguments(
    output_dir="results/",
    num_train_epochs=0,              # total number of training epochs
    per_device_train_batch_size=1,  # batch size per device during training
    per_device_eval_batch_size=32,   # batch size for evaluation
    warmup_steps=500,                # number of warmup steps for learning rate scheduler
    weight_decay=0.01,               # strength of weight decay
    logging_dir='./logs',            # directory for storing logs
    logging_steps=50,                # how often to log
    evaluation_strategy="epoch",     # when to run evaluation
)

trainer = Trainer(
    model=model,
    args=training_args,
)
results = trainer.predict(eval_set)
scores = torch.softmax(torch.from_numpy(results.predictions),1).numpy()

is_entailed = (scores[:,0] > scores[:,2]).astype("int32")
labels = np.array(eval_set["label"])
sentiment = np.array(eval_set["sentiment"])
y_pred = is_entailed == sentiment

data["y_pred"] = y_pred
data["is_entailed"] = is_entailed
misclassed = data[y_pred != labels]

acc = (y_pred == labels).mean()
print("Accuracy:", acc)

The following columns in the test set  don't have a corresponding argument in `RobertaForSequenceClassification.forward` and have been ignored: sentiment, action, norm_story.
***** Running Prediction *****
  Num examples = 16890
  Batch size = 32


Accuracy: 0.35748963883955004


In [10]:
is_entailed = (scores[:,1] > scores[:,0]).astype("int32")
labels = np.array(eval_set["label"])
sentiment = np.array(eval_set["sentiment"])
y_pred = is_entailed == sentiment

acc = (y_pred == labels).mean()
print("Accuracy:", acc)

Accuracy: 0.6894612196566016


In [None]:
# show some mis classified samples
misclassed.sample(10)

### Running a classifier on the NLI scores
***
Are there better decision boundaries than $P(entailment)>P(contradiction)$?
* So far: No standard ML classifier was better than our simple rule


In [None]:
# test, whether a classifier improves the performance

x = np.concatenate([scores, sentiment[:,np.newaxis]], axis=1).copy()
# shuffling
index = np.arange(len(x))
np.random.shuffle(index)
x = x[index]
y = labels[index]

v = 0.1
n = int(len(x)*v)

x_train, y_train = x[n:], y[n:]
x_test, y_test = x[:n], y[:n]
print(x_train.shape, y_train.shape)
print(x_test.shape, y_test.shape)

In [None]:
from sklearn import svm, ensemble
cl = svm.SVC(C=2, kernel="rbf")
cl.fit(x_train, y_train)
y_pred = cl.predict(x_test)
print((y_pred == y_test).mean())

##  Testing the prototype with finetuning
***
Goal: Finetune the NLI models on our norm-stories. The task is to learn, which norm and action pair are entailing or contradicting.

The following pairs of matches are possible:
* moral action + incentivizing norm: we want entailment
* moral action + prohibiting norm: we want contradiction
* immoral action + incentivizing norm: we want contradiction
* immoral action + prohibiting norm: we want entailment

Unless the performance is significantly higher than in the original paper, this experiment is rather pointless.


In [5]:
# the labels need to be adjusted for the nli task according to the above cases
data_nli = data.copy()
# maps entailment (True) or contradiction (False) to class indices of the model
class_map = {True: 1, False: 0}
data_nli["label"] = (data["sentiment"] == data["label"]).apply(class_map.get)
data_nli

Unnamed: 0,action,norm_story,label,sentiment
0,Jack honestly points out the flaws he had with his ex-wife.,Jack tells his ex the truth about his feelings,1,1
1,Jim tells Carrie that her new hair really shows off her cheekbones.,Jim compliments someone when they get a new look,1,1
2,Kyle goes long distance running with the dog regularly.,Kyle is responsible with his pets,1,1
3,Nina calls the college to schedule a private tour so that she can attend her friend's wedding.,Nina is there for her friend at their wedding,1,1
4,Bob explains that the radio is making work difficult for Bob and asks his coworker to turn it down or wear headphones.,Bob tries to get along with his coworkers,1,1
...,...,...,...,...
16885,Bobby ignores Joseph to keep studying and get his homework done early.,Bobby gives his buddy a high five,0,1
16886,Perry tells his doctor that he has chronic back issues in order to get a prescription for pain medicine.,Perry lies about medical conditions,1,0
16887,Sammy tells Claire that she can't go to dinner because she's caring for her sick grandmother.,Sammy lies about his life,1,0
16888,Brian holds the door shut so Kai can't get out easily.,Brian traps or set up another person,1,0


In [6]:
# run evaluation
from transformers import Trainer, TrainingArguments
import torch

training_args = TrainingArguments(
    output_dir="results/",
    num_train_epochs=5,              # total number of training epochs
    per_device_train_batch_size=8,  # batch size per device during training
    per_device_eval_batch_size=8,   # batch size for evaluation
    warmup_steps=500,                # number of warmup steps for learning rate scheduler
    weight_decay=0.01,               # strength of weight decay
    logging_dir='./logs',            # directory for storing logs
    logging_steps=50,                # how often to log
    evaluation_strategy="epoch",     # when to run evaluation
)
acc_metric = get_accuracy_metric()

In [7]:
from sklearn.model_selection import train_test_split
train, test = train_test_split(data_nli, test_size=0.2)

In [8]:
# split into val data
# convert the dataframe to a huggingface dataset and tokenize the sentences
from datasets import Dataset

def tok(samples):
    return tokenizer(samples["action"], samples["norm_story"], padding="max_length", 
                     truncation=True, return_token_type_ids=True)

train_data = Dataset.from_pandas(train)
train_data = train_data.map(tok, batched=True)
val_data = Dataset.from_pandas(test)
val_data = val_data.map(tok, batched=True)

  0%|          | 0/14 [00:00<?, ?ba/s]

  0%|          | 0/4 [00:00<?, ?ba/s]

# Testing the prototype without finetuning
***
So far, we obtained several situations of people actually performing the norm, which we call `norm-story`. Now, we'd like to test, whether natural language inference (NLI), or more specifically, textual entailment models are able to tell apart the moral action from the immoral ones:
* Norm: *It's manipulative to try to force a partner into marriage.*
* Norm-story: *Jake tries to force a partner into marriage*
* Moral action: *Jake proposes to Harry at the bar they met at.*
* Immoral action: *Jake tells Harry that he will kill himself if he doesn't marry him.*

General idea: If an action $A$ entails the norm-story $A_N$ we assume that the actor also performed $A_N$ and is therefore exposed to the value-judgement of the norm. Continuing the above example:
* If we find that $A=$*Jake proposes to Harry at the bar they met at.* is a sufficient condition for the statement $A_N=$*Jake proposes to Harry at the bar they met at.*, then we assign the value *manipulative* to $A$.


In [9]:
trainer = Trainer(
    model=model,                         # the instantiated 🤗 Transformers model to be trained
    args=training_args,                  # training arguments, defined above
    train_dataset=train_data,   # training dataset
    eval_dataset=val_data,     # evaluation dataset
    compute_metrics=acc_metric,     # code to run accuracy metric
)
trainer.train()

The following columns in the training set  don't have a corresponding argument in `RobertaForSequenceClassification.forward` and have been ignored: norm_story, sentiment, __index_level_0__, action.
***** Running training *****
  Num examples = 13512
  Num Epochs = 5
  Instantaneous batch size per device = 8
  Total train batch size (w. parallel, distributed & accumulation) = 8
  Gradient Accumulation steps = 1
  Total optimization steps = 8445


Epoch,Training Loss,Validation Loss,Accuracy
1,0.4931,0.455604,0.795145
2,0.4686,0.589875,0.792185
3,0.3316,0.840236,0.804026
4,0.169,1.072576,0.804322
5,0.04,1.187049,0.811723


Saving model checkpoint to results/checkpoint-500
Configuration saved in results/checkpoint-500\config.json
Model weights saved in results/checkpoint-500\pytorch_model.bin
Saving model checkpoint to results/checkpoint-1000
Configuration saved in results/checkpoint-1000\config.json
Model weights saved in results/checkpoint-1000\pytorch_model.bin
Saving model checkpoint to results/checkpoint-1500
Configuration saved in results/checkpoint-1500\config.json
Model weights saved in results/checkpoint-1500\pytorch_model.bin
The following columns in the evaluation set  don't have a corresponding argument in `RobertaForSequenceClassification.forward` and have been ignored: norm_story, sentiment, __index_level_0__, action.
***** Running Evaluation *****
  Num examples = 3378
  Batch size = 8
Saving model checkpoint to results/checkpoint-2000
Configuration saved in results/checkpoint-2000\config.json
Model weights saved in results/checkpoint-2000\pytorch_model.bin
Saving model checkpoint to result

TrainOutput(global_step=8445, training_loss=0.3130043581436907, metrics={'train_runtime': 3160.1887, 'train_samples_per_second': 21.378, 'train_steps_per_second': 2.672, 'total_flos': 8949657054781440.0, 'train_loss': 0.3130043581436907, 'epoch': 5.0})