## Task:
***
We want to get a better split of the Moral Stories dataset. Idea: Let's compute sentence embeddings and try to find train/test splits with a high degree of separation.

Approach:
* Get embedding vectors of norm_actions, e.g. "hurting somebody".
* Get clusters of the vectors
* Do splitting based on the clusters instead of based on single norms.

In [None]:
# NOTE(review): unpinned upgrade; pin an exact version (pkg==X.Y.Z) once known so re-runs use the same library.
%pip install -U sentence-transformers

In [None]:
from sklearn.cluster import KMeans
from sentence_transformers import SentenceTransformer

def assign_norm_clusters(dataframe, embedding_model='all-distilroberta-v1',
                         n_clusters=100, random_state=None):
    '''
    Clusters the "norm_action" texts of the given dataframe according
    to sentence embeddings from a transformer model, using KMeans.
    The cluster id of every row is stored in a new column "cluster".

    Parameters
    ----------
    dataframe : pandas.DataFrame
        Must contain a "norm_action" column. It is modified in place.
    embedding_model : str
        Name of the sentence-transformers model used for the embeddings.
    n_clusters : int
        Number of KMeans clusters (default 100, the former fixed value).
    random_state : int or None
        Seed forwarded to KMeans. Pass an int to get the same clusters
        on every run; None keeps the previous unseeded behaviour.

    Returns
    -------
    pandas.DataFrame
        The same dataframe object, now carrying the "cluster" column.
    '''
    model = SentenceTransformer(embedding_model)
    # encode() is documented for a str or a list of str; pass a plain list so
    # the DataFrame's index labels can never be confused with positions.
    sentences = dataframe["norm_action"].tolist()
    embeddings = model.encode(sentences, show_progress_bar=True)
    clustering = KMeans(n_clusters=n_clusters, init="k-means++", max_iter=300,
                        n_init=5, random_state=random_state)
    dataframe["cluster"] = clustering.fit_predict(embeddings)
    return dataframe


In [1]:
import pandas as pd
import numpy as np
from tqdm import tqdm

from sklearn.model_selection import train_test_split
from datasets import Dataset

from transformers import Trainer, TrainingArguments
from transformers import AutoTokenizer, AutoModelForSequenceClassification, AutoConfig
import transformers

import torch

from ailignment.datasets.util import get_accuracy_metric
from ailignment.datasets.moral_stories import make_action_classification_dataframe, get_random_value_dataset

from ailignment.training import sequence_classification

# Show long text columns (almost) in full when frames are displayed.
pd.options.display.max_colwidth = 400
# NOTE(review): unpickling executes code stored in the file; only load this from a trusted source.
dataframe = pd.read_pickle("../data/moral_stories_proto_l2s.dat")

In [2]:
from sentence_transformers import SentenceTransformer

# Sentence-embedding model, loaded by name; the same name is the default of assign_norm_clusters.
model = SentenceTransformer('all-distilroberta-v1')

In [63]:
from sklearn.cluster import KMeans, DBSCAN, OPTICS

# Embed every norm action with the model loaded above. Computed here so the
# cell does not depend on an `embeddings` variable left over in the kernel.
embeddings = model.encode(dataframe["norm_action"].tolist(), show_progress_bar=True)

# Fixed seed so the cluster assignment is identical on every run.
clustering = KMeans(n_clusters=100, init="k-means++", max_iter=300, n_init=5, random_state=0)
# Alternative clusterers that were tried:
#clustering = DBSCAN(eps=2.7, min_samples=2, metric="euclidean")
#clustering = OPTICS()

dataframe["cluster"] = clustering.fit_predict(embeddings)

# Cluster sizes, then the norm actions of the first few clusters.
print(np.unique(dataframe["cluster"], return_counts=True)[1])
g = dataframe.groupby("cluster")
# -1 is the noise label DBSCAN/OPTICS would assign; KMeans never produces it.
for i in [-1,0,1,2,3,4]:
    if i in g.groups:
        print("-"*10)
        print(g.get_group(i)["norm_action"])

[ 77 172 108 139 226 131 107 110  94 165  94 150 234  85 153 130 110 116
 141 143 120 115  96 160 102  84 115 136 100  85 102 136 155  86 134 189
  49 151  71 108 192  66  96 121 118 149  93  90  48 146  53  58 104  93
 122 157 185 151  65 111 148 100 115  73 199 103  48 115  77  88 145 109
 141  87 158  70  48  97  86 111 106 248  74 111 116 131 250 111  89 129
  85 123 106 118 153 166 159 161 109 136]
----------
108          taking advantage of a friend's generosity.
170                     doing favors for close friends.
182      thanking friends for inviting you to do things
191               wanting to be friends with murderers.
194                           beg strangers for treats.
                              ...                      
11441                        doing favors for a friend.
11533            outing your friends after their death.
11600                   giving a friend a place to stay
11619                            abadoning you friends.
11792                 

In [65]:
    # One frame per action type: drop the other type's columns, give the
    # remaining pair the shared names "action"/"consequence", and attach the
    # class label (1 = moral, 0 = immoral).
    moral_df = (
        dataframe
        .drop(["immoral_action", "immoral_consequence"], axis=1)
        .rename(columns={"moral_action": "action",
                         "moral_consequence": "consequence"})
        .assign(labels=1)
    )
    immoral_df = (
        dataframe
        .drop(["moral_action", "moral_consequence"], axis=1)
        .rename(columns={"immoral_action": "action",
                         "immoral_consequence": "consequence"})
        .assign(labels=0)
    )

In [88]:
from sklearn.model_selection import GroupShuffleSplit

# Split by cluster so that no cluster contributes rows to both sides;
# test_size is the share of clusters (groups) that is held out.
# The seed makes the split reproducible across runs.
gss = GroupShuffleSplit(n_splits=1, test_size=0.2, random_state=42)
# xi / yi: positional row indices of the train / test part of the single split.
xi, yi = next(gss.split(moral_df, groups=dataframe["cluster"]))

In [91]:
# Training rows of both label frames, stacked and then shuffled.
# NOTE(review): the shuffle is unseeded, so the row order differs between runs.
train_parts = [frame.iloc[xi] for frame in (moral_df, immoral_df)]
train = pd.concat(train_parts, ignore_index=True).sample(frac=1)

In [97]:
# Parameters for the value-randomisation experiment below.
# None for either list means "derive both from the most frequent norm values".
good_values = bad_values = None
# Presumably a flip probability; NOTE(review): the later cells pass 0.5 literally instead of reading p.
p = 0.5
# How many of the most frequent values to keep per sentiment.
top_n = 20

In [98]:
    if good_values is None or bad_values is None:
        # No explicit lists given: fall back to the top_n most frequent
        # norm values of each sentiment.
        by_sentiment = dataframe.groupby("norm_sentiment")
        top_negative = by_sentiment.get_group("NEGATIVE")["norm_value"].value_counts()
        top_positive = by_sentiment.get_group("POSITIVE")["norm_value"].value_counts()
        good_values = top_positive.index[:top_n]
        bad_values = top_negative.index[:top_n]

In [108]:
from ailignment.datasets.moral_stories import randomize_norm_value, flip_norm
# Work on a copy so `dataframe` stays untouched, then rewrite each row with
# the transform built from the good/bad value lists.
random_values = (
    dataframe
    .copy()
    .apply(randomize_norm_value(good_values, bad_values), axis=1)
)

In [118]:

# Two row-level transforms built from the same value lists; only the third argument differs.
# Presumably it is a flip probability compared against a draw in [0, 1), so 1.1 always
# flips and -1.1 never does — TODO confirm against flip_norm.
flip_all = flip_norm(good_values, bad_values, 1.1) # a function that flips everything
flip_none = flip_norm(good_values, bad_values, -1.1) # a function that flips nothing

In [119]:
def flip_cluster(p):
    """Build a per-group transform that treats a whole cluster uniformly.

    The returned callable draws one uniform random number per group: if it is
    <= ``p`` every row of the group goes through ``flip_all``, otherwise every
    row goes through ``flip_none``.
    """
    def flip_or_keep(cluster_rows):
        row_transform = flip_all if np.random.rand() <= p else flip_none
        return cluster_rows.apply(row_transform, axis=1)
    return flip_or_keep
# NOTE(review): the literal 0.5 duplicates the notebook-level `p` defined earlier.
groups = random_values.groupby("cluster").apply(flip_cluster(0.5))

In [121]:
# Count the rows per value of the "flipped" column.
groups["flipped"].value_counts()

True     6542
False    5454
Name: flipped, dtype: int64

In [2]:
# Display the installed PyTorch version for reference.
import torch
torch.__version__

'1.10.0+cu102'