In [3]:
from datasets import load_dataset
from datasets import concatenate_datasets

  from .autonotebook import tqdm as notebook_tqdm


In [8]:
!pip install sentence_transformers

Collecting sentence_transformers
  Downloading sentence_transformers-3.2.1-py3-none-any.whl (255 kB)
[K     |████████████████████████████████| 255 kB 1.3 MB/s eta 0:00:01
Collecting torch>=1.11.0
  Using cached torch-2.2.2-cp38-none-macosx_10_9_x86_64.whl (150.6 MB)
Collecting Pillow
  Downloading pillow-10.4.0-cp38-cp38-macosx_10_10_x86_64.whl (3.5 MB)
[K     |████████████████████████████████| 3.5 MB 3.7 MB/s eta 0:00:01
[?25hCollecting transformers<5.0.0,>=4.41.0
  Downloading transformers-4.46.3-py3-none-any.whl (10.0 MB)
[K     |████████████████████████████████| 10.0 MB 13.1 MB/s eta 0:00:01
Collecting sympy
  Using cached sympy-1.13.3-py3-none-any.whl (6.2 MB)
Collecting networkx
  Using cached networkx-3.1-py3-none-any.whl (2.1 MB)
Collecting jinja2
  Downloading jinja2-3.1.5-py3-none-any.whl (134 kB)
[K     |████████████████████████████████| 134 kB 7.7 MB/s eta 0:00:01
[?25hCollecting regex!=2019.12.17
  Downloading regex-2024.11.6-cp38-cp38-macosx_10_9_x86_64.whl (287 kB)

In [6]:
import sklearn
from sklearn.feature_extraction.text import TfidfVectorizer

In [9]:
from sentence_transformers import SentenceTransformer

In [10]:
from utils import *

In [11]:
from rag import KnowledgeBase
from rag import BeliefGroups

In [12]:
from rag import metric_exact_match
from rag import metric_f1

# Moral Stories

In [13]:
ds_moralstories = load_dataset("demelin/moral_stories", "cls-action+context+consequence-norm_distance")

Generating train split: 100%|██████████| 20000/20000 [00:00<00:00, 166826.92 examples/s]
Generating validation split: 100%|██████████| 2000/2000 [00:00<00:00, 227222.71 examples/s]
Generating test split: 100%|██████████| 2000/2000 [00:00<00:00, 180361.38 examples/s]


In [14]:
train_data = ds_moralstories["train"]
test_data = ds_moralstories["test"]
val_data = ds_moralstories["validation"]

ds_moralstories = concatenate_datasets([train_data, test_data, val_data])
moral_ds = ds_moralstories.filter(lambda x: x['label'] == 1)
immoral_ds = ds_moralstories.filter(lambda x: x['label'] == 0)

zipped = list(zip(moral_ds, immoral_ds))

ds = subset_ds(zipped, 1/20)


Filter: 100%|██████████| 24000/24000 [00:00<00:00, 36385.08 examples/s]
Filter: 100%|██████████| 24000/24000 [00:00<00:00, 49358.93 examples/s]


In [None]:
ds.features

AttributeError: 'list' object has no attribute 'ds'

In [21]:
dim = 384
model_embd = SentenceTransformer('all-MiniLM-L6-v2')

#dim = 768
#model_embd = SentenceTransformer("bert-base-nli-mean-tokens").to("cuda:0")

In [22]:
bg = BeliefGroups(("moral", "immoral"))
kb_embed = KnowledgeBase(beliefgroups = bg, dim = 384)

In [23]:
passage_embeddings = []
for elem in ds:
    moral_choice = elem[0]['moral_action']
    immoral_choice = elem[1]['immoral_action']

    moral_embed = model_embd.encode(moral_choice)
    immoral_embed = model_embd.encode(immoral_choice)
    passage_embeddings.append((moral_embed, immoral_embed))

for passage_index, passage_embd in enumerate(passage_embeddings):
    kb_embed.add_item(passage_embd[0], passage_index, bg.groups[0])
    kb_embed.add_item(passage_embd[1], passage_index, bg.groups[1])

In [None]:
recall_moral = 0
recall_immoral = 0
for i, elem in enumerate(ds):
    query = elem[0]['situation'] + ' ' + elem[1]['intention']
    retrieved_moral = kb_embed.retrieve(model_embd.encode(query), 'l2', 'moral', k = 1)
    retrieved_immoral = kb_embed.retrieve(model_embd.encode(query), 'l2', 'immoral', k = 1)
    if(retrieved_moral[0] == i):
        recall_moral += 1
    if(retrieved_immoral[0] == i):
        recall_immoral += 1 

recall = (recall_immoral + recall_moral) / (len(ds) * 2)
recall_moral = recall_moral / len(ds)
recall_immoral = recall_immoral / len(ds)
    

579
579


In [49]:
print(recall)
print(recall_moral)
print(recall_immoral)

0.965
0.965
0.965


# IDEAS (brouillon pour l'instant):
- moral stories: 
    - give moral and immoral choices to model and see how it answers 
    - see if we can retrieve correct passages given situations

- answer moral choice questions, compare with other base LLMs
    - give best 'norm' from moral stories as part of prompt (find best norm by doing RAG)
    - give best situations with their moral solutions
        - in both cases,   