## Setting up the notebook

High-level configs

In [9]:
%reload_ext autoreload
%autoreload 2

from dotenv import load_dotenv

# Load environment variables from the .env file one directory above the working
# directory. Adjust the path to the .env file as needed.
load_dotenv(dotenv_path='../.env')

# Enable asyncio in Jupyter: nest_asyncio is applied so that the `asyncio.run(...)`
# calls made later in this notebook can be used from within notebook cells.
import asyncio
import nest_asyncio

nest_asyncio.apply()

# Add the directory two levels up to the import path (required if you are running
# this notebook from the examples folder).
import sys
sys.path.append('../../')


Import required packages

In [10]:
import json
import numpy as np
from openai import AsyncOpenAI
import pandas as pd
from pydantic import BaseModel
from tqdm.auto import tqdm

from lattereview.providers import OpenAIProvider
from lattereview.providers import OllamaProvider
from lattereview.providers import LiteLLMProvider
from lattereview.agents import ScoringReviewer
from lattereview.workflows import ReviewWorkflow

## Data

Build five example stories and, for each story, three TRUE/FALSE question–answer pairs:

In [3]:
# Schema handed to the provider in `build_story` as the response format class:
# one story plus two parallel lists holding the questions and their answers.
# (Documented with comments rather than a docstring so the model definition
# itself stays unchanged.)
class BuildStoryOutput(BaseModel):
    story: str  # the generated story text
    questions: list[str]  # statements/questions about the story
    answers: list[bool]  # answers[j] is the boolean answer paired with questions[j]

async def build_story():
    """Request one short story and three TRUE/FALSE questions about it.

    An ``OpenAIProvider`` for ``gpt-4o`` is created with ``BuildStoryOutput``
    supplied as its response format class, and the prompt is sent with a
    temperature of 0.9.

    Returns:
        Whatever ``OpenAIProvider.get_json_response`` returns for the prompt.
    """
    story_provider = OpenAIProvider(model="gpt-4o", response_format_class=BuildStoryOutput)
    instructions = """
    Write a one-paragraph story with whatever realistic or imaginary theme you like,  
    then create three TRUE/FALSE questions based on your story. 
    Ensure that only readers of your story can determine whether the statements are true or false. 
    Do not reveal the answers to your questions.
    Return your story, a Python list of three questions, and another Python list of three boolean responses to the questions as your output.
    """
    response = await story_provider.get_json_response(instructions, temperature=0.9)
    return response

def run_build_story():
    """Run ``build_story`` to completion and return the first element of its result."""
    result = asyncio.run(build_story())
    return result[0]

# Number of stories to generate.
N_STORIES = 5

# One record per (question, answer) pair; every record repeats the story it was
# written for, so each row of the final table is self-contained.
records = []
for _ in tqdm(range(N_STORIES)):
    out = json.loads(run_build_story())
    # Pair questions with answers via zip instead of a fixed range(3): the lists
    # come from an LLM, so a reply with fewer than three items no longer raises
    # IndexError and a longer reply is not silently truncated.
    for question, answer in zip(out["questions"], out["answers"]):
        records.append({"question": question, "answer": answer, "story": out["story"]})

# Build the frame once from the collected records; explicit columns keep the
# schema stable even if no records were produced.
data = pd.DataFrame(records, columns=["question", "answer", "story"])
# Uncomment to persist the generated set to data.csv:
# data.to_csv("data.csv", index=False)
data

100%|██████████| 5/5 [00:22<00:00,  4.40s/it]


Unnamed: 0,question,answer,story
0,Lumo was a creature living in a bustling city.,False,Deep in the heart of the Enchanted Forest live...
1,The archway in the story was covered in moss.,True,Deep in the heart of the Enchanted Forest live...
2,Lumo met a fox who was the guardian of the grove.,False,Deep in the heart of the Enchanted Forest live...
3,Did the Whispering Tree have golden leaves?,False,"In the heart of the Enchanted Forest, there st..."
4,Was Elara a young boy?,False,"In the heart of the Enchanted Forest, there st..."
5,Did the tree reveal the secret to finding wealth?,False,"In the heart of the Enchanted Forest, there st..."
6,Jasper lived in a town called Lumina.,True,"In the distant town of Lumina, nestled between..."
7,Athena the clockwork owl was made of gold.,False,"In the distant town of Lumina, nestled between..."
8,A lightning strike caused Athena to startle th...,True,"In the distant town of Lumina, nestled between..."
9,Elias was a fisherman in a bustling city.,False,"In a small coastal village, nestled between th..."


Embed each unique story to build a vector base (the question set is first reloaded from `data.csv`):

In [11]:
# Load the question/answer/story table from data.csv in the working directory.
# The file must already exist; this rebinds `data`, replacing any DataFrame
# with that name created earlier in the session.
data = pd.read_csv("data.csv")

async def get_embedding(text):
    """Embed one string or a list of strings with ``text-embedding-ada-002``.

    Args:
        text: A single string, or a list of strings sent in one request.

    Returns:
        A single ``np.ndarray`` when ``text`` is a string; otherwise a list of
        ``np.ndarray``, one per item in the response. The return shape follows
        the *input type*, not the number of results, so a one-element list
        yields a one-element list rather than a bare array.
    """
    single_input = isinstance(text, str)
    texts = [text] if single_input else list(text)
    # Newlines are flattened to spaces before the text is embedded.
    texts = [x.replace("\n", " ") for x in texts]
    client = AsyncOpenAI()
    out = await client.embeddings.create(
        model="text-embedding-ada-002",
        input=texts,
        encoding_format="float"
    )
    vectors = [np.array(x.embedding) for x in out.data]
    return vectors[0] if single_input else vectors

# Unique stories as dict keys, in first-appearance order. dict.fromkeys
# de-duplicates just like a set would, but its insertion order is deterministic,
# so anything built by iterating these keys comes out in the same order on every
# kernel restart (set iteration order over strings varies between runs).
# The None values are placeholders.
stories = dict.fromkeys(data["story"].tolist())

# Create async tasks for all embeddings
async def process_embeddings():
    """Embed every story in ``stories`` and pair each vector with its text.

    One ``get_embedding`` call is created per story and all of them are awaited
    together with ``asyncio.gather``.

    Returns:
        A list of ``(embedding, story)`` tuples, one per key of ``stories``.
    """
    story_texts = list(stories.keys())
    vectors = await asyncio.gather(*(get_embedding(s) for s in story_texts))
    return list(zip(vectors, story_texts))

# Run the async code and get results
# (embedding, story) tuples returned by process_embeddings().
vector_story_pairs = await process_embeddings()
# Collect the embeddings into one array; entry i of vector_base is the embedding
# of the story stored at vector_story_pairs[i][1].
vector_base = np.array([x[0] for x in vector_story_pairs])
vector_base

array([[ 3.5781710e-03, -7.0217124e-05, -1.9292142e-02, ...,
         5.2499394e-03, -9.0594860e-03, -4.9872793e-02],
       [-2.6613185e-03,  6.8810517e-03, -1.6191496e-02, ...,
         3.8676716e-03, -6.2136360e-03, -2.4841197e-02],
       [ 3.1101482e-02, -5.1025870e-03, -3.7245888e-03, ...,
        -5.5552630e-03,  7.1962117e-03, -1.8293424e-02],
       [ 1.4296985e-02, -3.9061885e-03,  1.4900517e-02, ...,
        -8.4494380e-03, -4.7377210e-03, -1.5986873e-02],
       [ 1.2298309e-02, -7.1190320e-03, -2.0149698e-02, ...,
        -9.5932090e-03, -5.1264950e-03, -2.7103785e-02]])

## Retrieval

In [12]:
async def find_relevant_story(statement):
    """Return the stored story whose embedding is closest to ``statement``.

    The statement is embedded with ``get_embedding`` and scored against every
    row of ``vector_base`` by cosine similarity; the story paired with the
    highest-scoring row in ``vector_story_pairs`` is returned.
    """
    query_vec = await get_embedding(statement)
    # Cosine similarity: dot product divided by the product of the two norms.
    norm_product = np.linalg.norm(vector_base, axis=1) * np.linalg.norm(query_vec)
    similarities = np.dot(vector_base, query_vec) / norm_product
    best_row = np.argmax(similarities)
    return vector_story_pairs[best_row][1]

# Row of `data` whose question is used to spot-check the retriever.
input_index = 11
statement = data.iloc[input_index]["question"]
retrieved_story = await find_relevant_story(statement)

# Show the question, the story stored on the same row of `data`, and the story
# the retriever returned, so the last two can be compared by eye.
print(f"=== The question was chosen from row {input_index} ===\n{statement}")
print(f"=== The related story to the question ===\n{data.iloc[input_index]['story']}")
print(f"=== The retrieved Story ===\n{retrieved_story}")

=== The question was chosen from row 11 ===
The coins Elias found were covered in barnacles.
=== The related story to the question ===
In a small coastal village, nestled between the azure ocean and the emerald hills, lived an old fisherman named Elias. Elias had spent his entire life on the sea, his face weathered by the sun and salt, his hands coarse from years of hauling nets. One day, while fishing near the coral reefs, Elias spotted a glimmer beneath the waves. Diving down, he discovered a chest encrusted with barnacles. Inside, to his astonishment, was a collection of ancient coins, each etched with mysterious symbols. Elias decided to keep his discovery a secret, fearing that others might disturb the sea's tranquility in search of more treasure. As the years passed, he often wondered what stories those coins could tell, as he watched the sun set over the endless horizon.
=== The retrieved Story ===
In a small coastal village, nestled between the azure ocean and the emerald hills

## Scoring with Retrieval Augmented Generation

In [13]:
# LLM backend used by the scoring reviewer.
llm_provider = LiteLLMProvider(model="gpt-4o-mini")

# Scoring agent: scores each statement 1 (TRUE) or 2 (FALSE). The
# `find_relevant_story` coroutine function is passed as `additional_context`.
reviewer = ScoringReviewer(
    provider=llm_provider,
    name="reviewer",
    max_concurrent_requests=20,
    backstory="A frequent book reader",
    input_description="TRUE/FALSE questions about stories",
    model_args={"max_tokens": 200, "temperature": 0.1},
    reasoning="brief",
    scoring_task="Decide if the input statement is True or False given the provided story in the provided context",
    scoring_set=[1, 2],
    scoring_rules="Score 1 if the statement is TRUE and 2 if the statement is FALSE.",
    additional_context=find_relevant_story,
)

# Single review round "A": one reviewer, with the "question" column as its input.
round_a = {
    "round": "A",
    "reviewers": [reviewer],
    "inputs": ["question"],
}
review = ReviewWorkflow(workflow_schema=[round_a])

# Run the workflow over the question table and display the augmented frame.
updated_data = asyncio.run(review(data))
updated_data



Processing 15 eligible rows


['round: A', 'reviewer_name: reviewer'] -                     2024-12-23 00:12:42: 100%|██████████| 15/15 [00:02<00:00,  5.97it/s]

The following columns are present in the dataframe at the end of reviewer's reivew in round A: ['question', 'answer', 'story', 'round-A_reviewer_output', 'round-A_reviewer_reasoning', 'round-A_reviewer_score', 'round-A_reviewer_certainty']





Unnamed: 0,question,answer,story,round-A_reviewer_output,round-A_reviewer_reasoning,round-A_reviewer_score,round-A_reviewer_certainty
0,Lumo was a creature living in a bustling city.,False,Deep in the heart of the Enchanted Forest live...,{'reasoning': 'The statement claims that Lumo ...,The statement claims that Lumo was a creature ...,2,95
1,The archway in the story was covered in moss.,True,Deep in the heart of the Enchanted Forest live...,{'reasoning': 'The statement is TRUE because t...,The statement is TRUE because the story explic...,1,90
2,Lumo met a fox who was the guardian of the grove.,False,Deep in the heart of the Enchanted Forest live...,{'reasoning': 'The statement is false because ...,The statement is false because Lumo met an owl...,2,90
3,Did the Whispering Tree have golden leaves?,False,"In the heart of the Enchanted Forest, there st...",{'reasoning': 'The statement is FALSE because ...,The statement is FALSE because the Whispering ...,2,90
4,Was Elara a young boy?,False,"In the heart of the Enchanted Forest, there st...",{'reasoning': 'The statement is FALSE because ...,The statement is FALSE because the context doe...,2,90
5,Did the tree reveal the secret to finding wealth?,False,"In the heart of the Enchanted Forest, there st...",{'reasoning': 'The statement is FALSE because ...,The statement is FALSE because the story does ...,2,90
6,Jasper lived in a town called Lumina.,True,"In the distant town of Lumina, nestled between...",{'reasoning': 'The statement is TRUE because t...,The statement is TRUE because the context clea...,1,100
7,Athena the clockwork owl was made of gold.,False,"In the distant town of Lumina, nestled between...",{'reasoning': 'The statement claims that Athen...,The statement claims that Athena the clockwork...,2,90
8,A lightning strike caused Athena to startle th...,True,"In the distant town of Lumina, nestled between...",{'reasoning': 'The statement is FALSE because ...,The statement is FALSE because the lightning s...,2,90
9,Elias was a fisherman in a bustling city.,False,"In a small coastal village, nestled between th...",{'reasoning': 'The statement claims that Elias...,The statement claims that Elias was a fisherma...,2,90


In [14]:
# Inspect the reviewer's memory entry at index 11 to see the system prompt, model
# arguments and input prompt recorded for that item.
reviewer.memory[11]

{'system_prompt': "Your name is: <<reviewer>> Your backstory is: <<A frequent book reader>>. Your task is to review input itmes with the following description: <<TRUE/FALSE questions about stories>>. Your final output should have the following keys: reasoning (<class 'str'>), score (<class 'int'>), certainty (<class 'int'>).",
 'model_args': {'max_tokens': 200, 'temperature': 0.1},
 'input_prompt': "**Review the input item below and complete the scoring task as instructed:** --- **Input item:** <<Review Task ID: A-11 Content Hash: d70c23008dcfb6977b0f68e59d7f1dce === question === The coins Elias found were covered in barnacles.>> **Scoring task:** <<Decide if the input statement is True or False given the provided story in the provided context>> --- **Instructions:** 1. **Score** the input item using only the values in this set: [1, 2]. 2. Follow these rules when determining your score: <<Score 1 if the statement is TRUE and 2 if the statement is FALSE.>>. 3. After assigning a score, r