# Galileo - Chain Poll Eval

### Imports

In [2]:
import os
import sys
import cohere
import json

from dotenv import load_dotenv
# Load environment variables from ../.env; COHERE_API_KEY is read via os.getenv
# when the Cohere client is created further down.
load_dotenv(dotenv_path="../.env")

* 'allow_population_by_field_name' has been renamed to 'populate_by_name'
* 'smart_union' has been removed


True

### System Preamble and Prompt Setup

In [3]:
# Retrieved passages used as grounding for both the generation and the evaluation prompts.
docs = [
    "'Kings and Queens' is Killing Joke's third single from their fifth studio album, 'Night Time'. It was originally released by E.G. Records on 21 March 1985 as a 12' and 7' single in the UK, and a 7' single by Polydor in the Netherlands. It was produced by Chris Kimsey. The 12' single featured 'Kings and Queens (A Right Royal Mix)' as an A-side, and both 'The Madding Crowd (Remixed by Killing Joke)' and 'Kings and Queens' as B-sides. E.G.'s 7' single and Polydor's 7' single exempted 'Kings and Queens (A Right Royal Mix)' and instead featured 'Kings and Queens' as the A-side, and 'The Madding Crowd (Remixed by Killing Joke)' as the B-side. E.G. also released a remix of the song, 'Kings and Queens (Knaves Mix)', as an A-side and featured the same B-sides as the 12' single of 'Kings and Queens'",
    "Acceptance is an American rock band from Seattle, Washington, formed in 1998. They released their first EP, 'Lost for Words', in 2000, followed by 'Black Lines to Battlefields' in 2003 (this EP was also re-released with live bonus tracks). Their debut album, 'Phantoms', was released in 2005.",
    "Killing Joke is the eleventh studio album by English rock band Killing Joke, released on 28 July 2003 by record label Zuma Recordings.",
    "Killing Joke are an English rock band formed in October 1978 in Notting Hill, London, England. The original line-up included Jaz Coleman (vocals, keyboards), Paul Ferguson (drums), Geordie Walker (guitars) and Youth (bass).",
    "Paul Ferguson (born Matthew Paul Ferguson, 31 March 1958) is a rock drummer, best known for his work in the post-punk/industrial group Killing Joke and cult English punk band Pink Parts. Following a stint as the drummer with the London-based Matt Stagger Band in 1978, Ferguson became a founding member of Killing Joke and served as their drummer from 1979 to 1987. He was known as 'Big Paul Ferguson' during this period.",
    "'A New Day' is a non-album single by Killing Joke. It was released by E.G. Records in July 1984 as a 12' and 7' single. The 12' single featured a dub mix of 'A New Day' as the A-side and 'A New Day' as the B-side. The 7' single featured a shorter version of 'A New Day' as the A-side and 'Dance Day' as the B-side. A completely different version of 'A New Day', which was not a mix, later appeared on the 2008 reissue of Killing Joke's fifth studio album, 'Night Time'. The single reached No. 51 in the UK Singles Chart. A promotional video was filmed for the song, marking the first time the band had made a video for a non-album single.",
    "Batman: The Killing Joke is a 1988 DC Comics one-shot graphic novel featuring the characters Batman and the Joker written by Alan Moore and illustrated by Brian Bolland. 'The Killing Joke' provides an origin story for the supervillain the Joker, loosely adapted from the 1951 story arc 'The Man Behind the Red Hood!'. Taking place over two timelines, 'The Killing Joke' depicts the Joker attempting to drive Jim Gordon insane and Batman's desperate attempt to stop him.",
    "Killing Joke is the debut studio album by English rock band Killing Joke. It was released in August 1980 by record label E.G.",
    "Pandemonium is the ninth studio album by English rock band Killing Joke, released on 2 August 1994 by record label Butterfly. This album marked Killing Joke's return after a four-year-long hiatus, the longest the band has taken since it was initially founded. This album also featured the return of founding member Youth, who replaced Paul Raven on bass.",
    "'Ha' or 'Ha': Killing Joke Live is the first commercially distributed live recording by English rock band Killing Joke. It was recorded at Larry's Hideaway in Toronto, Ontario, Canada on 9 and 10 August 1982, and released on 4 November by record label Virgin.",
]

# Flatten the passages into one text block: one "Document N: ..." line per passage,
# numbered from 1, each terminated by a newline.
CONTEXT = "".join(
    f"Document {position}: {passage}\n"
    for position, passage in enumerate(docs, start=1)
)


In [4]:
# Building blocks of the generation prompt: system preamble, task instruction and
# the user question. Every constant starts and ends with a newline, which the
# prompt template relies on for spacing.
PREAMBLE = (
    "\n"
    "You are Coral, a brilliant, sophisticated, AI-assistant chatbot trained to assist human users by providing thorough responses. You are powered by Command, a large language model built by the company Cohere. Today's date is Thursday, April 25, 2024.\n"
)
TASK = (
    "\n"
    "Answer the question using the information in the context.\n"
)
QUESTION = (
    "\n"
    "Which band was formed first Killing Joke or Acceptance ?\n"
)



In [5]:
# Skeleton of the generation prompt. The placeholders {preamble}, {context},
# {task} and {question} are filled in with str.format before the request is sent.
PROMPT_TEMPLATE = (
    "\n"
    "{preamble}\n"
    "Context:\n"
    "{context}\n"
    "\n"
    "Task:\n"
    "{task}\n"
    "Question:\n"
    "{question}\n"
    "Answer:\n"
)


In [6]:
# Chain Poll evaluation prompt. {context} and {response} are substituted with
# str.format; the judge is asked for reasoning plus a 1/0 judgement.
# NOTE(review): the prompt wording is kept verbatim, including its typos
# ("relevant to Document the provided Context", "judegment"), because editing it
# changes what the judge model sees. Consider a deliberate wording clean-up.
CP_PROMPT = (
    "\n"
    "You are an LLM Evaluator tasked with judging whether the provided Response is relevant to Document the provided Context to ensure that there is no hallucination and that the response adheres to the context provided.\n"
    "\n"
    "## Context\n"
    "{context}\n"
    "\n"
    "## Response\n"
    "{response}\n"
    "\n"
    "## Task\n"
    "Produce a judgement that compare the relevance of the context to the response. For each judgement:\n"
    "- Compare the Response to EACH of the document from the Context individually to evaluate if the claim made by the Response is fully supported by EACH document.\n"
    "- Summarize all the comparisons to provide your reasoning if the Response is hallucinated or not\n"
    "- Include your final judgement, either a 1 if the judegment is that the Response is relevant to the Context and therefore has not hallucinated, or 0 if not relevant.\n"
    "\n"
)

# Output-format guidance appended to the evaluation prompt. It is kept apart from
# CP_PROMPT because it contains literal braces that str.format would otherwise
# treat as placeholders. The trailing space before the final newline is part of
# the original text and is kept on purpose.
CP_FORMAT = (
    "\n"
    "## Format\n"
    "Format your responses in a json like the example below:\n"
    "{\n"
    "    reasoning: summarized reasoning for this judgement by individually evaluating the relevance of the Response against EACH documents in Context\n"
    "    judgement: 1 if the reasoning indicates that the Response is relevant to and supported by the Context or else 0\n"
    "}\n"
    "\n"
    "Think step-by-step and provide verbose, detailed reasoning to explain you judged the relevance. \n"
)



### Setting up Cohere API

In [7]:
# Cohere client; the API key is taken from the COHERE_API_KEY environment variable.
co = cohere.Client(
    log_warning_experimental_features=False,
    api_key=os.getenv("COHERE_API_KEY"),
)

# Fill the prompt template with the preamble, context, task and question.
generation_prompt = PROMPT_TEMPLATE.format(
    question=QUESTION,
    task=TASK,
    context=CONTEXT,
    preamble=PREAMBLE,
)

# Request settings: the passages are sent both inside the prompt text (via
# CONTEXT) and as structured `documents`.
generation_params = dict(
    model="command-r-plus",
    message=generation_prompt,
    temperature=0.3,
    documents=[{"text": passage} for passage in docs],
)

# Keep only the text of the chat reply as the answer to evaluate.
chat_result = co.chat(**generation_params)
response = chat_result.text


In [8]:
# Show the generated answer text as the cell output.
response


'Killing Joke was formed in 1978, while Acceptance was formed in 1998. Therefore, Killing Joke was formed first.'

### Running Chain Poll Eval

In [134]:
# Build the Chain Poll prompt: substitute the context and the generated answer
# into CP_PROMPT, then append the JSON format guidance. CP_FORMAT is concatenated
# after formatting rather than passed through str.format.
filled_eval_prompt = CP_PROMPT.format(response=response, context=CONTEXT)
cp_prompt = "".join((filled_eval_prompt, CP_FORMAT))


In [136]:
# Structured output format: the judge must return a JSON object with a string
# "reasoning" field and an integer "judgement" field.
judgement_schema = {
    "type": "object",
    "required": ["reasoning", "judgement"],
    "properties": {
        "reasoning": {"type": "string"},
        "judgement": {"type": "integer"},
    },
}
RESPONSE_FORMAT = {"type": "json_object", "schema": judgement_schema}

# Request settings shared by every judgement call.
judgement_params = dict(
    model="command-r-plus",
    message=cp_prompt,
    response_format=RESPONSE_FORMAT,
    temperature=0.3,
)

# Poll the judge 5 times, parsing each reply text as JSON.
judgements = []
for _ in range(5):
    raw_reply = co.chat(**judgement_params).text
    judgements.append(json.loads(raw_reply))


In [137]:
from pprint import pprint

# Pretty-print every judgement dict, each followed by a 20-dash divider line.
divider = "-" * 20
for verdict in judgements:
    pprint(verdict)
    print(divider)


{'judgement': 1,
 'reasoning': "Document 1 mentions Killing Joke's third single from their "
              'fifth studio album but does not mention when the band was '
              'formed. Document 2 mentions Acceptance, an American rock band '
              'formed in 1998, and their releases from 2000 to 2005. Document '
              "3 mentions Killing Joke's eleventh studio album released in "
              "2003 but does not include the band's formation year. Document 4 "
              'clearly states that Killing Joke was formed in 1978, supporting '
              "the Response's claim. Document 5 provides background on Killing "
              "Joke's drummer, Paul Ferguson, and mentions the band's "
              'formation in 1979, which is close to the claimed year. Document '
              "6 discusses Killing Joke's single 'A New Day' without providing "
              "insight into the band's formation. Document 7 is about a "
              "graphic novel titled 'Batman: 

### Chain Poll Score

In [125]:
# Chain Poll score = share of judgements that voted 1 ("supported by the context").
# The denominator comes from the judgements actually collected instead of a
# separately hard-coded 5, so the score stays correct if the number of polls
# changes or a run is partial.
TOTAL_RECORDS = len(judgements)
if TOTAL_RECORDS == 0:
    # A 0% score would be misleading when nothing was evaluated at all.
    raise ValueError("No judgements collected; cannot compute a Chain Poll score.")
# Count only explicit 1 votes: the response schema allows any integer, and the
# prompt defines 1 as the sole "yes" value, so this keeps the score within [0, 1].
yes_records = sum(1 for item in judgements if item["judgement"] == 1)
# calculate chain poll score
chain_poll_score = yes_records / TOTAL_RECORDS


In [126]:
# Report the Chain Poll score as a percentage with two decimal places.
chain_poll_line = f"Chain Poll Score (%): {chain_poll_score:.2%}"
print(chain_poll_line)

Chain Poll Score (%): 100.00%


In [76]:
from datasets import Dataset
from ragas.metrics import faithfulness
from ragas import evaluate
from langchain_openai import ChatOpenAI

# Cross-check the Chain Poll result with the Ragas faithfulness metric, judged by gpt-4o.
# The OpenAI key is read from the environment (loaded from ../.env at the top of
# the notebook), the same way the Cohere key is, instead of an empty placeholder
# string that has to be edited by hand and risks being committed with a real key.
api_key = os.getenv("OPENAI_API_KEY")
judge = ChatOpenAI(api_key=api_key, model="gpt-4o")

# Single-sample dataset: one question, one generated answer, and the list of
# passages as that sample's contexts.
data_samples = {
    "question": [QUESTION],
    "answer": [response],
    "contexts": [docs],
}
dataset = Dataset.from_dict(data_samples)
score = evaluate(dataset, llm=judge, metrics=[faithfulness])["faithfulness"]


Evaluating: 100%|██████████| 1/1 [00:12<00:00, 12.35s/it]


In [78]:
# Report the Ragas faithfulness score as a percentage with two decimal places.
ragas_line = f"Ragas Score (%): {score:.2%}"
print(ragas_line)

Ragas Score (%): 100.00%
