In [1]:
from datasets import load_dataset

  from .autonotebook import tqdm as notebook_tqdm


In [2]:

import os
import openai
import json
import numpy as np
from tqdm import tqdm
import backoff 
openai.api_key = json.load(open("/Users/shahules/openai-key.json"))['ikka']

In [3]:
import pickle


In [12]:

@backoff.on_exception(backoff.expo, openai.error.APIConnectionError)
def llm(prompt, **kwargs):
    response = openai.ChatCompletion.create(
        model=kwargs.get("model", "gpt-3.5-turbo"),
        messages=[{"role":"user","content":prompt}],
        temperature=kwargs.get("temperature", 0),
        top_p=kwargs.get("top_p", 1),
        frequency_penalty=kwargs.get("frequency_penalty", 0.0),
        presence_penalty=kwargs.get("presence_penalty", 0.0),
        max_tokens=kwargs.get("max_tokens", 500),
        n=kwargs.get("n", 1),
    )
    return response['choices'][0]['message']['content']


## Prepare ELI5 Data

In [5]:
eli5_dataset = load_dataset("Pakulski/ELI5-test")

Found cached dataset parquet (/Users/shahules/.cache/huggingface/datasets/Pakulski___parquet/Pakulski--ELI5-test-ed159b4d22db0b30/0.0.0/14a00e99c0d15a23649d0db8944380ac81082d4b021f398733dd84f3a6c569a7)
100%|█████████████████████████████████████████████████████| 1/1 [00:00<00:00,  8.36it/s]


In [6]:
eli5_dataset = eli5_dataset.filter(lambda ex: ex["question"].endswith("?"))

Loading cached processed dataset at /Users/shahules/.cache/huggingface/datasets/Pakulski___parquet/Pakulski--ELI5-test-ed159b4d22db0b30/0.0.0/14a00e99c0d15a23649d0db8944380ac81082d4b021f398733dd84f3a6c569a7/cache-bd1bf988295568cc.arrow


In [7]:
def prepara_prompt(example):
    
    prompt = """
    Answer the follwing question using information in the given context. 
    If information is the given context is not enough to answer the question reply 'insufficient information'
    
    Context: In the Himalayas, the snow leopard is known for its remarkable ability to blend into its snowy environment, thanks to its white and gray coat. These leopards are solitary animals, mostly active at dawn and dusk. Their diet primarily consists of ibex and other mountain ungulates.
    Question: What is the primary diet of snow leopards?
    Answer: The primary diet of snow leopards consists of ibex and other mountain ungulates.
    
    Context: Leonardo da Vinci, a figure of the Italian Renaissance, was renowned for his contributions to art, science, and engineering. Among his famous works are the Mona Lisa and The Last Supper. His scientific explorations covered a variety of fields, including anatomy, optics, and hydraulics.
    Question: What were Leonardo da Vinci's contributions to the field of astronomy?
    Answer: Insufficient information.
    
    Context:\n{context}
    Question:{question}
    Answer:
    """
    
    q, d = example['question'], example['document']
    return prompt.format(question=q,context=d)
    
    
def extend_context(example):
    
    prompt = """
    Add one extra paragraph containing 3 to 4 sentences to given text.
    context:{context}
    """
    docs = example['contexts'][:]
    idx = np.random.randint(0,len(docs))
    chunk = llm(prompt.format(context=docs[idx]))
    docs.insert(idx+1, chunk)
    return docs
    
    
def less_relevant_answer(example):
    
    prompt =  """
    Answer the given question partially.
    question: Where is France is what's it capital?
    poor answer: France is country in Western Europe.
    question: How does photosynthesis work in plants?
    poor answer: Photosynthesis is a process used by plants to convert light into energy
    question:{question}
    poor answer:
    """
    q = example['question']
    prompt = prompt.format(question=q)
    return llm(prompt)
    
def get_ungrounded_answer(example):
    
    prompt = """
    Answer the following question without being too long. 
    question:{question}
    answer:
    """
    q = example['question']
    prompt = prompt.format(question=q)
    return llm(prompt)
    
    
     
    

In [27]:
# eli5_ragas = pickle.load(open("eli5_ragas.pkl","rb"))
eli5_ragas = []
selected = eli5_dataset['train'].shuffle().select(range(0,300))
for example in tqdm(selected):
    prompt = prepara_prompt(example)
    output = llm(prompt)
    if 'insufficient information' not in output.lower():
        sample = {"question":example['question'], "contexts":example['document'].split("\n"), 
                 "answer":answer, 'id':example['id']}
        eli5_ragas.append(sample)



 53%|██████████████████████████▏                      | 160/300 [37:46<33:03, 14.17s/it]


Timeout: Request timed out: HTTPSConnectionPool(host='api.openai.com', port=443): Read timed out. (read timeout=600)

In [21]:
example['goldenAnswer'], eli5_ragas[-1]['answer']

('There are different receptors in different parts of your body passing on different information to your brain. Your internal organs have a lot of stretch receptors that help them know to move food along or tell you you’re full (in the stomach). You don’t have pain receptors in many of your internal organs. An example of this is in your oesophagus, it has stretch receptors but not temperature receptors so if you swallow hot food it only feels hot while in your mouth and the top of your throat, once it’s swallowed you don’t feel the heat. It’s still hot and could burn your oesophagus (like if it burns your mouth a little) but you don’t have receptors telling your brain it’s hot. So cutting your intestine - no pain, someone pulling on and stretching your intestines - probably a lot of pain.',
 'The information in the given context is not sufficient to answer the question.')

In [67]:
with open("eli5_ragas.pkl", "wb") as obj:
    pickle.dump(eli5_ragas, obj)

In [10]:
eli5_ragas = pickle.load(open("eli5_ragas.pkl",'rb'))

In [22]:
eli5_ragas = [item for item in eli5_ragas if 'insufficient information' not in item['answer'].lower()]

In [25]:
[item['answer'] for item in eli5_ragas]

['Recognizing the Armenian Genocide is a controversial statement because there are differing opinions and denial of the genocide by some countries, including Turkey.',
 'Declaring bankruptcy is a legal process where a debtor who is unable to pay their debts seeks relief from their financial obligations. It helps with debts by stopping any payments made to creditors immediately. There are two primary avenues of bankruptcy for individuals in the United States: Chapter 13 bankruptcy and Chapter 7 bankruptcy.',
 'Gargling with salt water helps with tonsillitis by alleviating discomfort and keeping the tonsil crypts clear of tonsilloliths. It may also reduce the symptoms of a sore throat.',
 'Humans need vitamin D to survive because it plays an important role in regulating the immune system and maintaining overall health. Vitamin D helps the body absorb calcium and phosphorus, which are essential for bone health. It also plays a role in cell growth, neuromuscular function, and reducing infl

In [31]:
for item in tqdm(eli5_ragas[:]):
    item["poor_answer"] = less_relevant_answer(item)
    item["ungrounded_answer"] = get_ungrounded_answer(item)
    item["context_v2"] = extend_context(item)
    
    

100%|███████████████████████████████████████████████████| 85/85 [24:02<00:00, 16.96s/it]


In [32]:
with open("eli5_ragas.pkl", "wb") as obj:
    pickle.dump(eli5_ragas, obj)

In [30]:
set(item['context_v2']) - set(item['contexts'])

{'This error, while not as catastrophic as a system crash, still poses a significant issue for programmers and users alike. It can disrupt the smooth functioning of the software, causing inconvenience and potential loss of data. Therefore, it is crucial to address and rectify such errors promptly to ensure the optimal performance of the computer program.'}

In [16]:
item['answer']

'The Soviets both innovated and used designs obtained through espionage from Nazi Germany and the Western nations.'

In [108]:
len(eli5_ragas[0]['contexts'])

9

In [38]:
# eli5_dataset['train'].shuffle().select(range(0,50))

In [57]:
len([item for item in eli5_ragas if item['answer']!='Insufficient information'])

66

In [96]:
x = [1,2,3]

In [97]:
x.insert(1,5)

In [98]:
x

[1, 5, 2, 3]

In [None]:
from ragas.metrics import (
    answer_relevancy,
    faithfulness,
)
from ragas import evaluate
from ragas.metrics.context_relevance import ContextRelevancy

In [None]:
results = evaluate(eli5_dataset, metrics=[faithfulness])