This notebook evaluates the overall performance of the RAG system.

# Load documents with IDs

In [1]:
import json

# Read the FAQ documents (each record carries 'question', 'answer' and 'id').
# The encoding is passed explicitly so the load does not depend on the
# platform's default locale encoding; JSON files are UTF-8 by convention.
with open('documents-crypto-with-ids.json', 'rt', encoding='utf-8') as f_in:
    documents = json.load(f_in)

In [2]:
documents[10]

{'question': 'What is block time?',
 'answer': 'Depending upon how a particular blockchain protocol was developed, the time that it takes for a block to be added to the canonical chain can vary widely. A blockchain is a linear construct in that every new block occurs at a later time than the one that preceded it and cannot be undone. A blockchain’s linearity serves as an ideal form of validation. According to ethstats.io as of July 2019, for the Ethereum blockchain, new blocks are added approximately every 14 seconds.',
 'id': '04090abf'}

# Load ground truth

In [3]:
import pandas as pd

In [5]:
# Forward slashes are accepted as a path separator on Windows, Linux and macOS.
# The previous literal used a backslash, which only resolves on Windows and
# relied on "\g" being an unrecognised escape sequence inside a normal string.
# 'total_count' is dropped without mutating in place, so the cell is a single
# idempotent expression from file to frame.
df_ground_truth = (
    pd.read_csv('data/ground-truth-data-crypto.csv')
    .drop(columns=['total_count'])
)
ground_truth = df_ground_truth.to_dict(orient='records')

In [6]:
ground_truth[10]

{'question': 'What does a blockchain system encompass?',
 'document': 'e3753a49'}

In [7]:
# Lookup table from document id to the full document record, so ground-truth
# entries (which only carry the id under 'document') can be resolved to the
# original answer text.
doc_idx = {d['id']: d for d in documents}

# Index data

In [8]:
from sentence_transformers import SentenceTransformer

# Sentence-embedding model shared by the notebook: it encodes retrieval
# queries and, later, the answers whose similarity is being scored.
model_name = 'multi-qa-MiniLM-L6-cos-v1'
model = SentenceTransformer(model_name)



In [9]:
from elasticsearch import Elasticsearch

# Client for the Elasticsearch node at localhost:9200.
es_client = Elasticsearch('http://localhost:9200') 

# Index definition: one shard, no replicas, text fields for the FAQ entry, a
# keyword id, and one indexed 384-dimensional dense vector compared by cosine.
index_settings = {
    "settings": {
        "number_of_shards": 1,
        "number_of_replicas": 0
    },
    "mappings": {
        "properties": {
            "answer": {"type": "text"},
            "question": {"type": "text"},
            "id": {"type": "keyword"},
            "question_answer_vector": {
                "type": "dense_vector",
                "dims": 384,
                "index": True,
                "similarity": "cosine"
            },
        }
    }
}

index_name = "crypto-questions"

# Index (re)creation is disabled below; presumably the index already exists
# from an earlier run -- uncomment both lines to rebuild it from scratch.
# es_client.indices.delete(index=index_name, ignore_unavailable=True)
# es_client.indices.create(index=index_name, body=index_settings)

In [13]:
from tqdm.auto import tqdm

# for doc in tqdm(documents):
#     question = doc['question']
#     text = doc['answer']
#     doc['question_answer_vector'] = model.encode(question + ' ' + text)

#     es_client.index(index=index_name, document=doc)

  0%|          | 0/46 [00:00<?, ?it/s]

# Retrieval

In [16]:
def elastic_search_knn(field, vector):
    """Run a kNN search against ``field`` and return the stored documents.

    Args:
        field: Name of the vector field to search.
        vector: Query embedding sent as ``query_vector``.

    Returns:
        A list holding the ``_source`` dict (answer, question, id) of every
        hit, in the order Elasticsearch returned them.
    """
    request_body = {
        "knn": {
            "field": field,
            "query_vector": vector,
            "k": 5,
            "num_candidates": 10000,
        },
        "_source": ["answer", "question", "id"],
    }

    response = es_client.search(index=index_name, body=request_body)

    return [hit['_source'] for hit in response['hits']['hits']]

def question_answer_vector_knn(q):
    """Embed ``q['question']`` and search the 'question_answer_vector' field.

    Args:
        q: Mapping with at least a 'question' key.

    Returns:
        The list of documents returned by ``elastic_search_knn``.
    """
    query_embedding = model.encode(q['question'])
    return elastic_search_knn('question_answer_vector', query_embedding)

In [17]:
question_answer_vector_knn(dict(
    question='What is a blockchain?'
))

[{'question': 'What is a blockchain?',
  'answer': 'A blockchain is a distributed, cryptographically-secure database structure that allows network participants to establish a trusted and immutable record of transactional data without the need for intermediaries. A blockchain can execute a variety of functions beyond transaction settlement, such as smart contracts. Smart contracts are digital agreements that are embedded in code and that can have limitless formats and conditions. Blockchains have proven themselves as superior solutions for securely coordinating data, but they are capable of much more, including tokenization, incentive design, attack-resistance, and reducing counterparty risk. The very first blockchain was the Bitcoin blockchain, which itself was a culmination of over a century of advancements in cryptography and database technology.',
  'id': 'ccb39dc7'},
 {'question': 'What is a block in a blockchain?',
  'answer': 'The “block” in a blockchain refers to a block of tran

# The RAG flow

In [18]:
def build_prompt(query, search_results):
    """Render the LLM prompt for ``query`` from the retrieved FAQ entries.

    Args:
        query: The user question, inserted after ``QUESTION:``.
        search_results: Iterable of dicts with 'question' and 'answer' keys;
            each one becomes a "question/answer" paragraph in the CONTEXT.

    Returns:
        The filled-in prompt with leading and trailing whitespace removed.
    """
    template = """
You're a personal assisstant which teach people which still beginner about investing in cryptocurrency. 
Answer the QUESTION based on the CONTEXT from the FAQ database.
Use only the facts from the CONTEXT when answering the QUESTION.

QUESTION: {question}

CONTEXT: 
{context}
""".strip()

    # Every entry contributes one block; blocks are simply concatenated.
    context = "".join(
        f"\nquestion: {entry['question']}\nanswer: {entry['answer']}\n\n"
        for entry in search_results
    )

    return template.format(question=query, context=context).strip()

In [19]:
from openai import OpenAI

client = OpenAI()

def llm(prompt, model='gpt-4o'):
    """Send ``prompt`` as a single user message and return the reply text.

    Args:
        prompt: Full prompt text for the chat completion.
        model: Name of the chat model to call.

    Returns:
        The message content of the first returned choice.
    """
    messages = [{"role": "user", "content": prompt}]
    completion = client.chat.completions.create(model=model, messages=messages)
    first_choice = completion.choices[0]
    return first_choice.message.content

In [20]:
# previously: rag(query: str) -> str
def rag(query: dict, model='gpt-4o') -> str:
    """Answer ``query['question']`` with retrieval-augmented generation.

    The query is used for vector retrieval, the hits are rendered into a
    prompt, and the prompt is sent to the chosen chat model.

    Args:
        query: Mapping with at least a 'question' key.
        model: Name of the chat model forwarded to ``llm``.

    Returns:
        The text returned by ``llm``.
    """
    retrieved = question_answer_vector_knn(query)
    return llm(build_prompt(query['question'], retrieved), model=model)

Let's compare the original answer with the answer generated by the LLM.

In [21]:
ground_truth[10]

{'question': 'What does a blockchain system encompass?',
 'document': 'e3753a49'}

In [25]:
answer_llm = rag(ground_truth[10])

In [26]:
answer_orig = doc_idx['e3753a49']['answer']

# Cosine similarity metric

In [27]:
v_llm = model.encode(answer_llm)
v_orig = model.encode(answer_orig)

v_llm.dot(v_orig)

0.92557955

In [28]:
answers = {}

In [30]:
# Generate an answer (rag's default model) for every ground-truth record.
# Results are keyed by position, so re-running the cell after an interruption
# only processes the records that are still missing.
for i, rec in enumerate(tqdm(ground_truth)):
    if i not in answers:
        answer_llm = rag(rec)
        doc_id = rec['document']
        answer_orig = doc_idx[doc_id]['answer']

        answers[i] = dict(
            answer_llm=answer_llm,
            answer_orig=answer_orig,
            document=doc_id,
            question=rec['question'],
        )

  0%|          | 0/205 [00:00<?, ?it/s]

In [36]:
# One slot per ground-truth record; positions without a generated answer
# remain None.
results_gpt4o = [None] * len(ground_truth)

for i, val in answers.items():
    # Ground-truth fields are layered over a fresh copy of the answer record,
    # leaving the entries of `answers` untouched.
    results_gpt4o[i] = {**val, **ground_truth[i]}

In [38]:
import pandas as pd
df_gpt4o = pd.DataFrame(results_gpt4o)
df_gpt4o.to_csv('results-gpt4o-crypto.csv', index=False)

# Evaluating GPT 3.5

In [39]:
rag(ground_truth[10], model='gpt-3.5-turbo')

'A blockchain system encompasses all aspects and features that go into a particular blockchain, including the consensus algorithm, state machine, and cryptographic functions. It also includes qualifiers that help understand the characteristics of the blockchain, such as being open, public, decentralized, neutral, and censorship-resistant.'

In [40]:
from tqdm.auto import tqdm

from concurrent.futures import ThreadPoolExecutor

pool = ThreadPoolExecutor(max_workers=6)

def map_progress(pool, seq, f):
    """Apply ``f`` to each element of ``seq`` on ``pool``, showing progress.

    Args:
        pool: Executor exposing ``submit`` (e.g. a ThreadPoolExecutor).
        seq: Sized collection of inputs; its length sets the bar's total.
        f: Callable invoked with one element at a time.

    Returns:
        The results of ``f`` in the same order as ``seq``.
    """
    def _advance(_finished):
        # Called once per completed future; the future itself is not needed.
        progress.update()

    with tqdm(total=len(seq)) as progress:
        pending = []

        for item in seq:
            task = pool.submit(f, item)
            task.add_done_callback(_advance)
            pending.append(task)

        # Collect in submission order so outputs line up with the inputs.
        return [task.result() for task in pending]

In [41]:
def process_record(rec, model='gpt-3.5-turbo'):
    """Generate an LLM answer for one ground-truth record and pair it with
    the original answer of the referenced document.

    The chat model used to be a hard-coded local variable named ``model``
    (shadowing the notebook's embedding model inside this function); it is now
    a keyword parameter with the same default, so one-argument callers such as
    ``map_progress(pool, ground_truth, process_record)`` behave as before and
    other chat models can reuse the function.

    Args:
        rec: Ground-truth record with 'question' and 'document' keys.
        model: Name of the chat model forwarded to ``rag``.

    Returns:
        Dict with 'answer_llm', 'answer_orig', 'document' and 'question'.

    Raises:
        KeyError: If ``rec['document']`` is not a key of ``doc_idx``.
    """
    answer_llm = rag(rec, model=model)

    doc_id = rec['document']
    answer_orig = doc_idx[doc_id]['answer']

    return {
        'answer_llm': answer_llm,
        'answer_orig': answer_orig,
        'document': doc_id,
        'question': rec['question']
    }

In [42]:
process_record(ground_truth[10])

{'answer_llm': 'A blockchain system encompasses all aspects and features that go into a particular blockchain, including the consensus algorithm, state machine, and cryptographic functions. It includes qualifiers such as open, public, decentralized, neutral, and censorship-resistant to help understand the characteristics of the blockchain.',
 'answer_orig': 'A blockchain system refers to all the aspects and features that go into a particular blockchain, everything from the consensus algorithm to the state machine to cryptographic functions. As Andreas Antonopoulus and Gavin Wood note in Mastering Ethereum, there are “a huge variety of blockchains with different properties”––qualifiers “help us understand the characteristics of the blockchain in question, such as open, public, decentralized, neutral, and censorship-resistant.”',
 'document': 'e3753a49',
 'question': 'What does a blockchain system encompass?'}

In [43]:
results_gpt35 = map_progress(pool, ground_truth, process_record)

  0%|          | 0/205 [00:00<?, ?it/s]

In [44]:
df_gpt35 = pd.DataFrame(results_gpt35)
df_gpt35.to_csv('results-gpt35.csv', index=False)

## Cosine similarity

A->Q->A' cosine similarity

A -> Q -> A'

cosine(A, A')

### gpt-4o

In [45]:
results_gpt4o = df_gpt4o.to_dict(orient='records')

In [46]:
record = results_gpt4o[0]

In [47]:
def compute_similarity(record):
    """Return the dot product of the embeddings of a record's two answers.

    NOTE(review): the result equals cosine similarity only if the embedding
    model returns unit-length vectors -- presumably the case for the
    configured model; confirm before switching models.

    Args:
        record: Mapping with 'answer_orig' and 'answer_llm' text fields.

    Returns:
        The dot product between the two embedding vectors.
    """
    reference, generated = record['answer_orig'], record['answer_llm']

    emb_generated = model.encode(generated)
    emb_reference = model.encode(reference)

    return emb_generated.dot(emb_reference)

In [48]:
# Score every gpt-4o result; `similarity` is filled in the same order as
# results_gpt4o so it can be attached to the DataFrame as a column.
similarity = []

for record in tqdm(results_gpt4o):
    sim = compute_similarity(record)
    similarity.append(sim)

  0%|          | 0/205 [00:00<?, ?it/s]

In [49]:
df_gpt4o['cosine'] = similarity
df_gpt4o['cosine'].describe()

count    205.000000
mean       0.739126
std        0.144457
min        0.247286
25%        0.654272
50%        0.743877
75%        0.849718
max        0.994514
Name: cosine, dtype: float64

In [50]:
import seaborn as sns

ModuleNotFoundError: No module named 'seaborn'