# Check RAG evaluation. 
- Retrieval: Primarily examine the recall of the RAG system.
- Generation: Secondarily examine the generation quality of the RAG system.


# Retrieval
Context Precision & Context Recall

# Generation
Faithfulness


In [11]:
import sys
import os
from pathlib import Path

from dotenv import load_dotenv

# Load environment variables from a .env file before any client is created
# (presumably this is where the OpenAI API key comes from — confirm).
load_dotenv()

# NOTE: despite its name, this is the PARENT of the current working directory;
# the PDF path further down is resolved relative to it.
current_dir = Path.cwd().parent

# Put that parent directory on the import path so modules located under it
# can be imported from this notebook.
sys.path.append(str(current_dir))
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.document_loaders import PyMuPDFLoader
from langchain.prompts import ChatPromptTemplate
from langchain_openai import ChatOpenAI
# load & chunk data 


# Source document: the Bitcoin whitepaper under <current_dir>/backend/docs.
pdf_path = current_dir.joinpath(
    "backend", "docs", "Bitcoin - A Peer-to-Peer Electronic Cash System.pdf"
)

# Read the PDF into LangChain documents.
loader_py = PyMuPDFLoader(pdf_path)
pages = loader_py.load()

# Split on sentence-ending punctuation only, with chunk_size=500 and
# chunk_overlap=200 so neighbouring chunks share context.
recursive_text_splitter = RecursiveCharacterTextSplitter(
    separators=[".", "?", "!"],
    chunk_size=500,
    chunk_overlap=200,
)

# `docs` holds the chunks that get embedded and turned into questions below.
docs = recursive_text_splitter.split_documents(pages)

# create embeddings 

from langchain_openai.embeddings import OpenAIEmbeddings
from langchain_community.vectorstores import InMemoryVectorStore

# In-memory vector index backed by OpenAI's "text-embedding-3-small" model.
vector_store = InMemoryVectorStore(OpenAIEmbeddings(model="text-embedding-3-small"))

# Embed and index every chunk produced by the splitter.
vector_store.add_documents(documents=docs)


# generate questions
# Accumulator filled by the loop over `docs` further down: one entry per chunk.
# Each entry is whatever generate_question() returns, i.e. the chat model's
# response message object (question text in `.content`), not a plain string.
questions = []
# Disabled placeholder for token/cost accounting; nothing in view reads it.
#metrics = {"tokens": 0, "cost": 0}

from functools import lru_cache


@lru_cache(maxsize=1)
def _question_chain():
    """Build the prompt -> chat-model pipeline once and reuse it afterwards.

    The template and the client do not depend on the input text, so
    constructing them on every call (once per chunk) is wasted work.
    """
    question_generator = ChatPromptTemplate.from_template(
        "Generate a question a user would ask about the following text. "
        "The question should be a single sentence and not use phrases "
        "like 'according to this paper': {text}"
    )
    question_llm = ChatOpenAI(model="gpt-3.5-turbo")
    return question_generator | question_llm


def generate_question(text):
    """Ask the chat model for one user-style question about ``text``.

    Args:
        text: Passage the question should be about; it is substituted for
            ``{text}`` in the prompt.

    Returns:
        The value returned by the chain's ``invoke`` call: the chat model's
        response message (the question is in its ``content`` attribute),
        not a bare string.
    """
    return _question_chain().invoke({"text": text})

# One model call per chunk, in chunk order; each response is appended to
# `questions` as soon as it is produced.
questions.extend(generate_question(chunk.page_content) for chunk in docs)

content='What is the main benefit of a purely peer-to-peer version of electronic cash according to the text?' additional_kwargs={'refusal': None} response_metadata={'token_usage': {'completion_tokens': 21, 'prompt_tokens': 131, 'total_tokens': 152, 'completion_tokens_details': {'accepted_prediction_tokens': 0, 'audio_tokens': 0, 'reasoning_tokens': 0, 'rejected_prediction_tokens': 0}, 'prompt_tokens_details': {'audio_tokens': 0, 'cached_tokens': 0}}, 'model_name': 'gpt-3.5-turbo-0125', 'system_fingerprint': None, 'finish_reason': 'stop', 'logprobs': None} id='run-3f0014f7-caab-43d5-b729-d91cf141c620-0' usage_metadata={'input_tokens': 131, 'output_tokens': 21, 'total_tokens': 152, 'input_token_details': {'audio': 0, 'cache_read': 0}, 'output_token_details': {'audio': 0, 'reasoning': 0}}
content='How can digital signatures and a peer-to-peer network be used to prevent double-spending in transactions?' additional_kwargs={'refusal': None} response_metadata={'token_usage': {'completion_toke

KeyboardInterrupt: 