## RAG app TFM v2
In this use case it is shown how to extract information from a PDF file through LLM queries with RAG (Retrieval Augmented Generation) technology. For this use case is necessary the use of a vector database (in this case FAISS), embeddings and OpenAI model calls. To show the final result, the model is embedded on a Gradio UI.

In [1]:
from dotenv import load_dotenv
import os

from langchain.document_loaders import PyPDFLoader
from langchain.embeddings import OpenAIEmbeddings
from langchain.vectorstores import FAISS

load_dotenv("apis.env")
# hf_api_key = os.environ['HF_API_KEY']

True

# Advanced Gradio app

In [2]:
import gradio as gr

# CSS Template 
theme = gr.themes.Base(
    primary_hue="rose",
).set(
    body_background_fill='*neutral_50',
    body_text_color='*neutral_500',
    body_text_weight='300',
    background_fill_primary='*neutral_50',
    background_fill_secondary='*primary_50',
    border_color_primary='*primary_400',
    color_accent_soft='*primary_300',
    link_text_color='*primary_300',
    link_text_color_active='*neutral_300',
    link_text_color_hover='*primary_100',
    link_text_color_visited='*neutral_400',
    code_background_fill='*primary_200',
    button_secondary_background_fill='*neutral_100',
    button_secondary_border_color='*neutral_900',
    button_secondary_text_color='*primary_400',
    button_cancel_background_fill='*primary_600',
    button_cancel_background_fill_hover='*primary_700',
    button_cancel_text_color='*neutral_50',
    slider_color='*primary_500'
)


  from .autonotebook import tqdm as notebook_tqdm


In [50]:
from langchain.chains import RetrievalQA
from langchain.llms import OpenAI

def extract_sentence(dictionary):
    # Extract the value associated with the key 'result'
    sentence = dictionary.get('result', '')
    return sentence

def model_hyperparameters(temperature, max_tokens, db):#
    llm = OpenAI(
        model_name="gpt-3.5-turbo-instruct",
        temperature=temperature,
        max_tokens=max_tokens,
        streaming=True
        )
    chain = RetrievalQA.from_llm(llm=llm, retriever=db.as_retriever())
    return chain

def respond(message, output_label, temperature=0.7, max_tokens=32):
    db = pdf_vectorized_loader(output_label)
    prompt = message
    chain = model_hyperparameters(temperature, max_tokens, db)
    completion = chain(prompt, return_only_outputs=True)
    return extract_sentence(completion)

def pdf_vectorized_loader(pdf_file):
    loader = PyPDFLoader(pdf_file)
    pages = loader.load_and_split()
    embeddings = OpenAIEmbeddings(model="text-embedding-ada-002")
    db = FAISS.from_documents(pages, embeddings)
    return db

def process_file(uploaded_file):
    if uploaded_file is not None:
        filename = uploaded_file.name
        return filename
    return "No file uploaded."

with gr.Blocks(theme=theme) as demo: 
    gr.Markdown("# TFM RAG App")
    gr.Markdown("With this RAG app, you can generate augmented responses from a selected PDF file.")

    file_input = gr.File(file_types=[".pdf"])
    output_label = gr.Textbox(visible=False)

    file_input.change(process_file, inputs=file_input, outputs=output_label)

    msg = gr.Textbox(label="Ask a question")
    with gr.Accordion(label="Advanced options",open=False):
        temperature = gr.Slider(label="temperature", minimum=0.1, maximum=1.0, value=0.2, step=0.1, info="Regulates the creativity of the answers")
        max_tokens = gr.Slider(label="Max tokens", value=64, maximum=256, minimum=8, step=1, info="Regulates the length of the answers")
    completion = gr.Textbox(label="Response")
    btn = gr.Button("Submit", variant="primary")
    clear = gr.ClearButton(components=[msg, completion], value="Clear console", variant="stop")
    
    btn.click(respond, inputs=[msg, output_label, temperature, max_tokens], outputs=[completion])
    msg.submit(respond, inputs=[msg, output_label, temperature, max_tokens], outputs=[completion])

    gr.Markdown("Created by Ignacio Ojeda Sánchez (www.linkedin.com/in/ignacio-ojeda-sánchez-610924225)", header_links=True)

gr.close_all()
demo.queue().launch(share=True)    

Closing server running on port: 7890
Running on local URL:  http://127.0.0.1:7905
Running on public URL: https://d9455b25233aa1380d.gradio.live

This share link expires in 72 hours. For free permanent hosting and GPU upgrades, run `gradio deploy` from Terminal to deploy to Spaces (https://huggingface.co/spaces)




In [51]:
gr.close_all()

Closing server running on port: 7890


## Model Evalutation using Llama Index

In [64]:
eval_questions = []
with open('eval_questions.txt', 'r') as file:
    for line in file:
        # Remove newline character and convert to integer
        item = line.strip()
        print(item)
        eval_questions.append(item)

In [65]:
from trulens_eval import Tru
tru = Tru()

tru.reset_database()

In [54]:
doc = "./eBook-How-to-Build-a-Career-in-AI.pdf"
prompt = "What is AI?"
# res = respond(message, doc)
temperature = 0.2
max_tokens = 128

db = pdf_vectorized_loader(doc)
chain = model_hyperparameters(temperature, max_tokens, db)
completion = chain(prompt, return_only_outputs=True)
res = extract_sentence(completion)

In [55]:
print(res)

 AI stands for artificial intelligence. It is a rapidly growing field that involves using computer systems to perform tasks that typically require human intelligence, such as learning, problem-solving, and decision-making. AI has the potential to transform and improve various areas of human life.


In [70]:
# from utils import get_prebuilt_trulens_recorder
from trulens_eval import (
    Feedback,
    TruLlama,
    OpenAI
)
from trulens_eval.feedback import Groundedness

import numpy as np

openai = OpenAI()

qa_relevance = (
    Feedback(openai.relevance_with_cot_reasons, name="Answer Relevance")
    .on_input_output()
)

qs_relevance = (
    Feedback(openai.relevance_with_cot_reasons, name = "Context Relevance")
    .on_input()
    .on(TruLlama.select_source_nodes().node.text)
    .aggregate(np.mean)
)

#grounded = Groundedness(groundedness_provider=openai, summarize_provider=openai)
grounded = Groundedness(groundedness_provider=openai)

groundedness = (
    Feedback(grounded.groundedness_measure_with_cot_reasons, name="Groundedness")
        .on(TruLlama.select_source_nodes().node.text)
        .on_output()
        .aggregate(grounded.grounded_statements_aggregator)
)

feedbacks = [qa_relevance, qs_relevance, groundedness]

def get_prebuilt_trulens_recorder(query_engine, app_id):
    tru_recorder = TruLlama(
        query_engine,
        app_id=app_id,
        feedbacks=feedbacks
        )
    return tru_recorder

tru_recorder = get_prebuilt_trulens_recorder(chain,
                                             app_id="TFM RAG App")


# embed_model = OpenAIEmbedding(model="text-embedding-ada-002", embed_batch_size=10)
# llm = OpenAI(model="gpt-3.5-turbo", temperature=0.1)
# service_context = ServiceContext.from_defaults(
#     llm=llm, embed_model=embed_model
# )
# index = VectorStoreIndex.from_documents([document],
#                                         service_context=service_context)
# query_engine = index.as_query_engine()
# response = query_engine.query(
#     "What are the keys to building a career in AI?"
# )
# print(str(response))



######################################################################################
# def extract_sentence(dictionary):
#     # Extract the value associated with the key 'result'
#     sentence = dictionary.get('result', '')
#     return sentence

# def model_hyperparameters(temperature, max_tokens, db):#
#     llm = OpenAI(
#         model_name="gpt-3.5-turbo-instruct",
#         temperature=temperature,
#         max_tokens=max_tokens,
#         streaming=True
#         )
#     chain = RetrievalQA.from_llm(llm=llm, retriever=db.as_retriever())
#     return chain

# def respond(message, output_label, temperature=0.7, max_tokens=32):
#     db = pdf_vectorized_loader(output_label)
#     prompt = message
#     chain = model_hyperparameters(temperature, max_tokens, db)
#     completion = chain(prompt, return_only_outputs=True)
#     return extract_sentence(completion)

# def pdf_vectorized_loader(pdf_file):
    # loader = PyPDFLoader(pdf_file)
    # pages = loader.load_and_split()
    # embeddings = OpenAIEmbeddings(model="text-embedding-ada-002")
    # db = FAISS.from_documents(pages, embeddings)
    # return db

✅ In Answer Relevance, input prompt will be set to __record__.main_input or `Select.RecordInput` .
✅ In Answer Relevance, input response will be set to __record__.main_output or `Select.RecordOutput` .
✅ In Context Relevance, input prompt will be set to __record__.main_input or `Select.RecordInput` .
✅ In Context Relevance, input response will be set to __record__.app.query.rets.source_nodes[:].node.text .
✅ In Groundedness, input source will be set to __record__.app.query.rets.source_nodes[:].node.text .
✅ In Groundedness, input statement will be set to __record__.main_output or `Select.RecordOutput` .


In [75]:
with tru_recorder as recording:
    for question in eval_questions:
        response = extract_sentence(chain(question))

In [76]:
records, feedback = tru.get_records_and_feedback(app_ids=[])

In [77]:
records


Unnamed: 0,app_id,app_json,type,record_id,input,output,tags,record_json,cost_json,perf_json,ts,latency,total_tokens,total_cost
