Test questions using RAG
- Vector db: qdrant
- OpenAI LLM model / Embedding


In [1]:
#import libraries
import os
import textwrap
from langchain.prompts import PromptTemplate
from langchain_community.embeddings.openai import OpenAIEmbeddings
from qdrant_client import QdrantClient
from openai import OpenAI

In [3]:
#env variables
from dotenv import load_dotenv
load_dotenv()  # pull settings from a local .env file (if any) into the process environment

OPENAI_API_KEY = os.getenv('OPENAI_API_KEY')
QDRANT_URL = os.getenv('QDRANT_URL')
# Read the Qdrant key from the environment like every other setting, so a
# secured instance can be used without pasting the key into the notebook.
# Falls back to "" (the value that used to be hard-coded) when it is not set.
QDRANT_API_KEY = os.getenv('QDRANT_API_KEY', "")
collection_name = os.getenv('QDRANT_TABLE_NAME')



In [5]:
def get_prompt(retriever_model, qdrant_client, question, collection_name,
               limit=7, score_threshold=0.7):
    """Build the LLM prompt for ``question`` from documents retrieved in Qdrant.

    The question is embedded with ``retriever_model``, the closest points are
    searched in ``collection_name`` and the ``page_content`` payload of every
    hit is injected as context into the prompt. When the question cannot be
    embedded, or the search returns no hit, a fallback prompt asking the model
    to politely admit it does not know is returned instead.

    Args:
        retriever_model: Embedding model exposing ``embed_query(text)``.
        qdrant_client: Client exposing ``search(collection_name=...,
            query_vector=..., limit=..., score_threshold=...)``.
        question: The user question, as text.
        collection_name: Name of the collection to search.
        limit: Value forwarded as ``limit`` to the search call. Defaults to
            7, the value that used to be hard-coded.
        score_threshold: Value forwarded as ``score_threshold`` to the search
            call. Defaults to 0.7, the value that used to be hard-coded.

    Returns:
        A ``(prompt, explanation_docs)`` tuple. ``explanation_docs`` lists the
        distinct source file names (last path component of
        ``payload["metadata"]["filename"]``) in retrieval order; it is empty
        when the fallback prompt is returned.
    """
    encoded_query = retriever_model.embed_query(question)
    prompt_dont_know = \
        """
            You are a helpful AI language model. 
            Politely state that you do not know the answer and you are doing best to learn new documents to answer future questions.
        """
    explanation_docs = []
    if encoded_query is None:
        return prompt_dont_know, explanation_docs
    result = qdrant_client.search(
        collection_name=collection_name,
        query_vector=encoded_query,
        limit=limit, score_threshold=score_threshold
    )
    if not result:
        return prompt_dont_know, explanation_docs
    prompt_template = \
        """
            You are an specialist in answering questions about databricks
            Don't try to make up an answer, if you don't know just say that you don't know.
            Answer in the same language the question was asked.
            Where possible, be specific, provide examples, and explain your thinking.
            
            # context from our documents
            {context}
            
            # question to answer.
            {question}
            
        """
    PROMPT = PromptTemplate(
        template=prompt_template,
        input_variables=["context", "question"]
    )
    context = "\n\n".join(point.payload['page_content'] for point in result)
    prompt = PROMPT.format(question=question, context=context)

    # Source attribution is informational only: skip hits without a filename
    # instead of failing the whole request on a missing key.
    filenames = []
    for point in result:
        metadata = point.payload.get("metadata") or {}
        filename = metadata.get("filename")
        if filename:
            filenames.append(filename.split("/")[-1])
    # dict.fromkeys de-duplicates while keeping retrieval order, so the list
    # is stable between runs (a set gives an arbitrary order).
    explanation_docs = list(dict.fromkeys(filenames))
    return prompt, explanation_docs


def serve_data_streaming(llmclient, model, prompt):
    """Stream the answer of a chat completion piece by piece.

    Sends ``prompt`` as a single user message with ``stream=True`` and yields
    the text carried by the first choice of every chunk as soon as it
    arrives.

    Args:
        llmclient: Client exposing ``chat.completions.create(...)``.
        model: Name of the chat model to call.
        prompt: Full prompt text, sent as the content of the user message.

    Yields:
        The non-``None`` ``delta.content`` of each streamed chunk.
    """
    stream = llmclient.chat.completions.create(
        model=model,
        messages=[{"role": "user", "content": prompt}],
        stream=True,
    )
    for chunk in stream:
        # Some chunks carry no choice at all (e.g. usage-only chunks);
        # indexing choices[0] on them would raise IndexError.
        if not chunk.choices:
            continue
        piece = chunk.choices[0].delta.content
        if piece is not None:
            yield piece

def serve_data(llmclient, model, prompt):
    """Return the full answer of a (non-streaming) chat completion.

    Args:
        llmclient: Client exposing ``chat.completions.create(...)``.
        model: Name of the chat model to call.
        prompt: Full prompt text, sent as the content of a single user message.

    Returns:
        The message content of the first choice of the response.
    """
    user_message = {"role": "user", "content": prompt}
    completion = llmclient.chat.completions.create(
        model=model,
        messages=[user_message],
    )
    first_choice = completion.choices[0]
    return first_choice.message.content


def print_wrapped(text, width=50):
    """Print ``text`` re-flowed so that no line is longer than ``width`` columns."""
    print(textwrap.fill(text, width))


In [6]:
# OpenAI credentials, read from the environment
api_key = os.environ.get("OPENAI_API_KEY")

# Embedding model used to turn the questions into vectors
embeddings = OpenAIEmbeddings(openai_api_key=api_key)

# Client for the Qdrant vector database
qdrant_client = QdrantClient(
    url=QDRANT_URL,
    port=None,
    api_key=QDRANT_API_KEY,
)




  warn_deprecated(


In [7]:
# Embed the question, retrieve the matching chunks from the vector db and
# build the prompt that will be sent to the LLM.
question = "In this eBook, you’ll learn:?"
prompt, explanation_docs = get_prompt(
    embeddings,
    qdrant_client,
    question,
    collection_name,
)
print(explanation_docs)
print(prompt)

['Databricks-Big-Book-Of-GenAI-FINAL.pdf']

            You are an specialist in answering questions about databricks
            Don't try to make up an answer, if you don't know just say that you don't know.
            Answer in the same language the question was asked.
            Where possible, be specific, provide examples, and explain your thinking.
            
            # context from our documents
            In this eBook, you’ll learn:

the incredible insights you’ll gain!

### Instruction:

### Instruction:

### Instruction:

CONTENTS

the following:
            
            # question to answer.
            In this eBook, you’ll learn:?
            
        


Call the LLM with the dynamically built prompt

In [8]:
# Chat model used to answer the question
model = "gpt-3.5-turbo-0125"
client = OpenAI(api_key=api_key)

# Single (non-streaming) completion for the prompt built above
response = serve_data(client, model, prompt)


In [9]:
# Show the answer wrapped at 50 columns for readability
print_wrapped(response, 50)

You will learn about the incredible insights
you'll gain from the content of this eBook. It
will provide valuable information and knowledge
that will help you understand specific topics in-
depth and gain new perspectives. For example, if
the eBook is about data analysis using Databricks,
you might learn about advanced data manipulation
techniques, machine learning algorithms, and best
practices for optimizing data processing
workflows.
