In [None]:
import os
from mistralai import Mistral
from dotenv import load_dotenv
from qdrant_client.models import PointStruct
from qdrant_client.models import Distance, VectorParams
from qdrant_client import QdrantClient

In [None]:
# Load variables from a .env file (python-dotenv's default lookup) into the environment.
load_dotenv()

# In-memory Qdrant instance shared by every cell below.
qdrant_client = QdrantClient(location=":memory:")

In [None]:
# The Mistral key is read from MISTRALAI_API_KEY; a missing variable yields None.
api_key = os.environ.get("MISTRALAI_API_KEY")

# Module-level SDK client used by the chat and embedding helpers.
client = Mistral(api_key=api_key)

In [None]:
def generate_context(url):
    """Ask the Pixtral vision model to transcribe the text shown in an image.

    Args:
        url: Image location, forwarded unchanged as the ``image_url`` part of
            the user message.

    Returns:
        The ``content`` of the first choice in the chat completion response.
    """
    instruction_part = {
        "type": "text",
        "text": "Extract the text from the image precisely, extract every text",
    }
    image_part = {"type": "image_url", "image_url": url}

    # A single user turn carries both the instruction and the image reference.
    completion = client.chat.complete(
        model="pixtral-12b-2409",
        messages=[{"role": "user", "content": [instruction_part, image_part]}],
    )
    return completion.choices[0].message.content

In [None]:
def embed(context, model="mistral-embed"):
    """Request embeddings for ``context`` from the module-level Mistral client.

    Args:
        context: Value passed straight through as the ``inputs`` argument of
            ``client.embeddings.create`` (callers in this notebook pass a
            list of strings).
        model: Embedding model name; defaults to ``"mistral-embed"``.

    Returns:
        The response object from the embeddings call, returned unmodified.
    """
    return client.embeddings.create(inputs=context, model=model)

In [None]:
def generate_embeddings(context: str):
    """Split text into lines and pair each non-blank line with its embedding.

    Args:
        context: Newline-separated text (for example the transcription
            returned by ``generate_context``).

    Returns:
        A list of ``[line, embedding]`` pairs in input order. Blank and
        whitespace-only lines are skipped; an empty list is returned (and no
        embedding request is made) when nothing remains.
    """
    model = "mistral-embed"

    # Blank lines carry nothing worth indexing, so drop them before the
    # embedding request instead of storing empty payloads.
    lines = [line for line in context.split('\n') if line.strip()]
    if not lines:
        return []

    # Reuse the shared ``embed`` helper; no per-call client is needed here.
    embeddings_batch_response = embed(lines, model)

    return [
        [line, item.embedding]
        for line, item in zip(lines, embeddings_batch_response.data)
    ]
    


In [None]:
def initialize_qdrant(length: int):
    """Create the ``CHATBOT`` collection if it does not exist yet.

    Args:
        length: Dimensionality of the vectors the collection will store.

    Returns:
        None. Does nothing when a collection named ``CHATBOT`` already exists.
    """
    collection_name = "CHATBOT"

    # Compare against every existing collection name. Indexing
    # ``collections[0]`` fails on a store with no collections, and an ``in``
    # test against a single name is only a substring match.
    existing_names = {
        collection.name
        for collection in qdrant_client.get_collections().collections
    }
    if collection_name in existing_names:
        return

    qdrant_client.create_collection(
        collection_name=collection_name,
        # Cosine distance over vectors of the requested size.
        vectors_config=VectorParams(size=length, distance=Distance.COSINE),
    )

In [None]:
def qdrant_entry(final_data):
    """Upsert ``[text, vector]`` pairs into the ``CHATBOT`` collection.

    Each pair's position in ``final_data`` becomes its point id, and the text
    is stored under the ``raw_context`` payload key. The collection listing
    is printed once the upsert has been issued.
    """
    points = [
        PointStruct(id=position, vector=pair[1], payload={'raw_context': pair[0]})
        for position, pair in enumerate(final_data)
    ]
    qdrant_client.upsert(collection_name="CHATBOT", points=points)

    collections_listing = qdrant_client.get_collections()
    print(collections_listing)



In [None]:
 def query_qdrant(query, collection_name='CHATBOT', limit=4):
     """Embed ``query`` and search a collection for the closest stored points.

     Args:
         query: Query text; it is embedded as a one-element batch via ``embed``.
         collection_name: Collection to search; defaults to ``'CHATBOT'``.
         limit: Maximum number of hits requested from the search call.

     Returns:
         Whatever ``qdrant_client.search`` returns for the query vector;
         stored vectors are not requested back (``with_vectors=False``).
     """
     embedded_query = embed([query])
     return qdrant_client.search(
         collection_name=collection_name,
         query_vector=embedded_query.data[0].embedding,
         limit=limit,
         with_vectors=False,
     )

In [None]:
def prepare_llm_context(result):
    """Collect the stored text of each search hit.

    Args:
        result: Sequence of hits, each exposing a ``payload`` mapping with a
            ``raw_context`` entry.

    Returns:
        A list of the ``raw_context`` values, in the same order as ``result``.
    """
    return [hit.payload['raw_context'] for hit in result]

In [None]:
def query_llm(context: list, query: str):
    """Answer ``query`` with the chat model, using retrieved context.

    Args:
        context: Retrieved text snippets; they are joined with ``--`` inside
            the user prompt.
        query: The user's question.

    Returns:
        The ``content`` of the first choice in the chat completion response.
    """
    model = "mistral-large-latest"

    # The system and user turns must be separate dicts. Written as one dict
    # literal, the repeated "role"/"content" keys overwrite each other and the
    # system instruction is never sent.
    messages = [
        {
            "role": "system",
            "content": "You are an answer generation agent, you'll be given context and query, generate answer in human readable form",
        },
        {
            "role": "user",
            "content": f"here's the question {query} and here's the context {'--'.join(context)}",
        },
    ]

    chat_response = client.chat.complete(
        model=model,
        messages=messages,
    )

    return chat_response.choices[0].message.content

In [None]:
if __name__ == "__main__":
    # Driver: either index the text of a sample image, or answer a question
    # from what is already indexed. `response` is assigned on every path so
    # that it is always defined once this block has run.

    # Tolerate stray whitespace and capitalisation in the typed choice;
    # anything other than 'entry' is treated as a query.
    choice = input("Enter 'query' for rag query \n 'entry' for rag entry").strip().lower()
    if choice == 'entry':
        url: str = "https://assets.techrepublic.com/uploads/2017/04/aexcelpowerbi.png"

        context = generate_context(url)
        final_data = generate_embeddings(context)

        if final_data:
            # The first embedding's length sets the collection's vector size
            # (assumes all embeddings share one length).
            initialize_qdrant(len(final_data[0][1]))
            qdrant_entry(final_data)
            response = f"Stored {len(final_data)} text line(s) from the image in the vector store."
        else:
            # Nothing to index: avoid indexing into an empty list.
            response = "No text was extracted from the image; nothing was stored."
    else:
        query = input("Enter the query : ")
        result = query_qdrant(query)

        context = prepare_llm_context(result)

        response = query_llm(context, query)

In [None]:
# Show the text assigned to `response` by the driver cell.
print(response)