In [None]:
import mlflow

In [None]:
# Master switch for experiment tracking: the MLflow calls in this notebook that
# sit behind `if mlflow_lgging:` only run when this flag is True.
mlflow_lgging = False

In [None]:
# When tracking is enabled, select the "Hindi Chatbot" experiment and open a
# run before any parameters are logged.
if mlflow_lgging:
    mlflow.set_experiment("Hindi Chatbot")
    mlflow.start_run()

In [None]:
from qdrant_client import QdrantClient

# Address of the Qdrant server; kept in variables so the same values that
# build the client are the ones logged below.
host, port = "localhost", 6333
client = QdrantClient(host=host, port=port)

# Record the connection settings when MLflow tracking is switched on.
if mlflow_lgging:
    mlflow.log_param("qdrant_host", host)
    mlflow.log_param("qdrant_port", port)

In [None]:
import fasttext as ft

# Path of the fastText binary model file that is loaded as the embedding model.
embed_model_path = 'wiki.hi.bin'
embed_model = ft.load_model(embed_model_path)

# Record which model file was used when MLflow tracking is switched on.
if mlflow_lgging:
    mlflow.log_param("embed_model_path", embed_model_path)

In [None]:
from typing import List
from qdrant_client import QdrantClient
import fasttext as ft

from langchain_core.callbacks import CallbackManagerForRetrieverRun
from langchain_core.documents import Document
from langchain_core.retrievers import BaseRetriever

class QdrantRetriever(BaseRetriever):
    """LangChain retriever that embeds a query with fastText and searches Qdrant.

    All four fields are required and are supplied as keyword arguments when the
    retriever is constructed.
    """

    # Qdrant client used to run the vector search.
    client: QdrantClient
    # Loaded fastText model; its get_sentence_vector() produces the query embedding.
    embed_model: ft.FastText._FastText
    # Name of the Qdrant collection to search.
    collection_name: str
    # Maximum number of hits requested from Qdrant per query.
    limit: int

    def _get_relevant_documents(self, query: str, *, run_manager: CallbackManagerForRetrieverRun) -> List[Document]:
        """Embed ``query`` and return the matching Qdrant hits as documents.

        Args:
            query: Free-text question. May span several lines; line breaks are
                replaced by spaces before the text is embedded.
            run_manager: Callback manager passed in by LangChain; not used here.

        Returns:
            One ``Document`` per search hit, whose ``page_content`` is read
            from the ``'page_content'`` entry of the hit's payload.
        """
        # fastText's get_sentence_vector() raises ValueError when the text
        # contains a newline, so fold a multi-line query (e.g. typed into the
        # chat UI) onto a single line instead of letting retrieval crash.
        single_line_query = query.replace("\n", " ")
        query_vector = self.embed_model.get_sentence_vector(single_line_query).tolist()
        search_results = self.client.search(
            collection_name=self.collection_name,
            query_vector=query_vector,
            limit=self.limit
        )
        return [Document(page_content=hit.payload['page_content']) for hit in search_results]


In [None]:
# Retrieval settings: the collection to query and how many hits to request.
collection_name, limit = 'my_collection', 1

# Build the retriever from the client and embedding model created in earlier cells.
retriever = QdrantRetriever(
    client=client,
    embed_model=embed_model,
    collection_name=collection_name,
    limit=limit,
)

# Record the retrieval settings when MLflow tracking is switched on.
if mlflow_lgging:
    mlflow.log_param("collection_name", collection_name)
    mlflow.log_param("limit", limit)

In [None]:
from langchain_community.llms.ollama import Ollama

# Generation settings, defined once so that the model is built from exactly
# the values that are logged to MLflow below (previously the constructor used
# separate hard-coded literals, which could drift from the logged values).
model_name = 'llama3'
num_predict = 100
num_ctx = 3000
num_gpu = 2
temperature = 0.7
top_k = 50
top_p = 0.95

llm = Ollama(
    model=model_name,
    num_predict=num_predict,
    num_ctx=num_ctx,
    num_gpu=num_gpu,
    temperature=temperature,
    top_k=top_k,
    top_p=top_p,
)

# Record the generation settings when MLflow tracking is switched on.
if mlflow_lgging:
    mlflow.log_param("model_name", model_name)
    mlflow.log_param("num_predict", num_predict)
    mlflow.log_param("num_ctx", num_ctx)
    mlflow.log_param("num_gpu", num_gpu)
    mlflow.log_param("temperature", temperature)
    mlflow.log_param("top_k", top_k)
    mlflow.log_param("top_p", top_p)

In [None]:
# System message template for the chat prompt. It instructs the model (in Hindi
# and in English) to answer only in Hindi and to keep answers short.
# `{context}` is a template placeholder, not literal text; this string is later
# used as the "system" message of the ChatPromptTemplate.
system_prompt = (
    """<s>[INST] आप एक सम्मानीय सहायक हैं। आपका काम नीचे दिए गए संदर्भ से प्रश्नों का उत्तर देना है। आप केवल हिंदी भाषा में उत्तर दे सकते हैं। धन्यवाद।
    
    You are never ever going to generate response in English. You are always going to generate response in Hindi no matter what. You also need to keep your answer short and to the point.

    संदर्भ: {context} </s>
"""
) 

# Record the exact prompt template when MLflow tracking is switched on.
if mlflow_lgging:
    mlflow.log_param("system_prompt", system_prompt)

In [None]:
from langchain.chains import create_retrieval_chain
from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain_core.prompts import ChatPromptTemplate

In [None]:
# Two-message chat prompt: the system message carries the instructions and the
# {context} slot, the human message carries the user's question via {input}.
prompt = ChatPromptTemplate.from_messages([
    ("system", system_prompt),
    ("human", "{input}"),
])

In [None]:
# Build the document-combining chain from the LLM and the chat prompt.
question_answer_chain = create_stuff_documents_chain(llm, prompt)

In [None]:
# Combine the retriever and the question-answering chain into one retrieval chain.
chain = create_retrieval_chain(retriever, question_answer_chain)

In [None]:
# Sample question (in Hindi) that is sent through the chain in the next cell.
query = 'किस तरह के किरदार और कहानी तत्व रचनाकारों और फिल्म निर्माताओं को आकर्षित करते हैं?'

# Record the question when MLflow tracking is switched on.
if mlflow_lgging:
    mlflow.log_param("query", query)

In [None]:
# Run the chain on the sample query; the result's 'context' and 'answer'
# entries are read in the following cell.
response = chain.invoke({"input": query})

In [None]:
# Record what was retrieved and what the model answered for the sample query.
# NOTE(review): both values are logged as params; MLflow limits the length of
# param values, so long contexts/answers may be rejected — confirm, or consider
# logging them as text artifacts instead.
if mlflow_lgging:
    mlflow.log_param("context", response['context'])
    mlflow.log_param("response", response['answer'])

In [None]:
# Close the MLflow run. Unlike the other MLflow calls in this notebook, this
# one is not guarded by `mlflow_lgging`.
mlflow.end_run()

In [None]:
import gradio as gr


def answer_question(query, history):
    """Answer one chat message by running it through the retrieval chain.

    `history` is part of the chat callback signature but is not used here.
    Returns the 'answer' entry of the chain's result.
    """
    return chain.invoke({"input": query})['answer']


# Serve the chatbot UI.
# NOTE(review): share=True asks Gradio for a public share link — confirm that
# exposing the chatbot outside this machine is intended.
gr.ChatInterface(fn=answer_question).launch(share=True)

# Second Approach - Simple and Straightforward

In [None]:
# from qdrant_client import QdrantClient

# client = QdrantClient(host="localhost", port=6333)

# import fasttext as ft
# # Loading the fastText model for Hindi.
# embed_model = ft.load_model('wiki.hi.bin')

# query = 'किस तरह के किरदार और कहानी तत्व रचनाकारों और फिल्म निर्माताओं को आकर्षित करते हैं?'

# hits = client.search(
# collection_name="my_collection",
# query_vector= embed_model.get_sentence_vector(query).tolist(),
# limit=1,
# )


# context = ''
# for hit in hits:
#     context += hit.payload['page_content'] + '\n'


# prompt = f"""<s>[INST] आप एक सम्मानीय सहायक हैं। आपका काम नीचे दिए गए संदर्भ से प्रश्नों का उत्तर देना है। आप केवल हिंदी भाषा में उत्तर दे सकते हैं। धन्यवाद।
#     संदर्भ: {context}
#     प्रश्न: {query} [/INST] </s>
# """


# from langchain_community.llms.ollama import Ollama
# llm=Ollama(model='llama3', num_predict=100, num_ctx=3000, num_gpu=2, temperature=0.7, top_k=50, top_p=0.95)

# llm.invoke(prompt)