In [None]:
# Dependencies are installed one package per command through the %pip magic.
# LangChain and its community integrations; both are imported by the import cell.
%pip install langchain
%pip install langchain-community
# NOTE(review): fastapi, uvicorn and python-multipart are not imported by any
# cell visible in this notebook (the app is served with Flask) — confirm they
# are still needed.
%pip install fastapi
%pip install uvicorn
%pip install python-multipart
# presumably the native backend behind the CTransformers LLM wrapper — confirm
%pip install ctransformers
# NOTE(review): qdrant-client is not imported by any visible cell (the vector
# store used is Chroma) — confirm it is still needed.
%pip install qdrant-client
# presumably required by HuggingFaceBgeEmbeddings — confirm
%pip install torch
%pip install sentence_transformers
# Chroma vector store and the Flask web framework.
%pip install chromadb
%pip install flask

In [None]:
from flask import Flask, request, jsonify, render_template
import os
import json
from langchain.prompts import PromptTemplate
from langchain.chains import LLMChain
from langchain_community.llms import CTransformers
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.vectorstores import Chroma
from langchain.chains import RetrievalQA
from langchain_community.embeddings import HuggingFaceBgeEmbeddings
from langchain_community.document_loaders import PyPDFLoader

In [None]:
# Flask application object; the @app.route decorators in later cells register
# their handlers on it, and app.run() in the last cell serves it.
app = Flask(__name__)

Initialize the LLM and the other components (prompt, embeddings, vector store, and retriever), as in the original code.

In [None]:
# File name of the GGUF model that is passed to CTransformers as `model`.
local_llm = "neural-chat-7b-v3-3.Q4_K_M.gguf"

# Generation settings for the local model.
config = {
    'max_new_tokens': 1024,
    'repetition_penalty': 1.1,
    'temperature': 0.1,
    'top_k': 50,
    'top_p': 0.9,
    'stream': True,
    # Half of the reported CPUs, but never fewer than one: os.cpu_count() may
    # return None, and halving a count of 1 would otherwise yield 0 threads.
    'threads': max(1, (os.cpu_count() or 1) // 2)
    }

In [None]:
# Local LLM loaded through LangChain's CTransformers wrapper.
# The generation settings are passed through the wrapper's `config` field,
# which is what the wrapper forwards to the underlying model loader; unpacking
# them as top-level keyword arguments (`**config`) does not route them there.
llm = CTransformers(
    model=local_llm,
    model_type="mistral",
    lib="avx2",
    config=config
)

In [None]:
print("LLM Initialized....")

# Prompt text for the question-answering chain. {context} and {question} are
# the two placeholders declared as input_variables when the PromptTemplate is
# built from this string.
prompt_template = """Use the following pieces of information to answer the user's question.
If you don't know the answer, just say that you don't know, don't try to make up an answer.

Context: {context}
Question: {question}

Only return the helpful answer below and nothing else.
Helpful answer:
"""


In [None]:
# Settings for the embedding model: which checkpoint to load, the device it
# runs on, and the options applied when encoding text.
model_name = "BAAI/bge-large-en"
model_kwargs = dict(device='cpu')
encode_kwargs = dict(normalize_embeddings=False)

# Embedding function shared with the Chroma vector store.
embeddings = HuggingFaceBgeEmbeddings(
    encode_kwargs=encode_kwargs,
    model_kwargs=model_kwargs,
    model_name=model_name,
)

In [None]:
# Wrap the raw template text so the chain can fill in its two placeholders.
prompt = PromptTemplate(
    input_variables=['context', 'question'],
    template=prompt_template,
)

In [None]:
# Open the Chroma store persisted under stores/pet_cosine, using the same
# embedding function for queries.
load_vector_store = Chroma(
    embedding_function=embeddings,
    persist_directory="stores/pet_cosine",
)

In [None]:
# Retriever over the vector store; k=1 asks for a single document per query.
retriever = load_vector_store.as_retriever(
    search_kwargs=dict(k=1),
)

In [None]:
@app.route('/')
def index():
    """Render the ``index.html`` template for the site root."""
    page = render_template('index.html')
    return page

In [None]:
@app.route('/get_response', methods=['POST'])
def get_response():
    """Answer a question submitted by the client with the retrieval QA chain.

    Reads the ``query`` form field from the POST request, runs it through a
    "stuff" RetrievalQA chain built from the module-level ``llm``,
    ``retriever`` and ``prompt``, and returns the result as JSON.

    Returns:
        On success, a JSON object with the keys ``answer`` (the chain's
        result), ``source_document`` (text of the first retrieved document)
        and ``doc`` (that document's ``source`` metadata). The last two are
        empty strings when no document was retrieved or the metadata has no
        ``source`` entry.
        When ``query`` is missing or blank, a JSON object with an ``error``
        key and HTTP status 400.
    """
    # Reject missing/blank input up front instead of feeding None to the chain.
    query = (request.form.get('query') or '').strip()
    if not query:
        return jsonify({"error": "Missing required form field 'query'."}), 400

    qa = RetrievalQA.from_chain_type(
        llm=llm,
        chain_type="stuff",
        retriever=retriever,
        return_source_documents=True,
        chain_type_kwargs={"prompt": prompt},
        verbose=True
    )
    # invoke() is the supported entry point; calling the chain object directly
    # is deprecated. "query" is RetrievalQA's input key.
    response = qa.invoke({"query": query})

    # The retriever may return nothing (e.g. an empty store), so do not index
    # the source list blindly.
    sources = response.get('source_documents') or []
    if sources:
        top_source = sources[0]
        source_document = top_source.page_content
        doc = top_source.metadata.get('source', '')
    else:
        source_document = ''
        doc = ''

    response_data = {
        "answer": response['result'],
        "source_document": source_document,
        "doc": doc,
    }
    return jsonify(response_data)

In [None]:
if __name__ == '__main__':
    # Debug mode enables Werkzeug's interactive debugger, which lets anyone who
    # can reach the server execute code. Because the server listens on all
    # interfaces (0.0.0.0), debug is off unless FLASK_DEBUG is set explicitly.
    debug_enabled = os.environ.get('FLASK_DEBUG', '').strip().lower() in ('1', 'true')
    # The reloader restarts the process on code changes; that is not expected
    # to work when the server is started from a notebook kernel, so keep it
    # off even when debugging.
    app.run(debug=debug_enabled, use_reloader=False, host='0.0.0.0', port=5000)