In [8]:
from langchain.llms import Ollama
from langchain.callbacks.manager import CallbackManager
from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler
from langchain_community.document_loaders import PyPDFLoader
from langchain.document_loaders import DirectoryLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.retrievers import BM25Retriever, EnsembleRetriever



In [9]:
llm = Ollama(model="mistral",  callbacks=CallbackManager([StreamingStdOutCallbackHandler()]),num_gpu=1, base_url="http://localhost:11434")

In [10]:
modelPath = "BAAI/bge-large-en-v1.5"

# Create a dictionary with model configuration options, specifying to use the CPU for computations
model_kwargs = {'device':'cuda:0'}
encode_kwargs = {'normalize_embeddings': True}

# Initialize an instance of HuggingFaceEmbeddings with the specified parameters
embedding = HuggingFaceEmbeddings(
    model_name=modelPath,     # Provide the pre-trained model's path
    model_kwargs=model_kwargs, # Pass the model configuration options
    encode_kwargs=encode_kwargs # Pass the encoding options
)

In [11]:
loader = DirectoryLoader("./data", glob="*.pdf", loader_cls=PyPDFLoader)
documents = loader.load()
len(documents)


91

In [12]:
text_splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=0)
texts = text_splitter.split_documents(documents)
len(texts)

461

In [13]:
import torch
torch.cuda.empty_cache()
# print(torch.cuda.memory_summary(device=None, abbreviated=False))
import gc
torch.cuda.empty_cache()
gc.collect()

2247

In [14]:
from langchain.vectorstores import Chroma
persist_directory = './db'
vectordb = Chroma.from_documents(documents=texts, 
                                 embedding=embedding,
                                 persist_directory=persist_directory)
vectordb.persist()

In [15]:
import torch
dev = "cuda:0" if torch.cuda.is_available() else "cpu"
dev

'cuda:0'

In [16]:
retriever = vectordb.as_retriever(search_kwargs={'k': 7})

In [17]:
bm25_retriever = BM25Retriever.from_documents(texts)
bm25_retriever.k =  5

In [18]:
ensemble_retriever = EnsembleRetriever(retrievers=[bm25_retriever, retriever],
                                       weights=[0.5, 0.5])

In [None]:
ensemble_retriever.get_relevant_documents("scholars admitted in 2019, 17")

In [19]:
from langchain.chains import RetrievalQA

qa_chain = RetrievalQA.from_chain_type(llm=llm, 
                                  retriever=ensemble_retriever,
                                  return_source_documents=True)
     

In [20]:
def process_llm_response(query):
    llm_response = qa_chain(query)
    return llm_response['result']
    print('\n\nSources:')
    for source in llm_response["source_documents"]:
        print(source.metadata['source'])

In [21]:
query = "minimum time duration for direct ph.d"
process_llm_response(query)
# qa_chain(query)

 I. Eligibility and Registration for Ph.D. Program

A. Minimum educational qualification: Master's degree with a minimum of 55% aggregate marks (or equivalent) from a recognized university.
B. Application process: Students must submit an application along with required documents to the concerned school, following which they will be called for an interview.
C. Registration and fees payment: Upon acceptance into the Ph.D. program, students must register and pay the registration fee within 15 days.

II. Minimum Time Duration for Direct Ph.D.

The minimum duration for a direct Ph.D. program is three years from the date of registration. However, it's important to note that the actual time taken to complete the research and defend the thesis may vary depending on various factors such as the complexity of the research topic and availability of resources.

" I. Eligibility and Registration for Ph.D. Program\n\nA. Minimum educational qualification: Master's degree with a minimum of 55% aggregate marks (or equivalent) from a recognized university.\nB. Application process: Students must submit an application along with required documents to the concerned school, following which they will be called for an interview.\nC. Registration and fees payment: Upon acceptance into the Ph.D. program, students must register and pay the registration fee within 15 days.\n\nII. Minimum Time Duration for Direct Ph.D.\n\nThe minimum duration for a direct Ph.D. program is three years from the date of registration. However, it's important to note that the actual time taken to complete the research and defend the thesis may vary depending on various factors such as the complexity of the research topic and availability of resources."

In [None]:
def main():
    st.title("VIT QA System")
    user_input = st.text_area("Enter Query")

    if st.button("Get response"):
        result = process_llm_response(user_input)
        st.write("Response:")
        # make text bigger
        st.write(f"**{result}**")
main()