In [25]:
from langchain.llms import Ollama
from langchain.callbacks.manager import CallbackManager
from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler
from langchain_community.document_loaders import PyPDFLoader
from langchain.document_loaders import DirectoryLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.retrievers import BM25Retriever, EnsembleRetriever



In [13]:
# LLM client for the "mistral" model served by Ollama at localhost:11434.
# The stdout streaming handler is attached so output is printed while the
# model generates (see the echoed answer under the query cell).
llm = Ollama(
    model="mistral",
    callbacks=CallbackManager([StreamingStdOutCallbackHandler()]),
    num_gpu=1,
    base_url="http://localhost:11434",
)

In [14]:
import torch

# Hugging Face model id of the sentence-embedding model to load.
modelPath = "BAAI/bge-large-en-v1.5"

# Model configuration: run on the first CUDA GPU when PyTorch can see one,
# otherwise fall back to the CPU so the notebook still runs on GPU-less machines.
model_kwargs = {'device': 'cuda:0' if torch.cuda.is_available() else 'cpu'}
# Encoding options: ask the encoder for normalized embedding vectors.
encode_kwargs = {'normalize_embeddings': True}

# Initialize an instance of HuggingFaceEmbeddings with the specified parameters
embedding = HuggingFaceEmbeddings(
    model_name=modelPath,        # Hugging Face model id of the pre-trained model
    model_kwargs=model_kwargs,   # Pass the model configuration options (device)
    encode_kwargs=encode_kwargs  # Pass the encoding options
)

  from .autonotebook import tqdm as notebook_tqdm
.gitattributes: 100%|██████████| 1.52k/1.52k [00:00<00:00, 7.76MB/s]
1_Pooling/config.json: 100%|██████████| 191/191 [00:00<00:00, 1.05MB/s]
README.md: 100%|██████████| 90.3k/90.3k [00:00<00:00, 440kB/s]
config.json: 100%|██████████| 779/779 [00:00<00:00, 4.90MB/s]
config_sentence_transformers.json: 100%|██████████| 124/124 [00:00<00:00, 632kB/s]
model.safetensors: 100%|██████████| 1.34G/1.34G [30:50<00:00, 725kB/s]
pytorch_model.bin: 100%|██████████| 1.34G/1.34G [30:45<00:00, 727kB/s]
sentence_bert_config.json: 100%|██████████| 52.0/52.0 [00:00<00:00, 411kB/s]
special_tokens_map.json: 100%|██████████| 125/125 [00:00<00:00, 923kB/s]
tokenizer.json: 100%|██████████| 711k/711k [00:01<00:00, 603kB/s]
tokenizer_config.json: 100%|██████████| 366/366 [00:00<00:00, 1.36MB/s]
vocab.txt: 100%|██████████| 232k/232k [00:00<00:00, 811kB/s]
modules.json: 100%|██████████| 349/349 [00:00<00:00, 2.74MB/s]


In [68]:
# Collect the files in ./data that match *.pdf and parse each one with
# PyPDFLoader.
loader = DirectoryLoader(
    "./data",
    glob="*.pdf",
    loader_cls=PyPDFLoader,
)
documents = loader.load()


In [69]:
# Split the loaded documents into small chunks (chunk_size=300, no overlap
# between neighbouring chunks) and display how many chunks were produced.
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=300,
    chunk_overlap=0,
)
texts = text_splitter.split_documents(documents)
len(texts)

632

In [70]:
import gc

import torch

# Free memory before the embedding-heavy cell below.
# Order matters: run the garbage collector first so objects that are only kept
# alive by reference cycles are destroyed, and only then ask PyTorch to release
# its unoccupied cached CUDA memory. Emptying the cache before collecting
# cannot release memory that is still held by not-yet-collected objects.
collected_objects = gc.collect()
torch.cuda.empty_cache()

# Cell output: number of unreachable objects found by the collector.
collected_objects

4892

In [71]:
from langchain.vectorstores import Chroma

# Embed every chunk with `embedding` and store the vectors in a Chroma
# collection that lives in ./db.
# NOTE(review): re-running this cell against an existing ./db presumably adds
# the same chunks a second time — confirm, and clear the directory if so.
persist_directory = './db'
vectordb = Chroma.from_documents(
    documents=texts,
    embedding=embedding,
    persist_directory=persist_directory,
)
# Explicitly write the collection to persist_directory.
vectordb.persist()

In [72]:
import torch

# Report which device PyTorch would use: the first CUDA GPU when one is
# visible, otherwise the CPU.
if torch.cuda.is_available():
    dev = "cuda:0"
else:
    dev = "cpu"
dev

'cuda:0'

In [73]:
# Retriever backed by the Chroma vector store; search is configured with k=5.
retriever = vectordb.as_retriever(
    search_kwargs={'k': 5},
)

In [86]:
# BM25 retriever built from the same chunks that were embedded into Chroma.
bm25_retriever = BM25Retriever.from_documents(texts)
# Result count for BM25 (note: the vector-store retriever is configured with k=5).
bm25_retriever.k = 4

In [87]:
# Combine the BM25 retriever and the vector-store retriever, weighting both
# equally.
ensemble_retriever = EnsembleRetriever(
    retrievers=[bm25_retriever, retriever],
    weights=[0.5, 0.5],
)

In [88]:
# Quick manual check of the ensemble retriever: display the documents it
# returns for a sample query.
ensemble_retriever.get_relevant_documents(
    "scholars admitted in 2019, 17"
)

[Document(page_content='\uf0b7 For scholars admitted in 2019 , the default option ID is 13  and for the \nscholars admitted in 2020 , the default option ID is 17 . \n\uf0b7 For the scholars admitted in 2018 or before , please select the \nappropriate option ID from t he list. (To know the available', metadata={'page': 1, 'source': 'data/2. Course work allocation and DAC Minutes Upload.pdf'}),
 Document(page_content='2. For the scholars admitted till 2017, select Research Scholars – Pre 2018 and  \nFor the scholars admitted from 2018, select Research Scholars – 2018  \n3. In the Course Allocation by Guide  page, choose the Register Number  of the \nScholar.  \n4. Choose the option ID', metadata={'page': 1, 'source': 'data/2. Course work allocation and DAC Minutes Upload.pdf'}),
 Document(page_content='Note: For the publications of the scholars to be considered for the adjudication of the \nthesis, the scholars shall include VIT’s affiliation in their publications.', metadata={'source': 

In [97]:
from langchain.chains import RetrievalQA

# Question-answering chain: `ensemble_retriever` supplies the context and
# `llm` writes the answer. Source documents are requested in the response.
qa_chain = RetrievalQA.from_chain_type(
    llm=llm,
    retriever=ensemble_retriever,
    return_source_documents=True,
)
     

In [91]:
def process_llm_response(query):
    """Run ``query`` through ``qa_chain`` and return the answer text.

    Args:
        query: The question passed to ``qa_chain``.

    Returns:
        The value stored under the ``'result'`` key of the chain's response.
        Every other entry of the response is discarded.
    """
    answer = qa_chain(query)['result']
    return answer

In [98]:
# Example question; the cell displays the answer returned by the QA helper.
query = "minimum time duration for direct ph.d"
process_llm_response(query)

 Based on the provided context, there is no information given about the minimum time duration for a direct Ph.D. program. Therefore, I don't have an answer for that question.

" Based on the provided context, there is no information given about the minimum time duration for a direct Ph.D. program. Therefore, I don't have an answer for that question."

In [67]:
def main():
    """Render the QA page: a title, a query box, a button and the answer.

    The answer is computed with ``process_llm_response`` and written to the
    page only when the "Get response" button reports a press; otherwise the
    function returns after drawing the input widgets.
    """
    # NOTE(review): `st` is not imported anywhere in the visible notebook —
    # presumably `import streamlit as st`; confirm it is in scope before running.
    st.title("VIT QA System")
    question = st.text_area("Enter Query")

    # Nothing more to render until the button is pressed.
    if not st.button("Get response"):
        return

    answer = process_llm_response(question)
    st.write("Response:")
    # Wrap the answer in Markdown bold markers.
    st.write(f"**{answer}**")


main()