In [1]:
import os
import gradio as gr
import shutil
import time
import warnings

warnings.filterwarnings("ignore")
import textwrap
import langchain
from langchain.llms import HuggingFacePipeline
import torch
import transformers
from transformers import AutoTokenizer, AutoModelForCausalLM
from transformers import pipeline
### Multi-document retriever
from langchain.vectorstores import Chroma, FAISS
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.chains import RetrievalQA
from langchain.document_loaders import PyPDFLoader
from langchain.document_loaders import DirectoryLoader
from InstructorEmbedding import INSTRUCTOR

import glob
from InstructorEmbedding import INSTRUCTOR
from langchain.embeddings.sentence_transformer import SentenceTransformerEmbeddings
import uuid
__import__('pysqlite3')
import sys
sys.modules['sqlite3'] = sys.modules.pop('pysqlite3')
import chromadb
from chromadb.config import Settings
from langchain.prompts import PromptTemplate
langchain.verbose = True
from traceloop.sdk import Traceloop
Traceloop.init(app_name="RAG_AMP", disable_batch=True)

from langchain.chains import ConversationalRetrievalChain
from langchain.memory import ConversationBufferMemory

from langchain.chains import LLMChain
from langchain.chains.qa_with_sources import load_qa_with_sources_chain
memory = ConversationBufferMemory(memory_key="chat_history", return_messages=True)

# Address of the Chroma vector-database server, read from the environment.
IP_ADDR=os.environ["VectorDB_IP"]
# HTTP client for the remote Chroma server (port 8000).
chroma = chromadb.HttpClient(host=IP_ADDR, port=8000)

# Hugging Face access token and model identifier, read from the environment.
access_token = os.environ["HF_TOKEN"]
hugging_face_model = os.environ["HF_MODEL"]

tokenizer = AutoTokenizer.from_pretrained(hugging_face_model, use_auth_token=access_token)

# Load the causal language model with 4-bit weights and float16 dtype.
llm_model = AutoModelForCausalLM.from_pretrained(hugging_face_model, #meta-llama/Llama-2-13b-chat-hf
                                                     load_in_4bit=True,
                                                     device_map='balanced_low_0',
                                                     torch_dtype=torch.float16,
                                                     low_cpu_mem_usage=True,
                                                     use_auth_token=access_token
                                                    )
# Generation settings passed to the text-generation pipeline.
# NOTE(review): max_len is used as the pipeline's max_length; confirm the model
# selected through HF_MODEL supports an 8192-token context.
max_len = 8192
llm_task = "text-generation"
# Sampling temperature.
T = 0.1



  from .autonotebook import tqdm as notebook_tqdm


[33mNo Traceloop API key provided, auto-creating a dashboard on Traceloop
[33mTRACELOOP_API_KEY= 4d55ec4c920b261498069a50094e7d92d6bb9651feea89b94c906a70cb55b83efa8e577429e4ac6d56ac1943eaa120ba
[32m
Go to https://app.traceloop.com/trace?skt=nWGT8S7mK5kDZBJS2WJkLr to see a live dashboard

[32mTraceloop exporting traces to https://api.traceloop.com authenticating with bearer token
[39m


Loading checkpoint shards: 100%|██████████████████| 2/2 [00:46<00:00, 23.38s/it]


In [2]:
from langchain.chains.qa_with_sources import load_qa_with_sources_chain
# Wrap the loaded model and tokenizer in a transformers pipeline for the task
# named by llm_task, using the generation settings defined above.
llm_pipeline = pipeline(
    task=llm_task,
    model=llm_model, 
    tokenizer=tokenizer, 
    max_length=max_len,
    temperature=T,
    top_p=0.95,
    repetition_penalty=1.15
)

# Expose the pipeline as a LangChain LLM so the chains below can use it.
llm = HuggingFacePipeline(pipeline=llm_pipeline)

#Uploading Files to target location
target = '/home/cdsw/data/'
def upload_file(files):
    """
    Copy uploaded files into the shared `target` directory.

    Args:
    - files (list): Uploaded file objects; each must expose a `name` attribute holding its path on disk.

    Returns:
    - list: The source paths of the uploaded files, in the order received.
    """
    file_paths = []
    for uploaded in files:
        file_paths.append(uploaded.name)
    print(file_paths)
    for source_path in file_paths:
        shutil.copy(source_path, target)
    return file_paths


# Embedding function used to convert text into vector embeddings.
embedding_function = SentenceTransformerEmbeddings(model_name="all-MiniLM-L6-v2") #TODO: Find replacement

# LangChain vector-store wrapper around the "default" collection, backed by the
# remote Chroma client created earlier.
langchain_chroma = Chroma(
client=chroma,
collection_name="default",
embedding_function=embedding_function)

#retriever = langchain_chroma.as_retriever(search_kwargs={"k": 3, "search_type" : "similarity"})

def create_default_collection():
    """
    Create the "default" collection in ChromaDB if it does not exist yet.

    Returns:
    - str: Confirmation message.
    """
    # The call must go through the connected client: the `chromadb` module
    # itself has no `create_collection`. get_or_create keeps this idempotent.
    chroma.get_or_create_collection("default")
    return "Created Default Collection"



def collection_lists():
    """
    List the names of all collections available in ChromaDB.

    If the server reports no collections, the "default" collection is created
    first so the returned list is never empty.

    Returns:
    - list: Collection names (str).
    """
    chroma_collections = chroma.list_collections()
    # An empty server yields an empty sequence rather than None, so test for
    # emptiness instead of comparing against None.
    if not chroma_collections:
        create_default_collection()
        return ["default"]
    return [collection.name for collection in chroma_collections]

collection_list = collection_lists()

def _collapse_whitespace(text):
    """Replace every run of spaces, tabs and newlines in `text` with one space."""
    import re  # local import so this helper does not depend on the import cell

    return re.sub(r"[ \t\n]+", " ", text)


def embed_documents(collection):
    """
    Given a collection name, this function loads PDF documents from /home/cdsw/data/,
    normalizes the whitespace of every page, and adds each page to the Chroma
    collection of that name. After embedding, it deletes the processed PDFs.

    Args:
    - collection (str): Name of the collection to be used in the Chroma vector database.

    Returns:
    - output (str): Message indicating which documents have been embedded.
    """
    loader = DirectoryLoader("/home/cdsw/data/",
                             glob="**/*.pdf",
                             loader_cls=PyPDFLoader,
                             use_multithreading=True)

    documents = loader.load()

    # A single regex collapses whitespace runs of any length.
    for doc in documents:
        doc.page_content = _collapse_whitespace(doc.page_content)

    # Kept from the original flow: the LangChain wrapper is instantiated before
    # the raw collection is fetched.
    # NOTE(review): presumably this ensures the collection exists - confirm.
    Chroma(
        client=chroma,
        collection_name=collection,
        embedding_function=embedding_function)

    # Documents are added through the raw Chroma collection rather than the
    # LangChain wrapper. A separate name avoids rebinding the `collection`
    # argument to an object of a different type.
    chroma_collection = chroma.get_collection(collection)

    # Document is chunked per page. Each page will be an entry in the Vector DB
    for doc in documents:
        chroma_collection.add(
            ids=[str(uuid.uuid1())], metadatas=doc.metadata, documents=doc.page_content
        )

    # Mirror the loader's recursive "**/*.pdf" glob so every PDF that was
    # loaded is also removed; otherwise PDFs in sub-directories would be
    # embedded again on the next call.
    files = glob.glob("/home/cdsw/data/**/*.pdf", recursive=True)

    output = f"Documents have been embedded: {files}"
    print(output)
    # Deleting Files
    for file in files:
        os.remove(file)

    return output


##### Experimental Code #####
def set_retriever(collection_name):
    """
    Build a similarity-search retriever over one Chroma collection.

    Args:
    - collection_name (str): Name of the Chroma collection to search.

    Returns:
    - Retriever configured to fetch the 2 most similar documents per query.
    """
    langchain_chroma = Chroma(
        client=chroma,
        collection_name=collection_name,
        embedding_function=embedding_function)

    # `search_type` is an argument of as_retriever itself; `search_kwargs`
    # only carries the options of the search call (here the result count k).
    return langchain_chroma.as_retriever(search_type="similarity", search_kwargs={"k": 2})


# Answering prompt for LangChain: expects the variables {context} (supporting
# text) and {question}.
template = """You are a helpful AI assistant. Use only the below provided Context to answer the following question. If you do not know the answer respond with "I don't know."
Context:{context}
>>QUESTION<<{question}
>>ANSWER<<"""
QA_CHAIN_PROMPT = PromptTemplate.from_template(template)

# Question-condensing prompt: expects {chat_history} and {question} and asks for
# the follow-up to be rewritten as a standalone question.
prompt = "Given the following conversation and a follow-up question, rephrase the follow-up question to be a standalone question:\n\nChat History:\n{chat_history}\nFollow-up Input: {question}\nStandalone question:"
question_template = PromptTemplate.from_template(prompt)


def chain(query, retriever):
    """
    Executes one turn of a conversational retrieval question-answering chain.

    Args:
    - query (dict): Chain inputs with the keys "question" and "chat_history".
    - retriever (str): Name of the Chroma collection to retrieve from; it is passed to set_retriever.

    Returns:
    - dict: Response from the ConversationalRetrievalChain, including "answer" and "source_documents".
    """
    question_generator = LLMChain(llm=llm, prompt=question_template)

    qa_chain = load_qa_with_sources_chain(
        llm=llm,
        chain_type="stuff",
        prompt=QA_CHAIN_PROMPT,
        # QA_CHAIN_PROMPT names its document slot {context}; without this the
        # with-sources stuff chain looks for a {summaries} variable instead.
        document_variable_name="context",
        verbose=True
    )

    conv_chain = ConversationalRetrievalChain(
        retriever=set_retriever(retriever),
        question_generator=question_generator,
        combine_docs_chain=qa_chain,
        # llm_ans reads response["source_documents"], so the chain must return them.
        return_source_documents=True,
    )

    return conv_chain(query)

def add_text(history, text):
    """
    Append the user's message to the conversation as a new, unanswered turn.

    The input list is not modified; a new list is returned.

    Args:
    - history (list): The existing conversation history.
    - text (str): The user's input text.

    Returns:
    - list: History extended with the turn (text, None).
    - str: Empty string (presumably used to clear the input box - confirm against the UI wiring).
    """
    pending_turn = (text, None)
    return history + [pending_turn], ""

def bot(history, collection):
    """
    Generates a response using a Language Model and updates the conversation history.

    Args:
    - history (list): The existing conversation history; the last entry holds the pending user message.
    - collection (str): The name of the collection used for document retrieval.

    Returns:
    - list: Updated conversation history including the bot's response.
      An empty history is returned unchanged.
    """
    if not history:
        # Nothing to answer yet.
        return history

    user_message = history[-1][0]
    response = llm_ans(user_message, collection)
    # Replace the whole turn instead of assigning into it: add_text stores
    # turns as tuples, which do not support item assignment.
    history[-1] = [user_message, response]
    return history

def wrap_text_preserve_newlines(text, width=110):
    """
    Wrap text to a maximum width while keeping its existing line breaks.

    Each newline-separated segment is wrapped on its own, so original
    newlines (including blank lines) survive.

    Args:
    - text (str): The input text.
    - width (int): The maximum width of each output line.

    Returns:
    - str: Wrapped text with newlines preserved.
    """
    return '\n'.join(
        textwrap.fill(segment, width=width) for segment in text.split('\n')
    )

def process_llm_response(llm_response):
    """
    Wrap the answer text of a chain response and print its source files.

    Args:
    - llm_response (dict): Response holding the answer under 'result' and the
      retrieved documents under "source_documents".

    Returns:
    - str: The wrapped answer text.
    """
    wrapped_answer = wrap_text_preserve_newlines(llm_response['result'])
    print('\n\nSources:')
    source_paths = (document.metadata['source'] for document in llm_response["source_documents"])
    for path in source_paths:
        print(path)
    return wrapped_answer

 

def llm_ans(query, collection):
    """
    Ask the chain a question and format the answer with its sources and timing.

    Every call starts with an empty chat history, so calls are independent of
    each other.

    Args:
    - query (str): The query/question to be answered.
    - collection (str): The name of the collection used for document retrieval.

    Returns:
    - str: The answer, followed by the relevant source files and the elapsed time in seconds.
    """
    started_at = time.time()
    llm_response = chain({"question": query, "chat_history": []}, collection)
    elapsed_time = time.time() - started_at

    # Report source paths relative to the data directory.
    source_files = "\n".join(
        document.metadata['source'].replace("/home/cdsw/data/", "")
        for document in llm_response["source_documents"]
    )
    return "".join([
        llm_response['answer'],
        "\n \n Relevant Sources: \n",
        source_files,
        "\n \n Elapsed Time: ",
        str(round(elapsed_time, 2)),
        " seconds",
    ])

def reset_state():
    """
    Produce the cleared values used to reset the Gradio UI.

    Returns:
    - tuple: Two fresh empty lists followed by None.
    """
    first_cleared, second_cleared = [], []
    return first_cleared, second_cleared, None

#Gradio UI Code Block

# with gr.Blocks() as demo:
#     with gr.Tab("FileGPT"):
#         chatbot = gr.Chatbot([], elem_id="chatbot").style(height=650)
#         with gr.Row():
#             with gr.Column(scale=4):
#                 with gr.Column(scale=12):
#                     user_input = gr.Textbox(show_label=False, placeholder="Input...", lines=10).style(
#                         container=False)
#                 with gr.Column(min_width=32, scale=1):
#                     submitBtn = gr.Button("Submit", variant="primary")
#             with gr.Column(scale=1):
#                 collection_dropdown = gr.Dropdown(collection_list, label="Chroma Collections", info="Choose a Collection to Query",value = "default", max_choices=1)
#                 emptyBtn = gr.Button("Clear History")
#         user_input.submit(add_text, [chatbot, user_input], [chatbot, user_input]).then(bot, [chatbot, collection_dropdown], chatbot)
#         submitBtn.click(add_text, [chatbot, user_input], [chatbot, user_input]).then(bot, [chatbot, collection_dropdown], chatbot)
#         history = gr.State([])
#         past_key_values = gr.State(None)
#         emptyBtn.click(reset_state, outputs=[chatbot, history, past_key_values], show_progress=True)
        

#     with gr.Tab("Upload File"):
#         with gr.Row():
#             title="Falcon 40B",
#             with gr.Column(scale=4):
#                 file_output = gr.File()
#                 upload_button = gr.UploadButton("Click to Upload a File", file_types=[".pdf",".csv",".doc"], file_count="multiple")
#                 upload_button.upload(upload_file, upload_button, file_output)
#             with gr.Column(scale=1):
#                 embed_dropdown = gr.Dropdown(collection_list, label="Chroma Collections", info="Choose a Collection to Query", value = "default", max_choices=1)
#                 embed_button = gr.Button("Embed Document", variant="primary")
#                 txt_3 = gr.Textbox(value="", label="Output")
                
                
#     embed_button.click(embed_documents, embed_dropdown, show_progress=True, outputs=[txt_3])
    


    
# demo.queue()


# if __name__ == "__main__":
#     demo.launch(share=True,enable_queue=True,show_error=True,server_name='127.0.0.1', server_port=int("8095")) 

#     print("Gradio app ready")

# Remembering chat history 👌

In [23]:
from langchain.memory import ConversationBufferMemory
memory = ConversationBufferMemory(memory_key="chat_history", return_messages=True)

def set_retriver(collection_name):
    """
    Build a similarity-search retriever over one Chroma collection.

    Args:
    - collection_name (str): Name of the Chroma collection to search.

    Returns:
    - Retriever configured to fetch the 2 most similar documents per query.
    """
    langchain_chroma = Chroma(
        client=chroma,
        collection_name=collection_name,
        embedding_function=embedding_function)

    # `search_type` is an argument of as_retriever itself; `search_kwargs`
    # only carries the options of the search call (here the result count k).
    return langchain_chroma.as_retriever(search_type="similarity", search_kwargs={"k": 2})



# Conversational chain over the "Quantization_Test" collection; `memory`
# supplies the chat history between calls.
qa = ConversationalRetrievalChain.from_llm(llm, set_retriver("Quantization_Test"), memory=memory)



In [24]:
query = "What does splicing mean ?"
result = qa(query)



[1m> Entering new ConversationalRetrievalChain chain...[0m

[1m> Finished chain.[0m


In [25]:
result["answer"]

' Splicing refers to the process of combining exons into a single RNA molecule during gene expression. It involves the removal of introns (non-coding regions) and the joining of exons (coding regions), resulting in a mature RNA molecule that can be translated into protein. There are two main types of splicing: intronic splicing, which occurs within the introns, and exonic splicing, which occurs between exons. The splicing process is crucial for the proper expression of genetic information and plays a critical role in many biological processes, including development, cell signaling, and immune response.'

In [26]:
query = "Summarize the previous answer"
result = qa({"question": query})



[1m> Entering new ConversationalRetrievalChain chain...[0m

[1m> Finished chain.[0m


In [27]:
result['answer']

' Sure! Splicing is a process that happens after the genetic code has been translated into messenger RNA (mRNA). At this point, the mRNA molecule has a bunch of introns (non-coding regions) and exons (coding regions). To make a fully functional protein, the introns need to be removed and the exons need to be joined together. This is done by enzymes called splicing factors, which recognize specific sequences in the mRNA and perform the splicing reaction. There are many different ways that splicing can occur, depending on the specific sequence of the mRNA and the splicing factors involved. Some common variations include:\n\n* Skipping one or more exons: This can happen if the splicing factors recognize a specific sequence in the mRNA that signals them to skip over one or more exons. This can result in a shorter protein that may not have full functionality.\n* Including extra exons: This can happen if the splicing factors recognize a specific sequence in the mRNA that signals them to incl

In [28]:
query = "Great, now explain it to me like I am 5 years old"
result = qa({"question": query})



[1m> Entering new ConversationalRetrievalChain chain...[0m

[1m> Finished chain.[0m


In [29]:
result['answer']

' Splicing factors recognize the right sequences in the mRNA through a combination of hydrogen bonding, electrostatic interactions, and base pairing. The splicing factor binds to specific nucleotides in the intron and exon regions of the mRNA, which are recognized by specific conformations of the double helix. These interactions create a stable complex between the splicing factor and the mRNA, allowing the splicing reaction to occur efficiently.'

## Using a different model for condensing the question

In [33]:
# Start from an empty history so turns recorded by an earlier experiment do not
# leak into this one.
memory = ConversationBufferMemory(memory_key="chat_history", return_messages=True)
qa = ConversationalRetrievalChain.from_llm(
    llm,
    set_retriver("Quantization_Test"),
    # NOTE(review): this passes the same model that answers the question; supply
    # a different LLM here to condense the question with another model.
    condense_question_llm=llm,
    memory=memory
)

In [34]:
query = "What does splicing mean ?"
result = qa(query)



[1m> Entering new ConversationalRetrievalChain chain...[0m

[1m> Finished chain.[0m


In [35]:
result["answer"]

' Splicing is the process by which genetic information from one or more genes is combined into a single molecule of RNA. This process involves the removal of introns (non-coding regions within the gene) and the joining of exons (coding regions within the gene). The resulting RNA molecule, known as messenger RNA (mRNA), then travels out of the nucleus and into the cytoplasm where it serves as a template for protein synthesis.'

In [36]:
query = "Summarize the previous answer"
result = qa({"question": query})



[1m> Entering new ConversationalRetrievalChain chain...[0m

[1m> Finished chain.[0m


In [37]:
result['answer']

' Splicing is the process by which genetic information from one or more genes is combined into a single molecule of RNA. This process involves the removal of introns (non-coding regions within the gene) and the joining of exons (coding regions within the gene). The resulting RNA molecule, known as messenger RNA (mRNA), then travels out of the nucleus and into the cytoplasm where it serves as a template for protein synthesis.'

In [38]:
query = "Great, now explain it to me like I am 5 years old"
result = qa({"question": query})



[1m> Entering new ConversationalRetrievalChain chain...[0m

[1m> Finished chain.[0m


In [39]:
result['answer']

' Splicing is the process by which genetic information from one or more genes is combined into a single molecule of RNA. This process involves cutting and pasting different parts of the gene, creating a new molecule of RNA that contains only the desired part(s).'

## Using a custom prompt for condensing the question 👌

In [27]:
from langchain.prompts.prompt import PromptTemplate

def set_retriver(collection_name):
    """
    Build a similarity-search retriever over one Chroma collection.

    Args:
    - collection_name (str): Name of the Chroma collection to search.

    Returns:
    - Retriever configured to fetch the 2 most similar documents per query.
    """
    langchain_chroma = Chroma(
        client=chroma,
        # Honour the requested collection instead of always querying "default".
        collection_name=collection_name,
        embedding_function=embedding_function)

    # `search_type` is an argument of as_retriever itself; `search_kwargs`
    # only carries the options of the search call (here the result count k).
    return langchain_chroma.as_retriever(search_type="similarity", search_kwargs={"k": 2})

_template = """Given the following conversation and a follow up question, rephrase the follow up question to be a standalone question, in its original language.

Chat History:
{chat_history}
Follow Up Input: {question}
Standalone question:"""
CONDENSE_QUESTION_PROMPT = PromptTemplate.from_template(_template)

In [28]:
custom_template = """Given the following conversation and a follow up question, rephrase the follow up question to be a standalone question, in its original language.

Chat History:
{chat_history}
Follow Up Input: {question}
Standalone question:"""

In [29]:
CUSTOM_QUESTION_PROMPT = PromptTemplate.from_template(custom_template)

In [30]:
# Fresh memory so this experiment starts with an empty chat history.
memory = ConversationBufferMemory(memory_key="chat_history", return_messages=True)
# Conversational chain whose follow-up questions are condensed with
# CUSTOM_QUESTION_PROMPT.
qa = ConversationalRetrievalChain.from_llm(
    llm,
    set_retriver("Quantization_Test"),
    condense_question_prompt=CUSTOM_QUESTION_PROMPT,
    memory=memory
)

In [31]:
query = "What does splicing mean ?"
result = qa(query)



[1m> Entering new ConversationalRetrievalChain chain...[0m

[1m> Finished chain.[0m


In [32]:
result["answer"]

' Splicing refers to the process of joining exons together during gene expression. It involves recognizing the introns (non-coding regions) and removing them so that the exons (coding regions) can be joined together to form a continuous RNA molecule. This process is crucial for the proper synthesis of proteins from genetic information.'

In [33]:
query = "Summarize the previous answer"
result = qa({"question": query})



[1m> Entering new ConversationalRetrievalChain chain...[0m

[1m> Finished chain.[0m


In [34]:
result['answer']

' Sure! Splicing is the process by which the genetic code is edited before it is translated into proteins. It involves cutting out certain parts of the DNA sequence and joining others together in a particular order. There are many different ways that splicing can occur, depending on the specific genetic code being edited and the conditions under which it is done. One common way that splicing occurs is through the use of specialized enzymes called splicing factors, which recognize specific sequences in the DNA code and cut or join other sequences accordingly. These splicing factors can either remove or add extra sections of DNA code, creating different versions of the same protein. Another way that splicing can occur is through the action of intronic regions, which are located within the DNA code itself rather than near its edges. These intronic regions can fold back upon themselves in such a way as to cover up part of another section of the code, effectively "editing" it without actual

In [35]:
query = "Great, now explain it to me like I am 5 years old"
result = qa({"question": query})



[1m> Entering new ConversationalRetrievalChain chain...[0m

[1m> Finished chain.[0m


In [36]:
result['answer']

' Sure! Splicing is like building a puzzle. You start with a bunch of little pieces (called exons), and then you put them together in a special way to form a complete picture (the final product). But sometimes one or more pieces might get left out, which can change how the final product looks. That\'s kind of like what happens when we talk about "exon skipping".'

## Return Source Documents

In [49]:
from langchain.prompts.prompt import PromptTemplate

def set_retriver(collection_name):
    """
    Build a similarity-search retriever over one Chroma collection.

    Args:
    - collection_name (str): Name of the Chroma collection to search.

    Returns:
    - Retriever configured to fetch the 2 most similar documents per query.
    """
    langchain_chroma = Chroma(
        client=chroma,
        collection_name=collection_name,
        embedding_function=embedding_function)

    # `search_type` is an argument of as_retriever itself; `search_kwargs`
    # only carries the options of the search call (here the result count k).
    return langchain_chroma.as_retriever(search_type="similarity", search_kwargs={"k": 2})

# qa = ConversationalRetrievalChain.from_llm(
#     llm,
#     set_retriver("default"),
#     condense_question_llm = llm,
#     memory=memory,
#     return_source_documents=True
# )


_template = """Given the following conversation and a follow up question, rephrase the follow up question to be a standalone question, in its original language.

Chat History:
{chat_history}
Follow Up Input: {question}
Standalone question:"""
CONDENSE_QUESTION_PROMPT = PromptTemplate.from_template(_template)

In [50]:
custom_template = """Given the following conversation and a follow up question, rephrase the follow up question to be a standalone question, in its original language.

Chat History:
{chat_history}
Follow Up Input: {question}
Standalone question:"""

In [51]:
CUSTOM_QUESTION_PROMPT = PromptTemplate.from_template(custom_template)

In [52]:
# Fresh memory for this experiment.
# NOTE(review): output_key='answer' is presumably needed because the chain
# returns source documents alongside the answer - confirm.
memory = ConversationBufferMemory(memory_key="chat_history", return_messages=True,  output_key='answer')
# Conversational chain over the "default" collection that also returns the
# retrieved source documents with each answer.
qa = ConversationalRetrievalChain.from_llm(
    llm,
    set_retriver("default"),
    condense_question_prompt=CUSTOM_QUESTION_PROMPT,
    condense_question_llm = llm,
    memory=memory,
    return_source_documents=True
)

In [53]:
query = "What does splicing mean ?"
result = qa(query)



[1m> Entering new ConversationalRetrievalChain chain...[0m

[1m> Finished chain.[0m


In [54]:
result["answer"]

' Splicing refers to the process of joining exons together during gene expression. It involves recognizing the introns (non coding regions) and removing them so that the exons (coding regions) can be joined together to form a continuous RNA molecule. This process occurs in the nucleus before the RNA molecule is transported to the cytoplasm where it is translated into protein.'

In [48]:
result["source_documents"][0]

Document(page_content='184T. Yokota et al 6. Yokota T, Lu Q, Morgan JE, et al. Expansion of revertant fibers in dystrophic mdx muscles reflects activity of muscle precursor cells and serves as an index of muscle regeneration. J Cell Sci 2006;119:2679-87. 7. Lu QL, Morris GE, Wilton SD, et al. Massive idiosyncratic exon skipping corrects the nonsense mutation in dystrophic mouse mus - cle and produces functional revertant fibers by clonal expansion. J Cell Biol 2000;148:985-96. 8. Dietz HC, Kendzior RJ Jr. Maintenance of an open reading frame as an additional level of scrutiny during splice site selection. Nat Genet 1994;8:183-8. 9. Naylor J, Brinke A, Hassock S, Green PM, Giannelli F. Charac - teristic mRNA abnormality found in half the patients with severe haemophilia A is due to large DNA inversions. Hum Mol Genet 1993;2:1773-8. 10. Lu QL, Lou F, Bou-Gharios G, et al. Functional amounts of dys - trophin produced by skipping the mutated exon in the mdx dys - trophic mouse. Nat Med 200

In [55]:
query = "Summarize the previous answer"
result = qa({"question": query})



[1m> Entering new ConversationalRetrievalChain chain...[0m

[1m> Finished chain.[0m


In [56]:
result["answer"]

' Sure! Splicing is the process by which the genetic code is edited before it is translated into proteins. It involves cutting out certain parts of the DNA sequence and joining others together in a particular order. There are many different ways that splicing can occur, depending on the specific genetic code being edited and the conditions under which it is done. One common way that splicing occurs is through the use of specialized molecules called "splicing factors." These molecules recognize specific sequences within the DNA code and bind to them, causing the relevant sections to be cut out and joined together in a particular order. Another way that splicing can occur is through the action of enzymes called "exonucleases," which can remove specific sections of the DNA code and then join other sections back together again. Overall, splicing plays a crucial role in determining the final form of the proteins that are produced from our genetic code. Without it, we would not be able to cr

In [57]:
query = "Great, now explain it to me like I am 5 years old"
result = qa({"question": query})



[1m> Entering new ConversationalRetrievalChain chain...[0m

[1m> Finished chain.[0m


In [58]:
result["answer"]

' Sure! Splicing is like building a puzzle. You start with a bunch of little pieces (called exons), and then you put them together in a special way to form a complete picture (the final product). But sometimes, one or more of these pieces might not fit quite right, so you need to find a new way to connect them. That\'s kind of like what happens when we talk about "splicing" a gene. We\'re finding a new way to connect the pieces of information inside the gene so they can come together correctly and make sense.'