In [1]:
from dotenv import load_dotenv

# Load environment variables via python-dotenv. The truthy return value of
# load_dotenv() is used as the success signal for the confirmation message.
_dotenv_loaded = load_dotenv()
if _dotenv_loaded:
    print("Dotenv loaded successfully")

Dotenv loaded successfully


In [2]:
from langchain_nvidia_ai_endpoints import ChatNVIDIA, NVIDIAEmbeddings

# Embedding client used to vectorise document chunks and queries.
# Tip: NVIDIAEmbeddings.get_available_models() can be called to pick another model.
embedder = NVIDIAEmbeddings(
    model="nvidia/nv-embed-v1",
    truncate="END",
)

# Chat model that produces the final answers.
# Tip: ChatNVIDIA.get_available_models() can be called to pick another model.
instruct_llm = ChatNVIDIA(
    model="meta/llama-3.3-70b-instruct",
)

In [None]:
from langchain_community.document_loaders import ArxivLoader

ARXIV_QUERY = "1706.03762"  ## Attention Is All You Need
EXPECTED_TITLE = "Attention Is All You Need"

# Fetch the paper(s) matching the query as a plain list of documents.
paper_docs = ArxivLoader(query=ARXIV_QUERY).load()
if not paper_docs:
    # Fail with a clear message instead of an IndexError further down.
    raise ValueError(f"ArxivLoader returned no documents for query {ARXIV_QUERY!r}")

# NOTE(review): the saved outputs of later cells show a different paper title
# ('Generative AI for Healthcare: ...'), so the query may not resolve to the
# intended paper - confirm. Surface a mismatch instead of silently indexing it.
loaded_titles = [doc.metadata.get("Title") for doc in paper_docs]
if EXPECTED_TITLE not in loaded_titles:
    print(f"WARNING: expected {EXPECTED_TITLE!r} but loaded {loaded_titles}")

# Drop the bibliography: keep only the text before the first "References".
# partition() leaves the content untouched when the marker is absent.
for doc in paper_docs:
    doc.page_content = doc.page_content.partition("References")[0]

# Later cells read `documents[0]` (the list) and `documents[0][0]` (the first
# paper), so `documents` deliberately stays a 1-tuple wrapping the list.
documents = (paper_docs,)

In [5]:
# Preview only the beginning of the first loaded paper; the full text is too
# long to be useful as a cell output.
documents[0][0].page_content[:1000]

'Generative AI for Healthcare:\nFundamentals, Challenges, and Perspectives\nGang Chen1, Changshuo Liu3, Gene Anne Ooi4, Marcus Tan5, Zhongle Xie2,\nJianwei Yin1,2, James Wei Luen Yip5, Wenqiao Zhang2, Jiaqi Zhu3* & Beng Chin Ooi1,2\n1College of Computer Science and Technology, Zhejiang University, Hangzhou 310027, China\n2College of Software Technology, Zhejiang University, Ningbo 315100, China\n3School of Computing, National University of Singapore, Singapore 117417\n4Singapore General Hospital, Singapore 169608\n5National University Hospital, Singapore 119074\nAbstract\nGenerative Artificial Intelligence (GenAI) is taking the world by storm. It promises transformative opportunities for advanc-\ning and disrupting existing practices, including healthcare. From large language models (LLMs) for clinical note synthesis\nand conversational assistance to multimodal systems that integrate medical imaging, electronic health records (EHRs), and\ngenomic data for decision support, GenAI is tra

In [6]:
from langchain_community.vectorstores import FAISS
from langchain.text_splitter import RecursiveCharacterTextSplitter

# Separators handed to the splitter, listed from coarse (blank line) to fine (space).
_split_separators = ["\n\n", "\n", ".", ";", ",", " "]

text_splitter = RecursiveCharacterTextSplitter(
    separators=_split_separators,
    chunk_size=1000,
    chunk_overlap=100,
)

# documents[0] is the list of loaded papers; split it into chunks.
splitted_documents = text_splitter.split_documents(documents[0])

# Embed every chunk with `embedder` and index the result in FAISS.
vectorstore = FAISS.from_documents(
    splitted_documents,
    embedder,
)

In [None]:
# Smoke test: query the freshly built index through its retriever interface.
(
    vectorstore
    .as_retriever()
    .invoke("Encoder")
)

[Document(id='921f1ebe-2141-42ee-bf03-4612fd64b719', metadata={'Published': '2025-10-28', 'Title': 'Generative AI for Healthcare: Fundamentals, Challenges, and Perspectives', 'Authors': 'Gang Chen, Changshuo Liu, Gene Anne Ooi, Marcus Tan, Zhongle Xie, Jianwei Yin, James Wei Luen Yip, Wenqiao Zhang, Jiaqi Zhu, Beng Chin Ooi', 'Summary': 'Generative Artificial Intelligence (GenAI) is taking the world by storm. It\npromises transformative opportunities for advancing and disrupting existing\npractices, including healthcare. From large language models (LLMs) for clinical\nnote synthesis and conversational assistance to multimodal systems that\nintegrate medical imaging, electronic health records, and genomic data for\ndecision support, GenAI is transforming the practice of medicine and the\ndelivery of healthcare, such as diagnosis and personalized treatments, with\ngreat potential in reducing the cognitive burden on clinicians, thereby\nimproving overall healthcare delivery. However, GenA

In [8]:
# Persist the vector store under the local "vector_index" folder so it can be
# reloaded later instead of being rebuilt.
vectorstore.save_local(
    "vector_index",
)

In [9]:
# Reload the vector store from the "vector_index" folder, replacing the
# in-memory `vectorstore`.
# NOTE(review): allow_dangerous_deserialization=True presumably opts in to
# unpickling the saved index - only load folders this notebook wrote itself;
# confirm against the FAISS integration docs.
vectorstore = FAISS.load_local(
    "vector_index",
    embedder,
    allow_dangerous_deserialization=True,
)

In [None]:
# Smoke test: run the same query against the current `vectorstore` to check
# that retrieval still works.
(
    vectorstore
    .as_retriever()
    .invoke("Encoder")
)

[Document(id='921f1ebe-2141-42ee-bf03-4612fd64b719', metadata={'Published': '2025-10-28', 'Title': 'Generative AI for Healthcare: Fundamentals, Challenges, and Perspectives', 'Authors': 'Gang Chen, Changshuo Liu, Gene Anne Ooi, Marcus Tan, Zhongle Xie, Jianwei Yin, James Wei Luen Yip, Wenqiao Zhang, Jiaqi Zhu, Beng Chin Ooi', 'Summary': 'Generative Artificial Intelligence (GenAI) is taking the world by storm. It\npromises transformative opportunities for advancing and disrupting existing\npractices, including healthcare. From large language models (LLMs) for clinical\nnote synthesis and conversational assistance to multimodal systems that\nintegrate medical imaging, electronic health records, and genomic data for\ndecision support, GenAI is transforming the practice of medicine and the\ndelivery of healthcare, such as diagnosis and personalized treatments, with\ngreat potential in reducing the cognitive burden on clinicians, thereby\nimproving overall healthcare delivery. However, GenA

In [14]:
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.output_parsers import StrOutputParser
from langchain.document_transformers import LongContextReorder
from langchain_core.runnables import RunnableLambda

# Wrap LongContextReorder.transform_documents as a runnable so it can be
# piped between the retriever and the formatter.
_reorderer = LongContextReorder()
long_reorder = RunnableLambda(_reorderer.transform_documents)

def docs2str(docs, title="Document"):
    """Render retrieved chunks as a single context string, one chunk per line.

    Args:
        docs: Iterable of chunks. A chunk's ``metadata`` and ``page_content``
            attributes are used when present; otherwise the chunk is labelled
            with ``title`` and rendered with ``str(doc)``.
        title: Label used when a chunk's metadata has no ``'Title'`` entry.

    Returns:
        One line per chunk, each ending in a newline and prefixed with
        ``[Quote from <name>]``; the prefix is omitted when the resolved name
        is empty. An empty ``docs`` yields an empty string.
    """
    def render(doc):
        name = getattr(doc, 'metadata', {}).get('Title', title)
        text = getattr(doc, 'page_content', str(doc))
        prefix = f"[Quote from {name}] " if name else ""
        return prefix + text + "\n"

    return "".join(render(doc) for doc in docs)

# Prompt template with two slots: {context} (retrieved text) and {question}
# (the user's message). The pieces below concatenate into one template string.
context_prompt = ChatPromptTemplate.from_template(
    "Answer the question using only the context\n\n"
    "Retrieved Context: {context}\n\n"
    "User Question: {question}\n"
    "Answer the user conversationally. User is not aware of context."
)

# RAG pipeline: the input string feeds both branches of the mapping below.
#   context  -> retrieve chunks, reorder them, flatten them to one string
#   question -> the input passed through unchanged
# The filled prompt then goes to the chat model and is parsed to plain text.
chain = (
    {
        "context": vectorstore.as_retriever() | long_reorder | docs2str,
        "question": lambda user_question: user_question,
    }
    | context_prompt
    | instruct_llm
    | StrOutputParser()
)

In [15]:
def chat_gen(message, history=None, return_buffer=True):
    """Stream the chain's answer to ``message``.

    Args:
        message: User question, forwarded unchanged to ``chain.stream``.
        history: Prior conversation turns. Accepted so the function can be
            used as a chat callback (it is passed to ``gr.ChatInterface``
            in this notebook) but not used here. Defaults to ``None``
            rather than a shared mutable list.
        return_buffer: When True, yield the text accumulated so far after
            every chunk; when False, yield only the newest chunk.

    Yields:
        The accumulated answer or the latest chunk, per ``return_buffer``.
    """
    buffer = ""

    ## Stream the answer chunk by chunk from the module-level `chain`
    for token in chain.stream(message):
        buffer += token
        yield buffer if return_buffer else token

In [None]:
import gradio as gr

# Greeting shown as the first assistant message in the chat window.
initial_msg = (
    "Hello! I am a document chat agent here to help the user!"
    " I have access to the following Paper: Attention Is All You Need\n\nHow can I help you?"
)

# Seed the chat history with the greeting and wire `chat_gen` in as the
# streaming response function.
chatbot = gr.Chatbot(value = [[None, initial_msg]])
demo = gr.ChatInterface(chat_gen, chatbot=chatbot).queue()

# share=True requests a public URL in addition to the local one (the saved
# output shows a gradio.live link), so avoid it for sensitive documents.
try:
    demo.launch(debug=True, share=True, show_api=False)
except Exception as e:
    print(e)
    raise
finally:
    # Always release the server, whether launch returned, failed or was interrupted.
    demo.close()

  from .autonotebook import tqdm as notebook_tqdm
  chatbot = gr.Chatbot(value = [[None, initial_msg]])


* Running on local URL:  http://127.0.0.1:7860
* Running on public URL: https://86d228a841a82b4ead.gradio.live

This share link expires in 1 week. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)


Keyboard interruption in main thread... closing server.
Killing tunnel 127.0.0.1:7860 <> https://86d228a841a82b4ead.gradio.live
Closing server running on port: 7860


In [None]:
# What is an encoder made of?