In [12]:
import os
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain.document_loaders import PyPDFLoader
from langchain.vectorstores import FAISS

In [31]:
import os
#from langchain.text_splitter import RecursiveCharacterTextSplitter
#from langchain.embeddings.openai import OpenAIEmbeddings
#from langchain.document_loaders import PyPDFLoader
from langchain.vectorstores import FAISS
from langchain.chains import ConversationalRetrievalChain
#from langchain.llms import OpenAI
import streamlit as st
from langchain.chat_models import ChatOpenAI 

In [13]:
# Embedding model that is later handed to the FAISS vector store.
embeddings = HuggingFaceEmbeddings(
    model_name="BAAI/bge-base-en-v1.5",
)

  from .autonotebook import tqdm as notebook_tqdm


In [14]:
# Location of the paper that is loaded and chunked below.
pdf_path = 'data/LILRB2:PirB mediates macrophage recruitment in fibrogenesis of nonalcoholic steatohepatitis.pdf'

# Load the PDF; `doc` holds the list returned by the loader.
loader = PyPDFLoader(pdf_path)
doc = loader.load()

# Split the loaded documents into overlapping chunks (chunk_size=1000,
# chunk_overlap=100), trying the separators in the order listed.
text_splitter = RecursiveCharacterTextSplitter(
    separators=["\n\n", "\n", " ", ""],
    chunk_size=1000,
    chunk_overlap=100,
)
text = text_splitter.split_documents(doc)

In [20]:
# Spot check: display the third chunk produced by the splitter.
text[2]

Document(page_content='reveal the role of PirB/LILRB2 in NASH pa thogenesis and identify PirB/LILRB2-\nANGPTL8 signaling as a potential target for the management or treatmentof NASH.\nNonalcoholic fatty liver disease (NAFLD), which is one of the most\ncommon liver disorders, is associated with increased overall mor-tality. Nonalcoholic steatohepatitis (NASH), the in ﬂammatory sub-\ntype of NAFLD, has a higher probability of progressing to end-stageliver diseases, such as cirrhosis and hepatic carcinoma\n1,2. However,\nthere remain dif ﬁculties in the treatment of NASH due to a lack of\napproved pharmacological agents3. Therefore, it is urgent to identify\nnovel targets for better implementation of NASH treatment.Liver macrophages have been shown to be involved in the pro-\ngression of steatohepatitis and subsequent hepatic ﬁbrosis4. The liver\nharbors the largest proportion (~80%) of macrophages in the body5,', metadata={'source': 'data/LILRB2:PirB mediates macrophage recruitment in fi

In [19]:
# Display the metadata attached to that chunk (source path and page index).
text[2].metadata

{'source': 'data/LILRB2:PirB mediates macrophage recruitment in fibrogenesis of nonalcoholic steatohepatitis.pdf',
 'page': 0}

In [21]:
from langchain_community.embeddings import HuggingFaceEmbeddings
# Re-creates the embedding model already configured earlier in the notebook.
# NOTE(review): presumably this must be the same model that produced the
# vectors stored in the saved FAISS index - confirm.
embeddings = HuggingFaceEmbeddings(
    model_name="BAAI/bge-base-en-v1.5",
)

In [22]:
from langchain.vectorstores import FAISS

In [23]:
# Load the persisted FAISS index when the "vectors" folder exists; otherwise
# build it from the chunks in `text` and save it, so the notebook also runs
# from a fresh checkout (the original only loaded an index that no cell creates).
# NOTE(security): allow_dangerous_deserialization=True unpickles data from the
# index folder - only load folders you created yourself.
if os.path.isdir("vectors"):
    new_vector_store = FAISS.load_local("vectors", embeddings, allow_dangerous_deserialization=True)
else:
    new_vector_store = FAISS.from_documents(text, embeddings)
    new_vector_store.save_local("vectors")

In [24]:
# Expose the vector store through the retriever interface (default settings).
# NOTE(review): the name is misspelled ("retriver"), but later cells refer to
# it under this exact name, so it is kept.
retriver = new_vector_store.as_retriever()

In [25]:
# Fetch the chunks the retriever returns for the query. Retrievers are
# Runnables, so call .invoke() (as the chains further down do) instead of the
# deprecated get_relevant_documents(); the result is the same list of documents.
qa = retriver.invoke("what is Nash ? ")

In [28]:
# Print the metadata (source file and page) of the first retrieved chunk.
print(qa[0].metadata)

{'source': 'data/LILRB2:PirB mediates macrophage recruitment in fibrogenesis of nonalcoholic steatohepatitis.pdf', 'page': 1}


In [29]:
# Print the first retrieved chunk in full: page content plus metadata.
print(qa[0])

page_content='during NASH. To elucidate whether such a mechanism was at play inArticle https://doi.org/10.1038/s41467-023-40183-3\nNature Communications |         (2023) 14:4436 2' metadata={'source': 'data/LILRB2:PirB mediates macrophage recruitment in fibrogenesis of nonalcoholic steatohepatitis.pdf', 'page': 1}


In [32]:
import os
from getpass import getpass

# Keep the secret out of the notebook: reuse OPENAI_API_KEY when it is already
# set in the environment, otherwise prompt for it without echoing the input.
# (Unconditionally assigning "" would clobber a key exported before the kernel
# started and leave the OpenAI client without credentials.)
if not os.environ.get("OPENAI_API_KEY"):
    os.environ["OPENAI_API_KEY"] = getpass("OpenAI API key: ")

In [33]:
from langchain.chat_models import ChatOpenAI 

In [34]:
from langchain.memory import ConversationBufferMemory

In [39]:
from langchain.memory import ConversationBufferWindowMemory

In [40]:
# Chat model used by the conversational retrieval chain.
llm = ChatOpenAI(model_name='gpt-3.5-turbo-0125')

# ConversationalRetrievalChain reads earlier turns from the "chat_history"
# input. The memory's default key is "history", which leaves "chat_history"
# missing when the chain is invoked, so the memory is keyed explicitly and
# returns message objects. k=3 is the size of the memory window.
conversation = ConversationalRetrievalChain.from_llm(
    llm=llm,
    retriever=retriver,
    memory=ConversationBufferWindowMemory(
        k=3,
        memory_key="chat_history",
        return_messages=True,
    ),
)

In [45]:
# conversation.invoke({'question':'what is Nash ?'})

In [46]:
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnablePassthrough


from langchain import hub

In [62]:
# Building blocks for retrieval-augmented answers over the indexed PDF.
# NOTE(review): k=1 means a single chunk is retrieved per question.
retriever = new_vector_store.as_retriever(
    search_kwargs={"k": 1},
)
# Prompt pulled from the LangChain hub; it takes `context` and `question`.
prompt = hub.pull("rlm/rag-prompt")
# Chat model that writes the final answer (temperature 0).
llm = ChatOpenAI(
    temperature=0,
    model_name="gpt-3.5-turbo",
)




In [63]:
# Display the pulled prompt template to inspect its input variables and text.
prompt

ChatPromptTemplate(input_variables=['context', 'question'], metadata={'lc_hub_owner': 'rlm', 'lc_hub_repo': 'rag-prompt', 'lc_hub_commit_hash': '50442af133e61576e74536c6556cefe1fac147cad032f4377b60c436e6cdcb6e'}, messages=[HumanMessagePromptTemplate(prompt=PromptTemplate(input_variables=['context', 'question'], template="You are an assistant for question-answering tasks. Use the following pieces of retrieved context to answer the question. If you don't know the answer, just say that you don't know. Use three sentences maximum and keep the answer concise.\nQuestion: {question} \nContext: {context} \nAnswer:"))])

In [64]:
from langchain.chains import ConversationChain

In [65]:
# Plain chat chain (no retriever) with a windowed conversation memory (k=3).
# NOTE(review): this rebinds `conversation`, replacing the chain assigned to
# that name earlier in the notebook.
conversation = ConversationChain(
    memory=ConversationBufferWindowMemory(k=3),
    verbose=True,
    llm=llm,
)

In [66]:
def format_docs(docs):
    """Merge the text of several documents into one string.

    Args:
        docs: Iterable of objects exposing a ``page_content`` string.

    Returns:
        The ``page_content`` values in input order, separated by a blank
        line; an empty string when ``docs`` yields nothing.
    """
    contents = [item.page_content for item in docs]
    separator = "\n\n"
    return separator.join(contents)


# Question -> answer pipeline:
#   1. build the prompt inputs: "context" is the retrieved documents flattened
#      by format_docs, "question" is the raw input passed through unchanged;
#   2. fill the prompt, 3. call the chat model, 4. parse the reply to a string.
rag_chain = (
    {
        "context": retriever | format_docs,
        "question": RunnablePassthrough(),
    }
    | prompt | llm | StrOutputParser()
)

In [67]:
# Ask a sample question; the cell output is the chain's string answer.
rag_chain.invoke("what is NASH ?")

'NASH stands for Nonalcoholic Steatohepatitis, which is a type of liver disease characterized by inflammation and liver cell damage. It is often associated with obesity and other metabolic risk factors. Research is ongoing to understand the mechanisms involved in the development and progression of NASH.'

In [68]:
from langchain_core.runnables import RunnableParallel

# Answering sub-chain: takes a dict that already carries "context" (documents)
# and "question", replaces "context" with its flattened text, then prompts the
# model and parses the reply to a string.
rag_chain_from_docs = (
    RunnablePassthrough.assign(
        context=lambda inputs: format_docs(inputs["context"]),
    )
    | prompt | llm | StrOutputParser()
)

# Outer chain: retrieve documents and pass the question through, then add the
# generated text under "answer" so the retrieved documents stay in the result.
rag_chain_with_source = RunnableParallel(
    context=retriever,
    question=RunnablePassthrough(),
).assign(answer=rag_chain_from_docs)



In [69]:
# Run the source-returning chain; the result is a dict holding the retrieved
# context, the question and the generated answer.
a = rag_chain_with_source.invoke("what is NASH ?")

In [71]:
# Display the full result dict (context documents, question, answer).
a

{'context': [Document(page_content='during NASH. To elucidate whether such a mechanism was at play inArticle https://doi.org/10.1038/s41467-023-40183-3\nNature Communications |         (2023) 14:4436 2', metadata={'source': 'data/LILRB2:PirB mediates macrophage recruitment in fibrogenesis of nonalcoholic steatohepatitis.pdf', 'page': 1})],
 'question': 'what is NASH ?',
 'answer': 'NASH stands for nonalcoholic steatohepatitis, a type of liver disease characterized by inflammation and liver cell damage. It is often associated with obesity and other metabolic risk factors. The mechanism of NASH is still being studied to understand its development and progression.'}

In [70]:
# Display the source file and page of the first document used as context.
a['context'][0].metadata

{'source': 'data/LILRB2:PirB mediates macrophage recruitment in fibrogenesis of nonalcoholic steatohepatitis.pdf',
 'page': 1}