In [1]:
from langchain import hub
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.vectorstores import Chroma
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnablePassthrough
from langchain_openai import ChatOpenAI, OpenAIEmbeddings
from langchain_community.llms import Ollama
from langchain_community.embeddings import OllamaEmbeddings
from langchain_community.document_loaders import PyPDFLoader
from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain.chains import create_retrieval_chain

In [2]:
import os
from dotenv import load_dotenv

In [3]:
load_dotenv()

True

In [4]:
os.environ['LANGCHAIN_TRACING_V2'] = 'true'
os.environ['LANGCHAIN_ENDPOINT'] = 'https://api.smith.langchain.com'
os.environ['LANGCHAIN_API_KEY'] = os.getenv("LANGCHAIN_API_KEY")
os.environ['OPENAI_API_KEY'] =  os.getenv("OPENAI_API_KEY")

# Loading Document

In [13]:
document = PyPDFLoader("D:\internship\Verisk\DocumentSummarizer\document.pdf")
pages = document.load_and_split()


  document = PyPDFLoader("D:\internship\Verisk\DocumentSummarizer\document.pdf")


In [14]:
pages[0]

Document(metadata={'source': 'D:\\internship\\Verisk\\DocumentSummarizer\\document.pdf', 'page': 0}, page_content='Subham Adhikari  \nBhadrabas -04, Kageshwori Manohara, Kathmandu  \nsubhamadhikari7@gmail.com • + 977 -974842596 4  \nGitHub: github.com /subhamadhikari  \n \nEDUCATION  \nIslington College          (2022 -2025)  \nB.Sc. (hons) Computing with AI  \n• Completed the first year achieving grade A in every module  (Robotics & IOT, Fundamentals of \nComputing, Programming, Calculus & Linear Algebra) . \n• During the second year I completed the modules: Software Engineering, Data Structure and \nSpecialist Programming, Database , Further Calculus, Probability & Statistics, Data Science  \nRelevant Coursework  \n• Java Swing GUI Application:  Developed a graphical user interface (GUI) based banking application \nusing Java Swing, simulating banking operations like account management and transaction \nprocessing.  \n• TravelHaunt - Full Stack Web Application:  Designed and implemen

In [15]:
text = document.load()

In [16]:
text_splitter = RecursiveCharacterTextSplitter(chunk_size=1200,chunk_overlap=300)
splitted_doc = text_splitter.split_documents(text)

In [17]:
len(splitted_doc)

5

# Embedding and Vector Store

In [19]:
ollama_emb = OllamaEmbeddings(
    model="gemma2:2b",
)
vector_store = Chroma.from_documents(documents=splitted_doc,embedding=ollama_emb)
# vector_store = Chroma.from_documents(documents=splitted_doc,embedding=OpenAIEmbeddings())

In [21]:
question = "What are the mentioned projects"
result = vector_store.similarity_search(question)
result[0].page_content
# result

'Subham Adhikari  \nBhadrabas -04, Kageshwori Manohara, Kathmandu  \nsubhamadhikari7@gmail.com • + 977 -974842596 4  \nGitHub: github.com /subhamadhikari  \n \nEDUCATION  \nIslington College          (2022 -2025)  \nB.Sc. (hons) Computing with AI  \n• Completed the first year achieving grade A in every module  (Robotics & IOT, Fundamentals of \nComputing, Programming, Calculus & Linear Algebra) . \n• During the second year I completed the modules: Software Engineering, Data Structure and \nSpecialist Programming, Database , Further Calculus, Probability & Statistics, Data Science  \nRelevant Coursework  \n• Java Swing GUI Application:  Developed a graphical user interface (GUI) based banking application \nusing Java Swing, simulating banking operations like account management and transaction \nprocessing.  \n• TravelHaunt - Full Stack Web Application:  Designed and implemented a full -stack application \nusing JSP and SQL. The project facilitated room hosting and booking services, with

## Chain and Retrievals

In [22]:
# llm = ChatOpenAI(model_name="gpt-3.5-turbo",temperature=0) # temperature-0 : applying softmax and random sampling
llm = Ollama(model="gemma2:2b")

In [23]:
template = """
Answer the given question based only on the given context
context: {context},

If you find anything outside of the context reply that you are unable to do so, just request the user to 
ask proper question.

question: {question}
"""
prompt =  ChatPromptTemplate.from_template(template)

In [24]:
document_chain = create_stuff_documents_chain(llm,prompt)

In [25]:
retriever = vector_store.as_retriever()
retrieval_chain = create_retrieval_chain(retriever,document_chain)

In [26]:
question = "Give me 1 project to add in the document"

In [27]:
chain = prompt | llm
response = chain.invoke({"context":retriever.get_relevant_documents(question),"question":question})

  warn_deprecated(


"Based on the provided text, here is a project mentioned within the document:  \n* **Temperature & Humidity Monitoring System**\n\nLet me know if you'd like to explore other projects or details! \n"

In [28]:
response

"Based on the provided text, here is a project mentioned within the document:  \n* **Temperature & Humidity Monitoring System**\n\nLet me know if you'd like to explore other projects or details! \n"

## Conversational Memory

In [29]:
from langchain.chains import create_history_aware_retriever
from langchain_core.prompts import MessagesPlaceholder
from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain_core.chat_history import BaseChatMessageHistory
from langchain_community.chat_message_histories import ChatMessageHistory
from langchain_core.runnables.history import RunnableWithMessageHistory

In [30]:
system_prompt = (
    "You are an assistant for question-answering tasks. "
    "Use the following pieces of retrieved context to answer "
    "the question. If the answer is not available in the retrieved context. Just give relevant message that you are unable to do so. Use three sentences maximum and keep the "
    "answer concise."
    "\n\n"
    "{context}"
)
contextualize_q_system_prompt = (
    "Given a chat history, and the latest user question "
    "which might reference context in the chat history AND PROVIDED DOCUMENT, "
    "formulate a standalone question which can be understood "
    "without the chat history. Do NOT answer the question, "
    "just reformulate it if needed and otherwise return it as is."
)
contextualize_q_prompt = ChatPromptTemplate.from_messages(
    [
        ("system", contextualize_q_system_prompt),
        MessagesPlaceholder("chat_history"),
        ("human", "{input}"),
    ]
)
history_aware_retriever = create_history_aware_retriever(
    llm, retriever, contextualize_q_prompt
)

In [31]:
qa_prompt = ChatPromptTemplate.from_messages(
    [
        ("system", system_prompt),
        MessagesPlaceholder("chat_history"),
        ("human", "{input}"),
        
    ]
)
question_answer_chain = create_stuff_documents_chain(llm, qa_prompt)
rag_chain = create_retrieval_chain(history_aware_retriever, question_answer_chain)

In [32]:
store = {}


def get_session_history(session_id: str) -> BaseChatMessageHistory:
    if session_id not in store:
        store[session_id] = ChatMessageHistory()
    return store[session_id]


conversational_rag_chain = RunnableWithMessageHistory(
    rag_chain,
    get_session_history,
    input_messages_key="input",
    history_messages_key="chat_history",
    output_messages_key="answer",
)

In [34]:
conversational_rag_chain.invoke(
    {"input": "What are the mentioned projects"},
    config={
        "configurable": {"session_id": "abc123"}
    },  # constructs a key "abc123" in `store`.
)["answer"]

'The mentioned projects include a MERN stack application, Human Emotion Detection Project, Temperature & Humidity Monitoring System, and Python Terminal Stock Management System. \n'

In [35]:
conversational_rag_chain.invoke(
    {"input": "Recommend what additional project can i add"},
    config={"configurable": {"session_id": "abc123"}},
)["answer"]

'You could explore projects related to web development, AI/machine learning, or data analysis.  For example, you could create a chatbot or develop an image recognition system.   \n\n\n'

In [36]:
conversational_rag_chain.invoke(
    {"input": "Give me something different"},
    config={"configurable": {"session_id": "abc123"}},
)["answer"]

"The provided text details Subham Adhikari's skills and academic background in the areas of software engineering, programming, data science, and AI. \n\nIt highlights his expertise in various technologies including Python, JavaScript, ReactJS, Express.js, TensorFlow, MongoDB, and Firebase along with database tools like SQL and OracleDB.  \n"