In [13]:
from langchain_community.chat_models import ChatOllama
from langchain.prompts import ChatPromptTemplate
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnablePassthrough

# from langchain_huggingface import HuggingFaceEmbeddings
from langchain_ollama import OllamaEmbeddings
# from langchain_mistralai import MistralAIEmbeddings

from langchain import hub
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.document_loaders import WebBaseLoader
from langchain_community.vectorstores import Chroma
from langchain.docstore.document import Document

from bs4 import BeautifulSoup
import requests

In [14]:
# Fetch the webpage content
url = "https://ramaniitrgoyal92.github.io/"
response = requests.get(url)

soup = BeautifulSoup(response.text, 'html.parser')
main_text = soup.get_text(strip=True)
print(main_text)

with open('Raman_website_text.txt', 'w') as file:
    file.write(main_text)

About me - Raman Goyal, Ph.D.Raman Goyal, Ph.D.PublicationsTalksCVRaman GoyalResearch ScientistFollowSan Francisco Bay AreaSRI InternationalEmailGoogle ScholarResearchGateGithubLinkedInAbout meI am currently working as an Advanced Computer Scientist at the renowned Palo Alto Research Center (PARC), part ofSRI International. At PARC-SRI, I am involved in several government and commercially funded interdisciplinary data science projects in the area of Reinforcement Learning, Machine Learning, Motion Planning, Controls & Optimization. I graduated with a Ph.D. from the Department of Aerospace Engineering atTexas A&M Universityin 2020 with a focus on optimal control of soft robotics using data-driven reinforcement learning approaches. I completed my Bachelor’s in Mechanical Engineering from Indian Institute of Technology Roorkee (IIT Roorkee) in 2013.Research Interest:Motion Planning and Feedback Control for Autonomous SystemsReinforcement Learning Algorithms for High-DOF Robotic SystemsOpt

In [15]:
model = "llama3.1"  # "mistral" | "llama3" | "phi3" | "dolphin-llama3"
 
llm = ChatOllama(model=model)

embd = OllamaEmbeddings(model=model)

doc = f"model name: Raman_website, description: {main_text}"
db = Chroma.from_documents([Document(page_content=doc)], embd, persist_directory="chrome_text")

retriever = db.as_retriever(search_kwargs={"k": 2})

Without RAG

In [16]:
question = "Who is Raman Goyal?"
llm_answer = llm.invoke(question)
llm_answer.content

"I don't have enough information to provide a specific answer about who Raman Goyal is. There may be many individuals with that name, and without more context or details, it's challenging to identify the one you're referring to.\n\nCould you please provide more information or clarify which Raman Goyal you are asking about? For example, might they be a public figure, an academic, an artist, or someone else? This will help me better understand your question and attempt to give you a more accurate answer."

With RAG

In [17]:
template = """Answer the question based only on the following context:
{context}

Question: {question}
"""

prompt = ChatPromptTemplate.from_template(template)
prompt

ChatPromptTemplate(input_variables=['context', 'question'], input_types={}, partial_variables={}, messages=[HumanMessagePromptTemplate(prompt=PromptTemplate(input_variables=['context', 'question'], input_types={}, partial_variables={}, template='Answer the question based only on the following context:\n{context}\n\nQuestion: {question}\n'), additional_kwargs={})])

In [18]:
rag_chain = (
    {"context": retriever, "question": RunnablePassthrough()}
    | prompt
    | llm
    | StrOutputParser()
)

llm_rag_answer = rag_chain.invoke(question)
llm_rag_answer

"Raman Goyal is a Research Scientist who currently works as an Advanced Computer Scientist at the Palo Alto Research Center (PARC), part of SRI International. He holds a Ph.D. from Texas A&M University and a Bachelor's in Mechanical Engineering from Indian Institute of Technology Roorkee."