## Import required libraries

In [23]:
import os
import ollama
from dotenv import load_dotenv


from langchain_community.document_loaders import WebBaseLoader
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_community.embeddings import OllamaEmbeddings
from langchain_community.vectorstores import FAISS


from langchain_groq import ChatGroq
from langchain.schema import HumanMessage,SystemMessage
from langchain_core.output_parsers import StrOutputParser

In [6]:
# Load settings via python-dotenv, then read the Groq API key
# from the process environment (None when the variable is not set).
load_dotenv()
groq_api_key = os.environ.get("GROQ_API_KEY")


## Data ingestion
Scrape the source page from the website.

In [41]:
# Alternative source kept for reference:
# html_loader=WebBaseLoader("https://docs.smith.langchain.com/tutorials/Developers/evaluation")

# Page to scrape for this run.
page_url = "https://en.wikipedia.org/wiki/Tyagaraja"
html_loader = WebBaseLoader(page_url)

In [42]:
# Fetch the page through the loader and keep the result in `docs`
# (the next cell's output shows it is a list of Document objects).
docs=html_loader.load()

In [43]:
# Echo `docs` as the cell output to inspect what was loaded.
# NOTE(review): this prints the full page text, including site navigation;
# consider showing a truncated preview instead.
docs

[Document(metadata={'source': 'https://en.wikipedia.org/wiki/Tyagaraja', 'title': 'Tyagaraja - Wikipedia', 'language': 'en'}, page_content='\n\n\n\nTyagaraja - Wikipedia\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\nJump to content\n\n\n\n\n\n\n\nMain menu\n\n\n\n\n\nMain menu\nmove to sidebar\nhide\n\n\n\n\t\tNavigation\n\t\n\n\nMain pageContentsCurrent eventsRandom articleAbout WikipediaContact usDonate\n\n\n\n\n\n\t\tContribute\n\t\n\n\nHelpLearn to editCommunity portalRecent changesUpload file\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\nSearch\n\n\n\n\n\n\n\n\n\n\n\nSearch\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\nAppearance\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\nCreate account\n\nLog in\n\n\n\n\n\n\n\n\nPersonal tools\n\n\n\n\n\n Create account Log in\n\n\n\n\n\n\t\tPages for logged out editors learn more\n\n\n\nContributionsTalk\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\nContents\nmove to sidebar\nhide\n\n\n\n\n(Top)\n\n\n\n\n\n1\nPersonal life and 

## Chunking the large document

In [44]:
# Split the loaded page into chunks of up to 1000 characters, with a
# 200-character overlap between neighbouring chunks.
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=1000,
    chunk_overlap=200,
)

# `documents` holds the resulting chunks and feeds the vector store below.
documents = text_splitter.split_documents(docs)

In [45]:
# Echo the chunk list as the cell output to inspect the split result.
documents

[Document(metadata={'source': 'https://en.wikipedia.org/wiki/Tyagaraja', 'title': 'Tyagaraja - Wikipedia', 'language': 'en'}, page_content='Tyagaraja - Wikipedia\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\nJump to content\n\n\n\n\n\n\n\nMain menu\n\n\n\n\n\nMain menu\nmove to sidebar\nhide\n\n\n\n\t\tNavigation\n\t\n\n\nMain pageContentsCurrent eventsRandom articleAbout WikipediaContact usDonate\n\n\n\n\n\n\t\tContribute\n\t\n\n\nHelpLearn to editCommunity portalRecent changesUpload file\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\nSearch\n\n\n\n\n\n\n\n\n\n\n\nSearch\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\nAppearance\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\nCreate account\n\nLog in\n\n\n\n\n\n\n\n\nPersonal tools\n\n\n\n\n\n Create account Log in\n\n\n\n\n\n\t\tPages for logged out editors learn more\n\n\n\nContributionsTalk\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\nContents\nmove to sidebar\nhide\n\n\n\n\n(Top)\n\n\n\n\n\n1\nPersonal life and backgrou

## Converting text to vector embeddings

Similarity search (e.g. cosine similarity) operates on embeddings, so the text chunks are first converted to vectors.

In [46]:
# Embedding client for the "mxbai-embed-large:latest" model.
# NOTE(review): presumably needs a running Ollama server with this model
# already pulled - confirm before a fresh run.
embeddings = OllamaEmbeddings(model="mxbai-embed-large:latest")


## Storing the embeddings in a FAISS vector store

In [47]:
# Build the FAISS vector store from the chunked documents, using
# `embeddings` to vectorise them.
vectorstore_db=FAISS.from_documents(documents,embeddings)

In [52]:
# Echo the store object to confirm it was created (shows only its repr).
vectorstore_db

<langchain_community.vectorstores.faiss.FAISS at 0x1caac65ce20>

## Retrieving the data

In [53]:
# Function to retrieve the most relevant document based on the user query
def retrieve_relevant_document(query):
    """Return the text of the stored chunk most similar to ``query``.

    Runs a similarity search against the module-level ``vectorstore_db``
    and returns the ``page_content`` of the top-ranked hit.

    Args:
        query: Natural-language question to search the vector store with.

    Returns:
        The ``page_content`` of the best-matching document, or ``None``
        when the search yields no results.
    """
    # Only the top hit is ever returned, so request exactly one result
    # rather than fetching extra matches that are immediately discarded.
    matches = vectorstore_db.similarity_search(query, k=1)
    if not matches:
        return None
    return matches[0].page_content

In [56]:
# Smoke-test retrieval with a sample question and print the returned text.
sample_answer = retrieve_relevant_document("when tyagayya born?")
print(sample_answer)

Tyagaraja was born Kakarla Tyagabrahmam in 1767[Note 1] to a Telugu Vaidiki Mulakanadu Brahmin family[4][5] in Tiruvarur in present-day Tiruvarur District of Tamil Nadu. There is a school of thought led by musicologist B. M. Sundaram that contests this and proposes Tiruvaiyaru as his birthplace. He is a famous musician and his family name 'Kakarla' indicates that they were originally migrants from the village of the same name in the Cumbum taluk of Prakasam district, Andhra Pradesh. His family belonged to the Smarta tradition and Bharadvaja gotra.  Tyagaraja was the third son of his parents, and Panchanada Brahmam and Panchapakesha Brahmam were his elder brothers. He was named Tyagabrahmam/Tyagaraja after Tyagaraja, the presiding deity of the temple at Thiruvarur, the place of his birth. Tyagaraja's maternal uncle was Giriraja Kavi. Giriraja Kavi was a poet and musician. Giriraja was born in Kakarla village, Cumbum taluk in Prakasam district, Andhra Pradesh.[6] He is believed to have


In [51]:
# NOTE(review): every line in this cell is commented out, so nothing here runs.
# It is a draft of the end-to-end flow: build the ChatGroq model, retrieve a
# document for a user-entered query, send both to the model, and print the
# parsed reply. It also contains a second definition of
# retrieve_relevant_document, which would replace the earlier one if this
# cell were re-enabled.

# # Initialize the ChatGroq model with the specified model ID and API key
# model = ChatGroq(model="Gemma2-9b-It", groq_api_key=groq_api_key)

# # Initialize the output parser
# parser = StrOutputParser()

# # Function to retrieve the most relevant document based on the user query
# def retrieve_relevant_document(query):
#     # Use the vector store to find the most relevant document for the query
#     relevant_document = vectorstore_db.similarity_search(query, k=1)  # Adjust k as needed
    
#     # Log the retrieved document details
#     if relevant_document:
#         print("Retrieved document from vector store:")
#         print("Document content:", relevant_document[0].page_content)
#         print("Document metadata:", relevant_document[0].metadata)
#     else:
#         print("No relevant document found in the vector store.")
    
#     return relevant_document[0].page_content if relevant_document else None

# # Get user input
# input_text1 = input("Please enter your query: ")

# # Retrieve the relevant document based on the user query
# relevant_doc = retrieve_relevant_document(input_text1)

# # Confirm whether a relevant document was found
# if relevant_doc:
#     print("Relevant document found and will be used in the model input.")
# else:
#     print("No relevant document found. Proceeding without it.")

# # Prepare the messages for sending to the model
# if relevant_doc:
#     messages = [
#         SystemMessage(content="You are a helpful assistant."),
#         HumanMessage(content=f"{input_text1}\n\nRelevant Document: {relevant_doc}")
#     ]
# else:
#     messages = [
#         SystemMessage(content="You are a helpful assistant."),
#         HumanMessage(content=input_text1)
#     ]

# # Invoke the model and get the result
# result = model.invoke(messages)

# # Parse and print the result
# parsed_output = parser.invoke(result)

# print("parsed output")
# print(parsed_output)