In [1]:
import os
from dotenv import load_dotenv
# Read key/value pairs from a local .env file (if one is found) into the
# process environment so the API keys below can be fetched with os.getenv().
load_dotenv()

True

In [2]:
import warnings

# os.environ only accepts str values, so `os.environ[k] = os.getenv(k)` raises
# TypeError whenever a key is missing from both the shell and the .env file.
# Re-export only the keys that are actually set, and warn about the others so
# the later authentication failure is easy to trace back to this cell.
for _key in ("GROQ_API_KEY", "LANGCHAIN_API_KEY", "LANGCHAIN_PROJECT"):
    _value = os.getenv(_key)
    if _value is None:
        warnings.warn(
            f"{_key} is not set; add it to your .env file or shell environment.",
            stacklevel=2,
        )
    else:
        os.environ[_key] = _value

## Langsmith Tracking And Tracing
os.environ["LANGCHAIN_TRACING_V2"]="true"

In [3]:
# os.environ rejects None, so assigning os.getenv("HF_TOKEN") directly raises
# TypeError when the token is absent. Only re-export it when it is set.
# NOTE(review): the embedding model used below appears to be a public one, so
# a missing token is presumably not fatal - confirm.
_hf_token = os.getenv("HF_TOKEN")
if _hf_token is not None:
    os.environ['HF_TOKEN'] = _hf_token

In [4]:
from langchain_community.document_loaders import PyPDFLoader
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnablePassthrough

In [5]:
from langchain_huggingface import HuggingFaceEmbeddings
# Sentence-embedding model used both to index the PDF chunks and to embed
# queries at retrieval time.
# NOTE(review): the FAISS index created later is sized 768, which presumably
# matches this model's output dimension - keep the two in sync.
embedding = HuggingFaceEmbeddings(model_name="all-mpnet-base-v2")

  from .autonotebook import tqdm as notebook_tqdm


In [6]:
# Path of the PDF to index. The default is the author's local copy; set the
# RAG_PDF_PATH environment variable (shell or .env) to point at a different
# file without editing the notebook.
FILE_PATH = os.getenv(
    "RAG_PDF_PATH",
    r"D:\Agentic_AI\2-Langchain\2.4-VectorDatabase\1 Dynamic weighted hypergraph convolutional network for brain functional (1).pdf",
)

In [7]:
# Create a PDF loader for the file at FILE_PATH; the documents themselves are
# produced by the loader.load() call in the next cell.
loader = PyPDFLoader(FILE_PATH)

In [8]:
# Load the PDF into a list of documents and display how many were produced
# (presumably one per PDF page - TODO confirm against the loader docs).
pages=loader.load()
len(pages)

13

In [9]:
# Break the loaded pages into smaller overlapping chunks for retrieval:
# chunk_size=500 with chunk_overlap=50 between neighbouring chunks.
splitter = RecursiveCharacterTextSplitter(
    chunk_overlap=50,
    chunk_size=500,
)
split_docs = splitter.split_documents(pages)

In [10]:
import faiss
from langchain_community.vectorstores import FAISS
from langchain_community.docstore.in_memory import InMemoryDocstore

In [15]:
# Size the index from the embedding model itself instead of hard-coding 768,
# so swapping the model cannot silently produce a dimension mismatch.
embedding_dim = len(embedding.embed_query("dimension probe"))

# HNSW graph index over raw vectors; the second argument (10) is the HNSW
# connectivity parameter M (links per node).
index = faiss.IndexHNSWFlat(embedding_dim, 10)

In [16]:
# Wrap the raw FAISS index in a LangChain vector store. The docstore and the
# index-to-docstore-id mapping both start out empty here.
vector_store = FAISS(
    index=index,
    embedding_function=embedding,
    index_to_docstore_id={},
    docstore=InMemoryDocstore(),
)

In [17]:
# Embed every chunk and add it to the vector store; the call's return value
# (the list of generated document ids) is what the cell displays.
# NOTE(review): re-running this cell presumably adds the same chunks again
# under new ids - rebuild `vector_store` first if you need to re-index.
vector_store.add_documents(documents=split_docs)

['cb0000de-2b37-4afd-8834-0fea4254874b',
 'c2349165-628a-4e71-a5f2-ec253a90d08e',
 '925e1a2b-88f5-4b5a-b629-1f62dbb2fe2d',
 'da7cdf63-5c53-4f35-b895-1a592f376c4d',
 'f81f6b76-383c-48ae-bd74-8e4dbbb71800',
 '1068a62c-3a36-4eeb-b202-ffaf022857f4',
 '509f51a7-fc7a-4f76-aa3b-a340582f5f00',
 '21d80b78-5446-441b-9f60-32c5d14ca8fe',
 'f4b4fece-df85-4b7f-a51f-fdc28457468d',
 '9b826cbd-3fd5-429d-988f-837547e845ff',
 '4a2bea5b-5149-4619-ad19-39744cbe44ed',
 '4212e91a-bf0f-4767-aa43-5a7058771941',
 'c79615f4-4751-449b-848b-3153020f701a',
 '50791e9b-4132-442d-bd39-79d559a5bac6',
 'b04f11c4-9d25-49c2-bb68-d38aa70809b6',
 'f3b41a97-95c9-4a1b-bea8-4117fa087c98',
 '25c801c4-5d7d-4cc0-8d16-164bcc5b06ba',
 '670c601e-3623-4d91-b043-0a4e8e4c26b9',
 'da2ffe7a-1e51-4415-89ef-608b708386fc',
 'dbecac66-711d-44a3-aa33-51ce0eb9ae47',
 '22c8467c-93f4-414d-b661-6a442ec2a012',
 'a1bca1ad-97f4-4f21-bba5-d24f13631ef0',
 '16bfb0d7-3057-4b50-8b71-f7405b2d9758',
 '010eff67-8533-4d35-8a76-a8fc1a81d428',
 '58669a4b-daef-

In [18]:
# Expose the vector store as a retriever. k (results returned per query) is a
# tunable hyperparameter, currently set to 10.
retriever = vector_store.as_retriever(search_kwargs=dict(k=10))

In [19]:
from langchain_groq import ChatGroq
# Chat model served by Groq, used to generate the final answer.
# NOTE(review): presumably authenticates via the GROQ_API_KEY environment
# variable set earlier - confirm.
model=ChatGroq(model="llama-3.3-70b-versatile")

In [20]:
from langchain import hub
# Fetch the "rlm/rag-prompt" prompt template from the LangChain hub; the chain
# below feeds it the "context" and "question" keys.
# NOTE(review): this presumably needs network access on every fresh run.
prompt = hub.pull("rlm/rag-prompt")

In [21]:
def format_docs(docs):
    """Concatenate the text of the given documents into one context string.

    Args:
        docs: Iterable of objects exposing a ``page_content`` string attribute.

    Returns:
        The ``page_content`` values joined by a blank line, in input order;
        an empty string when ``docs`` is empty.
    """
    separator = "\n\n"
    texts = [document.page_content for document in docs]
    return separator.join(texts)

In [22]:
# RAG pipeline: the input question is sent to the retriever, whose documents
# are flattened to text by format_docs ("context"), and is also forwarded
# unchanged as "question". Both values fill the prompt, which is passed to the
# chat model; the model's reply is then parsed into a plain string.
rag_chain = (
    {"context": retriever | format_docs, "question": RunnablePassthrough()}
    | prompt
    | model
    | StrOutputParser()
)

In [26]:
# Ask a question about the indexed paper; the chain ends in StrOutputParser,
# so the displayed result is a plain string.
rag_chain.invoke("model used in the paper?")

'The model used in the paper is dwHGCN (dynamic weighted Hypergraph Convolutional Network). It is a hypergraph-based model that adaptively updates the hyperedge weights during training. The paper also compares the performance of dwHGCN with other models, including GAT, HGNN, and graph-based models like GCN and TAG.'