In [15]:
import os
from dotenv import load_dotenv
# Load variables from a local .env file (if present) into os.environ.
load_dotenv()

# load_dotenv() already exports the keys, so copying os.getenv(k) back into
# os.environ[k] is redundant -- and it fails with an opaque
# "TypeError: str expected, not NoneType" when a key is unset.
# Instead, check the keys up front and report exactly which are missing.
REQUIRED_ENV_VARS = ("HF_API_KEY", "PINECONE_API_KEY", "OPENAI_API_KEY")
missing_env_vars = [name for name in REQUIRED_ENV_VARS if not os.getenv(name)]
if missing_env_vars:
    raise RuntimeError(
        "Missing required environment variable(s): "
        + ", ".join(missing_env_vars)
        + ". Define them in your .env file or in the shell before running this notebook."
    )


In [16]:
# Disabled alternative: Hugging Face sentence-transformer embeddings.
# from langchain_huggingface import HuggingFaceEmbeddings

# embeddings = HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2")
from langchain_openai import OpenAIEmbeddings

# Embedding model handed to the Pinecone vector store further down.
# NOTE(review): the Pinecone index dimension has to match this model's
# output size -- confirm when switching between the two options above.
embeddings = OpenAIEmbeddings(model="text-embedding-3-small")

In [17]:
# Pinecone credential read from the process environment (None when unset).
pinecone_api_key = os.environ.get("PINECONE_API_KEY")


In [18]:
from pinecone import Pinecone, ServerlessSpec

# Pinecone client shared by the index checks and index creation below.
pc = Pinecone(api_key=pinecone_api_key)

In [19]:
# Name of the Pinecone index used throughout this notebook.
index_name = "n8n"
# Last expression of the cell: displays whether that index already exists.
pc.has_index(index_name)


True

In [20]:
# Create the index only if it does not exist yet.
# The index dimension must equal the size of the vectors produced by the
# embedding model. `embeddings` uses OpenAI text-embedding-3-small, which
# returns 1536-dimensional vectors by default; the previous value of 384 is
# the size of all-MiniLM-L6-v2 and would make Pinecone reject upserts and
# queries built with the OpenAI embeddings.
embedding_dimension = 1536
if not pc.has_index(index_name):
    pc.create_index(
        index_name,
        dimension=embedding_dimension,
        metric="cosine",
        spec=ServerlessSpec(cloud="aws", region="us-east-1"),
    )

In [21]:
# Open a handle to the index by name.
index = pc.Index(index_name)

In [22]:
from langchain_pinecone import PineconeVectorStore

# LangChain vector store bound to the Pinecone index; queries are embedded
# with `embeddings` before being sent to the index.
vector_store = PineconeVectorStore(
    embedding=embeddings,
    index_name=index_name,
)

In [23]:
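# Add the document chunks here, giving each chunk a unique ID (e.g. a UUID).
# Note: add_documents expects LangChain Document objects rather than plain dicts;
# the IDs are passed separately through the `ids` argument.
# from uuid import uuid4
# from langchain_core.documents import Document
# documents = [
#     Document(page_content="The capital of France is Paris."),
#     Document(page_content="Paris is a city in France."),
#     Document(page_content="The Eiffel Tower is in Paris."),
# ]
# vector_store.add_documents(documents=documents, ids=[str(uuid4()) for _ in documents])
# Skipped in this notebook because the Pinecone index already contains the documents
# (the research paper on the attention mechanism).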


In [24]:
# `similarity_search` has no `threshold` parameter, so the intended cutoff was
# never applied (depending on the installed version the stray keyword is
# either ignored or rejected). Score filtering is done through
# `similarity_search_with_relevance_scores` with `score_threshold`, which
# returns (Document, relevance_score) pairs with scores normalized to [0, 1].
scored_matches = vector_store.similarity_search_with_relevance_scores(
    "What is the name of the research paper?",
    k=1,
    score_threshold=0.5,
)
# Keep `result` a plain list of Documents, as before.
result = [doc for doc, _score in scored_matches]
result

[]

In [25]:
# A bare `threshold=` keyword is not a retriever option, so no cutoff was
# being applied. Score filtering is requested with
# search_type="similarity_score_threshold" plus a `score_threshold` entry
# in search_kwargs.
retriever = vector_store.as_retriever(
    search_type="similarity_score_threshold",
    search_kwargs={"k": 3, "score_threshold": 0.5},
)
retriever.invoke("What is the name of the research paper?")

[]

In [26]:
from langchain_openai import ChatOpenAI

# Chat model that generates the final answer in the chain below.
gptLlm = ChatOpenAI(model="o3-mini")

In [27]:
#build RAG pipeline like previous notebook
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnablePassthrough

prompt = ChatPromptTemplate.from_messages([
    ("system", "You are a helpful assistant."),
    ("human", "{question}")
])

output_parser = StrOutputParser()

rag_chain = (
    {"question": RunnablePassthrough()}
    | prompt
    | gptLlm
    | output_parser
)

rag_chain.invoke("What is the name of the research paper?")

'Could you please clarify which research paper you’re referring to or provide additional context? That way, I can help you find the correct name.'