In [18]:
from langchain_pinecone import PineconeVectorStore
from langchain_community.document_loaders import UnstructuredURLLoader
from langchain_text_splitters import RecursiveCharacterTextSplitter
from pinecone import Pinecone, ServerlessSpec
from langchain_openai import OpenAIEmbeddings
from langchain_openai.chat_models import ChatOpenAI

In [19]:
from dotenv import load_dotenv

# Load variables from a .env file into the process environment. Presumably this supplies the
# API keys that the Pinecone and OpenAI clients below rely on — confirm against the .env file.
load_dotenv()

True

In [20]:
# Pinecone client used for every index operation below. No credentials are passed here, so they
# presumably come from the environment populated by load_dotenv() — TODO confirm.
pc = Pinecone()

In [21]:
index_name = "demo"

# Provision the serverless index only when it is missing, so re-running this cell
# does not attempt to create the same index twice.
index_already_exists = pc.has_index(index_name)
if not index_already_exists:
    serverless_spec = ServerlessSpec(cloud="aws", region="us-east-1")
    pc.create_index(
        name=index_name,
        dimension=1536,  # vector size noted for text-embedding-3-small
        metric="cosine",
        spec=serverless_spec,
    )

In [22]:
# Handle to the index created (or found) in the previous cell. Reuse `index_name` rather than
# repeating the literal so that renaming the index only has to happen in one place.
index = pc.Index(index_name)

In [23]:
# Pages to ingest into the vector store.
urls = [
    "https://docs.python.org/3/tutorial/index.html",
    "https://realpython.com/python-basics/",
    "https://www.learnpython.org/",
]

# Fetch the pages and load them as documents.
loader = UnstructuredURLLoader(urls=urls)
docs = loader.load()

# Split the loaded documents into overlapping chunks.
# NOTE(review): with chunk_size=100 the recorded retrieval output contains heading-only
# fragments; a larger chunk size may give more useful context — confirm before changing.
splitter_settings = {"chunk_size": 100, "chunk_overlap": 50}
text_splitter = RecursiveCharacterTextSplitter(**splitter_settings)
doc_splits = text_splitter.split_documents(docs)

In [24]:
# Number of chunks produced by the splitter in the previous cell.
len(doc_splits)

272

In [25]:
# Name the embedding model explicitly. The index above is documented as sized for
# text-embedding-3-small, but a bare OpenAIEmbeddings() uses the library's default model,
# which is not guaranteed to be the same one.
embeddings = OpenAIEmbeddings(model="text-embedding-3-small")

# Vector store backed by the Pinecone index handle created earlier.
vectorstore = PineconeVectorStore(embedding=embeddings, index=index)

# Embed and store every chunk; the returned value is the list of record IDs.
# NOTE(review): the recorded IDs look like fresh UUIDs, so re-running this cell presumably
# stores the same chunks again under new IDs — clear the index first if re-ingesting.
vectorstore.add_documents(documents=doc_splits)

['f4966d23-c4a2-4cce-a460-cdb610c5939d',
 '5dd91fa8-7e2d-4608-a827-a35363cddc3c',
 'e2318085-88ad-4d5d-9eb3-11420174ef20',
 '785070c6-8c54-4840-9950-139a0dbff485',
 'db992a74-f3e9-4069-8219-305d22504a12',
 'd12dab70-cc85-470b-88f1-5929e58f4b77',
 '2e4c474a-61b2-4ac6-9db6-19c0d33cbb12',
 '7ebb630f-8bf2-4fd5-974a-325ca7e133d0',
 'e91a750d-ec2d-4398-b539-16374ed891fb',
 'd677ad0c-bd24-4f5a-8d96-f4696b63108a',
 '7f4b10a1-bd45-449b-b8a1-7f89ce2c7407',
 'a801894d-2c81-4aad-91bb-6f6a2af3efca',
 '97beba5b-0dcb-40a0-a718-f51bd781b702',
 'a6b58f04-6ffd-4e76-a96c-ebc34e467355',
 '27ad52d9-b298-4ce6-8686-0432fd329a51',
 '9ac0f001-f2e4-4429-b1f0-39b9ddcf8651',
 '2a6ac332-2f6d-4d72-9106-27f87c8a7e3e',
 '6a3472a5-d671-4891-82fe-62cd6318bce4',
 'd54554fc-600d-4c2c-8875-5819ccd88760',
 '4eb4ccf7-b631-4fc2-9f63-c1a3101aeacb',
 '459914bf-2488-4e9b-bde1-d1e26c43c3e8',
 'd6beb585-37e3-4d05-b733-7774cb700caa',
 '55945a30-3298-484b-aaf5-3783be7a8b32',
 '775bf49e-f313-4851-b58b-56c58c15ad22',
 '880037c1-e7fd-

In [26]:
# Expose the vector store through the retriever interface using plain similarity search.
# No result count is passed here, so the library's default applies.
retriever = vectorstore.as_retriever(search_type="similarity")

In [29]:
# Define Retrieval and Answer Generation Functions
def retrieve(question):
    """Look up `question` with the module-level `retriever` and return the matched texts.

    Args:
        question: Query text passed to `retriever.invoke`.

    Returns:
        A list holding the `page_content` of each returned document, in the order
        the retriever produced them.
    """
    texts = []
    for hit in retriever.invoke(question):
        texts.append(hit.page_content)
    return texts

# Answer Generation
def generate_answer(question, context):
    """Ask the chat model to answer `question` given the retrieved `context`.

    Args:
        question: The question text placed after the context in the prompt.
        context: Iterable of strings; they are joined with newlines to form the
            context section of the prompt.

    Returns:
        Whatever `ChatOpenAI.invoke` returns for the assembled prompt (the caller
        in this notebook reads its `.content`).
    """
    # A new gpt-4o-mini client is created on every call.
    chat_model = ChatOpenAI(model="gpt-4o-mini")
    context_block = "\n".join(context)
    prompt = f"Context:\n{context_block}\n\nQuestion: {question}\n\n"
    return chat_model.invoke(prompt)

# Execute the Workflow
def run_pipeline(question):
    """Run retrieval followed by answer generation for a single question.

    Prints a progress line before each stage.

    Args:
        question: The question text handed to both `retrieve` and `generate_answer`.

    Returns:
        The value returned by `generate_answer`.
    """
    print("Retrieving documents...")
    context_chunks = retrieve(question)

    print("Generating answer...")
    return generate_answer(question, context_chunks)

# Chatbot Interface
   
# Keep the "You: " label out of the query itself: the question string is passed to the
# retriever and placed in the LLM prompt, so a display prefix would become part of the query.
question = "What are generators in python?"
print(f"You: {question}")
try:
    answer = run_pipeline(question)
    print(f"AI: {answer.content}\n")
except Exception as e:
    # Best-effort demo: report the failure instead of stopping the notebook.
    print(f"Error: {e}\n")

Retrieving documents...
Documents Start:
[Document(id='7aed76a6-efc8-4cb0-a1ca-555b1b3292b5', metadata={'source': 'https://www.learnpython.org/'}, page_content='Generators\n\nList Comprehensions\n\nLambda functions\n\nMultiple Function Arguments'), Document(id='0b29bf65-d3a4-44bc-9b48-ce0cb3f47cd9', metadata={'source': 'https://realpython.com/python-basics/'}, page_content='Are You Learning Python, But You’re Not Sure Where to Start and What the “Roadmap” Looks Like?'), Document(id='efedc9f8-4e1a-4e9a-b883-b877a077f52e', metadata={'source': 'https://realpython.com/python-basics/'}, page_content='Make Your Code More Pythonic »'), Document(id='2c0021d9-44c9-4f3c-a991-11a3ad8331ce', metadata={'source': 'https://docs.python.org/3/tutorial/index.html'}, page_content='9.9. Generators\n\n9.10. Generator Expressions\n\n10. Brief Tour of the Standard Library')]

Documents End


Generating answer...
Context Start:
Generators

List Comprehensions

Lambda functions

Multiple Function Arguments
Are

In [None]:
# Query Pinecone
# Placeholder query vector; its length must equal the index dimension (1536 here).
dummy_vector = [0] * 1536

# Only return records whose "source" metadata is the learnpython.org page,
# with metadata included and vector values omitted.
query_options = {
    "vector": dummy_vector,
    "top_k": 10,  # upper bound on the number of matches returned
    "filter": {"source": "https://www.learnpython.org/"},
    "include_metadata": True,
    "include_values": False,
}
results = index.query(**query_options)

# Show the ID and metadata of each match.
for match in results["matches"]:
    print(match["id"], match["metadata"])

In [14]:
# Remove every record whose metadata "source" is https://www.learnpython.org/
learnpython_filter = {"source": "https://www.learnpython.org/"}
index.delete(filter=learnpython_filter)

{}

In [None]:
# Delete all records stored in the index while keeping the index itself.
# NOTE(review): no namespace is passed, so this presumably targets the default namespace — confirm.
index.delete(delete_all=True)

{}

In [None]:
# List all indexes visible to this Pinecone client.
print(pc.list_indexes())

In [17]:
# Delete the "demo" index itself (not just its records). The name is the same literal that
# `index_name` holds in the setup cell; it is spelled out here, so this cell does not depend
# on that variable being defined.
pc.delete_index("demo")