In [87]:
# CELL 1: SETUP AND IMPORTS
from langchain_community.document_loaders import PyPDFDirectoryLoader
from langchain_pinecone import Pinecone as LangchainPinecone
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain.text_splitter import RecursiveCharacterTextSplitter
from pinecone import Pinecone
from dotenv import load_dotenv
import os

# Pull variables from a local .env file (if present) into the process environment.
load_dotenv()

PINECONE_API_KEY = os.getenv("PINECONE_API_KEY")
PINECONE_INDEX_NAME = os.getenv("PINECONE_INDEX_NAME")

# os.getenv returns None for unset variables; fail here with a clear message
# instead of letting a later cell fail with an opaque client error.
_missing_settings = [
    setting_name
    for setting_name, setting_value in (
        ("PINECONE_API_KEY", PINECONE_API_KEY),
        ("PINECONE_INDEX_NAME", PINECONE_INDEX_NAME),
    )
    if not setting_value
]
if _missing_settings:
    raise EnvironmentError(
        "Missing required environment variable(s): " + ", ".join(_missing_settings)
    )

print("✅ Setup complete. Using index:", PINECONE_INDEX_NAME)

✅ Setup complete. Using index: sterling-pine


In [88]:
# CELL 2: DELETE ALL EXISTING VECTORS
# Empties the target index so the following cells start from a clean slate.
print("--- Deleting all existing vectors ---")
pc = Pinecone(api_key=PINECONE_API_KEY)
index = pc.Index(PINECONE_INDEX_NAME)

# delete(delete_all=True) acts on a single namespace (the default one when no
# namespace is given), while the verification below reads the total across all
# namespaces. Issue one delete per namespace that the stats report.
# This also means an already-empty index triggers no delete call at all;
# NOTE(review): serverless indexes are reported to answer 404 "Namespace not
# found" when asked to clear a namespace that does not exist - confirm.
stats_before = index.describe_index_stats()
for namespace in list(stats_before.get("namespaces") or {}):
    index.delete(delete_all=True, namespace=namespace)
print("Deletion complete.")

# Verify the index is empty
# NOTE(review): index stats may lag briefly behind writes; re-run this check
# if a non-zero count shows up right after deletion.
stats = index.describe_index_stats()
print("✅ Verification -> Total vectors in index:", stats["total_vector_count"])

--- Deleting all existing vectors ---
Deletion complete.
✅ Verification -> Total vectors in index: 0


In [89]:
# CELL 3: LOAD AND CHUNK DOCUMENTS
# Reads every PDF under DOCUMENTS_DIR and splits the pages into overlapping
# text chunks that the next cell embeds and uploads.

# Tunable inputs, kept in one place instead of inline literals.
DOCUMENTS_DIR = "documents/"
CHUNK_SIZE = 800      # chunk_size passed to the text splitter
CHUNK_OVERLAP = 50    # chunk_overlap passed to the text splitter

print("\n--- Reading and chunking new data ---")
loader = PyPDFDirectoryLoader(DOCUMENTS_DIR)
docs = loader.load()
print(f"Loaded {len(docs)} pages from the document.")

# Stop here when nothing was loaded; otherwise the notebook would go on to
# report a "successful" insert of zero chunks into a freshly emptied index.
if not docs:
    raise ValueError(f"No PDF pages were loaded from '{DOCUMENTS_DIR}'.")

text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=CHUNK_SIZE,
    chunk_overlap=CHUNK_OVERLAP,
)
chunked_docs = text_splitter.split_documents(docs)
print(f"✅ Split document into {len(chunked_docs)} chunks.")


--- Reading and chunking new data ---
Loaded 23 pages from the document.
✅ Split document into 58 chunks.


In [90]:
# CELL 4: CREATE EMBEDDINGS AND INSERT DATA
# Embeds each chunk with a sentence-transformers model and upserts the
# resulting vectors into the Pinecone index named by PINECONE_INDEX_NAME.
print("\n--- Creating embeddings and inserting data ---")
embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")

# Deterministic, position-based IDs: re-running this cell overwrites the same
# vectors rather than adding a second copy of every chunk, so the vector count
# stays equal to len(chunked_docs).
chunk_ids = [f"chunk-{position:05d}" for position in range(len(chunked_docs))]

LangchainPinecone.from_documents(
    documents=chunked_docs,
    embedding=embeddings,
    index_name=PINECONE_INDEX_NAME,
    ids=chunk_ids,
)
print(f"✅ Successfully inserted {len(chunked_docs)} new chunks into Pinecone.")


--- Creating embeddings and inserting data ---
✅ Successfully inserted 58 new chunks into Pinecone.


In [1]:
# A standalone cell to check the status of your Pinecone index.

from pinecone import Pinecone
from dotenv import load_dotenv
import os

print("--- Checking Pinecone Index Status ---")

# Number of vectors the index is expected to hold (the chunk count printed by
# the ingestion run). Update this single value when the source documents change.
EXPECTED_VECTOR_COUNT = 58

# --- 1. Setup and Connect ---
# Loads credentials and connects to your specific index.
load_dotenv()
try:
    pc = Pinecone(api_key=os.getenv("PINECONE_API_KEY"))
    index_name = os.getenv("PINECONE_INDEX_NAME")
    index = pc.Index(index_name)
    print(f"✅ Successfully connected to index: '{index_name}'")
except Exception as e:
    # Report the failure and skip the stats section rather than raising.
    print(f"❌ Error connecting to Pinecone: {e}")
    index = None

# --- 2. Get and Report Stats ---
# Fetches the latest stats and prints them.
# Compare against None explicitly: the question is "did the connection
# succeed", not "is the index object truthy".
if index is not None:
    stats = index.describe_index_stats()
    vector_count = stats.get('total_vector_count', 0)

    print("\n📊 Index Report:")
    print(f"   - Vector Count: {vector_count}")
    print(f"   - Vector Dimension: {stats.get('dimension', 'N/A')}")

    # --- 3. Verification Check ---
    # Checks if the vector count matches the expected number.
    if vector_count == EXPECTED_VECTOR_COUNT:
        print("\n🎉 Verification successful! The vector count is correct.")
    else:
        print(f"\n⚠️ Verification failed. Expected {EXPECTED_VECTOR_COUNT} vectors, but found {vector_count}.")

--- Checking Pinecone Index Status ---
✅ Successfully connected to index: 'sterling-pine'

📊 Index Report:
   - Vector Count: 58
   - Vector Dimension: 384

🎉 Verification successful! The vector count is correct.
