In [1]:
from pinecone import Pinecone
from dotenv import load_dotenv
import os

# Load settings from a .env file into the process environment so the
# os.getenv("PINECONE_*") lookups in later cells can resolve.
# NOTE(review): assumes a .env file is discoverable from the working directory — confirm.
load_dotenv()

True

In [2]:
# Create the Pinecone client with the API key taken from the environment,
# then open a handle to the index addressed by the configured host.
pc = Pinecone(api_key=os.environ.get("PINECONE_API_KEY"))
index = pc.Index(host=os.environ.get("PINECONE_HOST"))

In [3]:
# === Chunking function ===
def chunk_text(text, chunk_size=1500, overlap=200):
    """Split ``text`` into fixed-size character windows that overlap.

    Consecutive chunks share ``overlap`` characters, so each window starts
    ``chunk_size - overlap`` characters after the previous one. The final
    chunk may be shorter than ``chunk_size``.

    Args:
        text: The string to split.
        chunk_size: Maximum number of characters per chunk; must be positive.
        overlap: Number of characters shared between neighbouring chunks;
            must satisfy ``0 <= overlap < chunk_size``.

    Returns:
        A list of chunk strings in document order. Empty input yields ``[]``.

    Raises:
        ValueError: If ``chunk_size`` is not positive, or if ``overlap`` is
            negative or not smaller than ``chunk_size``.
    """
    if chunk_size <= 0:
        raise ValueError("chunk_size must be a positive integer")
    # overlap >= chunk_size would make the window stand still (or move
    # backwards) and loop forever; a negative overlap would skip text.
    if not 0 <= overlap < chunk_size:
        raise ValueError("overlap must satisfy 0 <= overlap < chunk_size")

    step = chunk_size - overlap
    chunks = []
    start = 0
    while start < len(text):
        end = min(len(text), start + chunk_size)
        chunks.append(text[start:end])
        # Once a window reaches the end of the text, stop: any further window
        # would be a pure suffix of this chunk and only add a duplicate.
        if end == len(text):
            break
        start += step
    return chunks

In [4]:
import os

# === Walk through directories ===
# Collect one record per text chunk for every .txt file under root_dir.
# Each record carries the chunk text plus metadata about where it came from.
root_dir = "/home/saintramon/thanatos/knowledge"

records = []
seen_ids = {}  # record _id -> path of the file that produced it

for subdir, dirs, files in os.walk(root_dir):
    # Sorting dirs in place steers os.walk's descent, and sorting files below
    # fixes the per-directory order, so records are built in a reproducible
    # order regardless of filesystem listing order.
    dirs.sort()
    # The category is the name of the directory that directly contains the file.
    category = os.path.basename(subdir)
    for file in sorted(files):
        if not file.endswith(".txt"):
            continue

        file_path = os.path.join(subdir, file)
        with open(file_path, "r", encoding="utf-8") as f:
            text = f.read()

        chunks = chunk_text(text)
        base_id = f"{category}_{os.path.splitext(file)[0]}"

        for i, chunk in enumerate(chunks, start=1):
            record_id = f"{base_id}_chunk{i}"
            # IDs are built only from the immediate directory name and the
            # file stem, so two different files can map to the same ID. An
            # upsert would then silently overwrite one with the other, so
            # stop here and surface both paths instead.
            if record_id in seen_ids:
                raise ValueError(
                    f"Duplicate record id {record_id!r}: produced by both "
                    f"{seen_ids[record_id]!r} and {file_path!r}"
                )
            seen_ids[record_id] = file_path

            record = {
                "_id": record_id,
                "text": chunk,
                "source": file,
                "category": category,
                "chunk_number": i,
                "path": file_path
            }
            records.append(record)

In [5]:
# Upload every collected record to the configured namespace of the index.
# Fail early when nothing was collected: an empty upload almost always means
# the knowledge directory was missing or contained no .txt files.
if not records:
    raise ValueError("No records to upload; check the knowledge directory and its .txt files.")

print(f"Uploading {len(records)} chunks...")

# Text records are sent in slices because Pinecone limits how many text
# records a single upsert_records request may carry.
# NOTE(review): 96 is the documented per-request limit for text records at the
# time of writing — confirm against the current Pinecone limits.
UPSERT_BATCH_SIZE = 96
namespace = os.getenv("PINECONE_NAMESPACE")

for batch_start in range(0, len(records), UPSERT_BATCH_SIZE):
    batch = records[batch_start:batch_start + UPSERT_BATCH_SIZE]
    index.upsert_records(namespace, batch)


print("✅ Upload complete!")

Uploading 13 chunks...
✅ Upload complete!
