In [None]:
import os
from getpass import getpass

# The Pinecone client created later in this notebook picks its credentials up
# from the PINECONE_API_KEY environment variable.
# Keep a key that is already exported instead of overwriting it with a
# placeholder, and prompt for one (without echoing it) only when it is missing,
# so the secret never has to be typed into the notebook source.
if not os.environ.get("PINECONE_API_KEY"):
    os.environ["PINECONE_API_KEY"] = getpass("Pinecone API key: ")

In [5]:
from pathlib import Path

from langchain_community.document_loaders import DirectoryLoader, TextLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter

# Load every .txt file found under ../data (the glob also matches sub-folders)
loader = DirectoryLoader(
    "../data",
    glob="**/*.txt",
    loader_cls=TextLoader
)
documents = loader.load()

# Tag each document with a category taken from its file name
# (e.g. ".../billing.txt" -> "billing").
# pathlib handles the platform's path separator and removes only the final
# extension, unlike splitting on "/" and replacing every ".txt" occurrence.
for doc in documents:
    filename = Path(doc.metadata["source"]).name
    doc.metadata["category"] = Path(filename).stem

# Split into overlapping chunks; the metadata set above is carried by each
# chunk (a later cell reads `category` from the retrieved chunks).
splitter = RecursiveCharacterTextSplitter(chunk_size=300, chunk_overlap=50)
docs = splitter.split_documents(documents)  # 👈 This is the "docs" we use

## Step 1: Set Up Pinecone API Key + Index

In [6]:
from pinecone import Pinecone, ServerlessSpec

# Connect to Pinecone; with no arguments the client picks the API key up
# from the environment.
pc = Pinecone()

# Name of the index used by the rest of the notebook.
index_name = "rag-index"

# Create the index only when it is not already listed, so re-running this
# cell reuses the existing index.
existing_indexes = pc.list_indexes().names()
if index_name not in existing_indexes:
    serverless_spec = ServerlessSpec(cloud="aws", region="us-east-1")
    pc.create_index(
        name=index_name,
        dimension=384,  # for all-MiniLM-L6-v2 embeddings
        metric="cosine",
        spec=serverless_spec,
    )

## Step 2: Store Documents in Pinecone via LangChain

In [7]:
import hashlib
from collections import Counter

from langchain_pinecone import PineconeVectorStore
from langchain_huggingface import HuggingFaceEmbeddings

# Load embedding model
embedding_model = HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2")

# Give every chunk a deterministic ID built from its source path, its position
# among that source's chunks, and its text. Without explicit IDs each run of
# this cell inserts a brand-new copy of every chunk, so the index fills up with
# duplicates and the retriever keeps returning the same passage several times.
# With stable IDs a re-run overwrites the existing vectors instead.
chunks_per_source = Counter()
chunk_ids = []
for chunk in docs:
    source = chunk.metadata.get("source", "")
    ordinal = chunks_per_source[source]
    chunks_per_source[source] += 1
    fingerprint = f"{source}\n{ordinal}\n{chunk.page_content}"
    chunk_ids.append(hashlib.sha256(fingerprint.encode("utf-8")).hexdigest())

# Embed the chunks and upsert them into the Pinecone index
vectorstore = PineconeVectorStore.from_documents(
    documents=docs,  # your previously chunked documents
    embedding=embedding_model,
    index_name=index_name,
    ids=chunk_ids,
)

## Step 3: Load a Hugging Face LLM Locally (No API Key Needed)

In [9]:
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM, pipeline, set_seed
from langchain_huggingface import HuggingFacePipeline

# The pipeline below samples its output (do_sample=True), so seed the random
# number generators first; otherwise the answers change on every
# Restart & Run All. Re-running a single query cell later still advances the
# generator state, so only a full top-to-bottom run is repeatable.
SEED = 42
set_seed(SEED)

# Choose your model (you can upgrade to flan-t5-large later)
model_name = "google/flan-t5-base"

# Load tokenizer and model from HuggingFace Hub (no token needed)
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForSeq2SeqLM.from_pretrained(model_name)

# Create a text2text generation pipeline
pipe = pipeline(
    "text2text-generation",
    model=model,
    tokenizer=tokenizer,
    max_length=256,
    temperature=0.7,
    top_p=0.9,
    do_sample=True  # helps avoid repetition
)

Device set to use mps:0


## Step 4: Create Retriever + QA Chain

In [10]:
from langchain.chains import RetrievalQA

# Retriever over the vector store populated earlier in the notebook
retriever = vectorstore.as_retriever()

# Make the transformers pipeline usable as a LangChain LLM
llm = HuggingFacePipeline(pipeline=pipe)

# Assemble the retrieval QA chain; return_source_documents=True keeps the
# retrieved chunks in the response so they can be listed next to the answer.
chain_options = {
    "chain_type": "stuff",
    "retriever": retriever,
    "return_source_documents": True,
}
qa_chain = RetrievalQA.from_chain_type(llm=llm, **chain_options)

## Step 5: Ask a Query

In [11]:
# Send one sample question through the retrieval QA chain
query = "How long does a refund take?"
response = qa_chain.invoke({"query": query})

answer = response["result"]
retrieved_chunks = response["source_documents"]

print("📘 Answer:", answer)
print("\n📂 Sources:")
# One bullet per retrieved chunk, labelled with that chunk's category metadata
for category in (chunk.metadata.get("category") for chunk in retrieved_chunks):
    print(f"• {category}")

📘 Answer: 3 business days

📂 Sources:
• billing
• billing
• billing
• billing
