In [9]:
from dotenv import load_dotenv
from pinecone import Pinecone, ServerlessSpec
from langchain_community.document_loaders import TextLoader
from langchain_huggingface import HuggingFaceEmbeddings
# from langchain_pinecone import PineconeVectorStore
from langchain_text_splitters import RecursiveCharacterTextSplitter
import numpy as np
import os

load_dotenv()
api_key = os.getenv("PINECONE_API_KEY")

if not api_key:
    raise ValueError("PINECONE_API_KEY is not found in the .env file")

try:
    pc = Pinecone(api_key=api_key)
except Exception as e:
    raise Exception(f"Failed to initialize the pinecone client{e}")

In [10]:
# Normalize the embeddings
class NormalizedEmbeddings(HuggingFaceEmbeddings):
    def embed_query(self, text):
        vec = super().embed_query(text)
        return (vec / np.linalg.norm(vec)).tolist()
    
    def embed_documents(self, texts):
        vecs = super().embed_documents(texts)
        return [(v / np.linalg.norm(v)).tolist() for v in vecs]

In [11]:
index_name = "first-pc-rag"

if index_name in pc.list_indexes().names():
    print("Index already exists")
else:
    print("Index does not exists")

# connect to the index
index = pc.Index(index_name)

# delete vectors in index (this is needed when I want to change the number of chunks)
# index.delete(delete_all=True)

# load and split document
loader = TextLoader("policies.md", encoding="utf-8")
documents = loader.load()
print(f"Loaded {len(documents)} document(s)")
print(f"Total characters in document is: {len(documents[0].page_content)}")

text_splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=50)
chunks = text_splitter.split_documents(documents)
print(f"split into {len(chunks)} chunks")
for i, chunk in enumerate(chunks):
    print(f"Chunk {i+1} length: {len(chunk.page_content)} characters")

embeddings_model = NormalizedEmbeddings(model_name="BAAI/bge-small-en-v1.5")

vectors = []
for i, chunk in enumerate(chunks):
    vector = embeddings_model.embed_query(chunk.page_content)
    vectors.append({
        "id": f"chunk-{i}",
        "values": vector,
        "metadata": {
            "text": chunk.page_content,
            "source": chunk.metadata.get("source", "policies.md")
        }
    })







Index already exists
Loaded 1 document(s)
Total characters in document is: 3765
split into 9 chunks
Chunk 1 length: 443 characters
Chunk 2 length: 422 characters
Chunk 3 length: 451 characters
Chunk 4 length: 487 characters
Chunk 5 length: 493 characters
Chunk 6 length: 494 characters
Chunk 7 length: 440 characters
Chunk 8 length: 472 characters
Chunk 9 length: 72 characters


In [12]:
# upserting into pinecone
batch_size=100
for i in range(0, len(vectors), batch_size):
    batch = vectors[i:1 + batch_size]
    index.upsert(vectors=batch)
    print(f"Upserted batch {i//batch_size + 1}/{(len(vectors)-1)//batch_size + 1}")
print(f"successfully upserted {len(vectors)} vectors")

Upserted batch 1/1
successfully upserted 9 vectors


In [18]:
query = "what does the company say about mobile phones"
query_vector = embeddings_model.embed_query(query)

results = index.query(
    vector=query_vector,
    top_k=3,
    include_metadata=True
)
# if results['matches']:
#     for match in results['matches']:
#         print(f"score: {match['score']:.4f}")
#         print(f"Text: {match['metadata']['text'][:200]}...\n")
# else: 
#     print("No matches found")

In [34]:
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnablePassthrough
from langchain_core.prompts import  PromptTemplate     
from langchain_huggingface import HuggingFacePipeline
from langchain_pinecone import Pinecone                   # WORKS
from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
import torch

vectorstore = Pinecone(
    index=index,
    embedding=embeddings_model,
    text_key="text"
)

retriever = vectorstore.as_retriever(search_kwargs={"k": 3})

model_id = "microsoft/DialoGPT-medium"
tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModelForCausalLM.from_pretrained(
    model_id,
    dtype=torch.float16,
    low_cpu_mem_usage=True
)

if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token

pipe = pipeline(
    "text-generation",
    model=model,
    tokenizer=tokenizer,
    max_new_tokens=256,
    temperature=0.3,
    top_p=0.9,
    do_sample=True,
    repetition_penalty=1.1,
    pad_token_id=tokenizer.eos_token_id
)

llm = HuggingFacePipeline(pipeline=pipe)

Device set to use cpu


In [None]:
# prompt template
# === 3. Prompt Template ===
template = """You are a helpful assistant for DelishDine Restaurants Ltd.
Answer the question using ONLY the context below. If unsure, say "I don't know."

Context:
{context}

Question: {question}
Answer:"""

prompt = PromptTemplate.from_template(template)

def format_docs(docs):
    return "\n\n".join(doc.page_content for doc in docs)

rag_chain = (
    {"context": retriever | format_docs, "question": RunnablePassthrough()}
    | prompt
    | llm
    | StrOutputParser()
)

query = "what is the company name"
answer = rag_chain.invoke(query)
print("Answer:", answer)


Answer: You are a helpful assistant for DelishDine Restaurants Ltd.
Answer the question using ONLY the context below. If unsure, say "I don't know."

Context:
# DelishDine Restaurants Ltd. â€“ Company Policy Manual

## Company Name
The name of our company is **DelishDine Restaurants Ltd.**

## 1. Introduction
Welcome to DelishDine Restaurants Ltd.

This policy manual outlines the standards, rules, and expectations that guide how we work together to deliver exceptional dining experiences to our customers. All employees are expected to read, understand, and comply with the policies described herein.

**Sharing company secrets with outside parties is strictly prohibited.**

### Reporting Hazards
Report any equipment malfunctions or food safety hazards immediately to the supervisor.

## 7. Customer Service Policy

### Greeting
Greet every customer with a smile and a warm welcome.

### Handling Concerns
Address all customer concerns promptly and courteously.

### Escalation
**Never argue wi