In [3]:
import os
from dotenv import load_dotenv
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.document_loaders import PyPDFLoader, DirectoryLoader
from langchain.embeddings import HuggingFaceEmbeddings
from pinecone import Pinecone, ServerlessSpec
from langchain.prompts import PromptTemplate
from langchain.llms import CTransformers
from langchain.chains import RetrievalQA
from langchain.vectorstores import Pinecone as PineconeVectorStore

In [2]:
# Read key/value pairs from a local .env file into the process environment so
# the os.getenv() lookups in the Pinecone configuration step can resolve them.
load_dotenv()

# Step 1: Load and preprocess PDF data
def load_pdf(data_path):
    """Load the PDF files found under ``data_path``.

    Args:
        data_path: Directory handed to ``DirectoryLoader``; files are
            selected with the ``*.pdf`` glob and parsed with ``PyPDFLoader``.

    Returns:
        The documents returned by ``DirectoryLoader.load()``.
    """
    return DirectoryLoader(data_path, glob="*.pdf", loader_cls=PyPDFLoader).load()

# Source PDFs live in the "data/" folder, resolved relative to the notebook's
# working directory.
extracted_data = load_pdf(data_path="data/")

def text_split(extracted_data, chunk_size=500, chunk_overlap=20):
    """Split loaded documents into overlapping chunks ready for embedding.

    Args:
        extracted_data: Documents to split (e.g. the output of ``load_pdf``).
        chunk_size: ``chunk_size`` passed to ``RecursiveCharacterTextSplitter``.
            Defaults to 500, the value this notebook has always used.
        chunk_overlap: ``chunk_overlap`` passed to the splitter.
            Defaults to 20.

    Returns:
        The chunks produced by ``RecursiveCharacterTextSplitter.split_documents``.
    """
    text_splitter = RecursiveCharacterTextSplitter(
        chunk_size=chunk_size,
        chunk_overlap=chunk_overlap,
    )
    return text_splitter.split_documents(extracted_data)

# Chunk the loaded documents and report how many pieces were produced.
text_chunks = text_split(extracted_data=extracted_data)
print("Number of chunks:", len(text_chunks))

Number of chunks: 5860


In [5]:
# Step 2: Initialize embeddings
def download_hugging_face_embeddings():
    """Create the sentence-transformers embedding wrapper used by this notebook.

    Returns:
        A ``HuggingFaceEmbeddings`` instance configured with the
        ``sentence-transformers/all-MiniLM-L6-v2`` model name.
    """
    return HuggingFaceEmbeddings(
        model_name="sentence-transformers/all-MiniLM-L6-v2"
    )

# Build the embedding model once; the same object is later passed to the
# upsert step and to the retriever.
embeddings = download_hugging_face_embeddings()

In [6]:
# Step 3: Configure Pinecone
def _require_env(name):
    """Return the value of environment variable ``name`` or fail with a clear error.

    Raises:
        RuntimeError: If the variable is unset or empty.
    """
    value = os.getenv(name)
    if not value:
        raise RuntimeError(
            f"Environment variable '{name}' is not set; define it (e.g. in your .env file)."
        )
    return value


PINECONE_API_KEY = _require_env("PINECONE_API_KEY")
# Only needed when the index has to be created, so it is validated lazily below.
PINECONE_ENV = os.getenv("PINECONE_ENV")
INDEX_NAME = _require_env("INDEX_NAME")
# NOTE(review): should equal the output size of the embedding model — confirm.
DIMENSION = int(_require_env("DIMENSION"))

# Initialize Pinecone client
pc = Pinecone(api_key=PINECONE_API_KEY)

# Check if the index exists and create it only if it doesn't
existing_indexes = [description.name for description in pc.list_indexes()]
if INDEX_NAME not in existing_indexes:
    if not PINECONE_ENV:
        # The value is used as the serverless region; fail before calling the API.
        raise RuntimeError(
            "Environment variable 'PINECONE_ENV' is not set; it is required to create the index."
        )
    print(f"Index '{INDEX_NAME}' does not exist. Creating index...")
    pc.create_index(
        name=INDEX_NAME,
        dimension=DIMENSION,
        metric="cosine",
        spec=ServerlessSpec(
            cloud="aws",
            region=PINECONE_ENV
        )
    )
    print(f"Index '{INDEX_NAME}' created successfully.")
else:
    print(f"Index '{INDEX_NAME}' already exists.")

# Connect to the existing or newly created index
index = pc.Index(INDEX_NAME)
print(f"Connected to index '{INDEX_NAME}'.")

Index 'chatbot' already exists.
Connected to index 'chatbot'.


In [7]:
# Step 4: Upsert data into Pinecone
def upsert_embeddings_to_pinecone(index, text_chunks, embeddings, batch_size=100):
    """Embed the text chunks and upsert them into a Pinecone index in batches.

    Each vector is stored under the deterministic id ``id-<position>``, where
    position is the chunk's offset in ``text_chunks``, so a given position
    always maps to the same id. The raw chunk text is kept in the vector's
    metadata under the ``"text"`` key.

    Args:
        index: Object exposing ``upsert(vectors=...)`` (the Pinecone index handle).
        text_chunks: Iterable of objects with a ``page_content`` attribute.
        embeddings: Object exposing ``embed_documents(list_of_str)`` that
            returns one vector per input string.
        batch_size: Number of vectors sent per ``upsert`` call; must be >= 1.

    Raises:
        ValueError: If ``batch_size`` is smaller than 1. A negative value
            would otherwise produce an empty range and silently upsert nothing.
    """
    if batch_size < 1:
        raise ValueError(f"batch_size must be a positive integer, got {batch_size!r}")

    chunk_texts = [chunk.page_content for chunk in text_chunks]
    chunk_embeddings = embeddings.embed_documents(chunk_texts)

    for start in range(0, len(chunk_embeddings), batch_size):
        batch_vectors = chunk_embeddings[start:start + batch_size]
        batch_texts = chunk_texts[start:start + batch_size]
        # (id, values, metadata) tuples; the id encodes the global chunk position.
        vectors = [
            (f"id-{start + offset}", vector, {"text": text})
            for offset, (vector, text) in enumerate(zip(batch_vectors, batch_texts))
        ]
        index.upsert(vectors=vectors)

    print("Upsert complete.")

# Embed every chunk and push the vectors to the connected index
# (default batch size).
upsert_embeddings_to_pinecone(
    index=index,
    text_chunks=text_chunks,
    embeddings=embeddings,
)

Upsert complete.


In [8]:
# Step 5: Define Pinecone as the Retriever with top k=1
# Wrap the existing index in a LangChain vector store, then expose it as a
# retriever that is asked for a single result (k=1) per query.
vector_store = PineconeVectorStore.from_existing_index(
    index_name=INDEX_NAME,
    embedding=embeddings,
)
retriever = vector_store.as_retriever(search_kwargs={"k": 1})

In [9]:
# Step 6: Define Prompt Template
# {context} and {question} are the two placeholders declared as input
# variables below; the template instructs the model to admit when it does not
# know rather than invent an answer.
prompt_template = """
Use the following pieces of information to answer the user's question.
If you don't know the answer, just say that you don't know, don't try to make up an answer.

Context: {context}
Question: {question}

Only return the helpful answer below and nothing else.
Helpful answer:
"""
PROMPT = PromptTemplate(
    input_variables=["context", "question"],
    template=prompt_template,
)
# Forwarded to the QA chain so it uses the custom prompt.
chain_type_kwargs = dict(prompt=PROMPT)

In [10]:
# Step 7: Initialize LLM
# The model weights are read from a path relative to the notebook's working
# directory; generation is capped at 512 new tokens with temperature 0.8.
llm = CTransformers(
    model_type="llama",
    model="model/llama-2-7b-chat.ggmlv3.q4_0.bin",
    config=dict(max_new_tokens=512, temperature=0.8),
)

In [11]:
# Step 8: Use the Retriever in the RetrievalQA Chain
# Combine the LLM, the Pinecone-backed retriever and the custom prompt into a
# single question-answering chain; source documents are returned with each answer.
qa = RetrievalQA.from_chain_type(
    llm=llm,
    retriever=retriever,
    chain_type="stuff",
    chain_type_kwargs=chain_type_kwargs,
    return_source_documents=True,
)

In [12]:
# Step 9: Query Loop with Top k = 1 response
# Keep prompting until the user types 'exit' (case-insensitive, surrounding
# whitespace ignored) or the input stream ends.
while True:
    try:
        user_input = input("Enter your question (or type 'exit' to quit): ").strip()
    except (EOFError, KeyboardInterrupt):
        # stdin was closed or the kernel was interrupted while waiting for
        # input: leave the loop the same way an explicit 'exit' would.
        print("Exiting the program. Goodbye!")
        break

    if user_input.lower() == "exit":
        print("Exiting the program. Goodbye!")
        break

    if not user_input:
        # A blank line would still trigger retrieval plus a full LLM generation.
        continue

    result = qa.invoke({"query": user_input})
    print("Response:", result["result"])

Response: Acne is a common skin condition that occurs when the pores on the skin become clogged with dead skin cells, oil, and bacteria. It can cause whiteheads, blackheads, and inflamed red pimples on the face, as well as other areas of the body. Acne is most commonly seen in people during puberty, but it can also occur at any age. There are several treatment options available, including topical creams and gels, oral antibiotics, and blue light therapy.
Response: Allergies occur when your immune system mistakenly identifies a harmless substance, such as pollen, dust, or food, as a threat. When this substance enters your body, your immune system produces antibodies to fight it off, leading to an inflammatory response and the symptoms associated with allergies.
Exiting the program. Goodbye!
