## Load and chunk PDFs

In [18]:
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_community.document_loaders import PyMuPDFLoader
import os
from pathlib import Path
from langchain_community.vectorstores import Chroma
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_ollama import ChatOllama
from langchain.chains import ConversationalRetrievalChain
from langchain.memory import ConversationBufferMemory
from langchain.callbacks.base import BaseCallbackHandler
import gradio as gr

# Constants
MODEL = "llama3.2"  # model name handed to ChatOllama when the LLM is created
# NOTE(review): DB_NAME and OLLAMA_API are not referenced by any visible cell
# (the Chroma store is persisted to "./chroma_db", not DB_NAME) — confirm
# whether they are still needed.
DB_NAME = "vector_db"
OLLAMA_API = "http://localhost:11434/api/chat"

# Working directory of the kernel; used in place of the script's own folder.
# script_dir = Path(__file__).parent
script_dir = os.getcwd()
# NOTE(review): this writes an absolute local path into the saved notebook output.
print("Script Directory:", script_dir)

# Function to load PDFs
def load_pdfs(pdf_folder):
    """Load every PDF found directly inside ``pdf_folder``.

    Args:
        pdf_folder: Directory to scan. Sub-directories are not searched.

    Returns:
        A flat list holding the items returned by ``PyMuPDFLoader.load()``
        for each PDF, visited in file-name order. Empty when the folder
        contains no PDF files.
    """
    documents = []
    # os.listdir() yields names in arbitrary order; sorting keeps the document
    # (and therefore chunk) order reproducible between runs.
    for file_name in sorted(os.listdir(pdf_folder)):
        # Compare the extension case-insensitively so "REPORT.PDF" is not skipped.
        if not file_name.lower().endswith(".pdf"):
            continue
        loader = PyMuPDFLoader(os.path.join(pdf_folder, file_name))
        documents.extend(loader.load())
    return documents

# Function to chunk documents
def chunk_documents(documents, chunk_size=1000, chunk_overlap=200):
    """Split documents into overlapping chunks ready for embedding.

    Args:
        documents: The documents to split.
        chunk_size: ``chunk_size`` passed to the splitter. Defaults to 1000,
            the value that used to be hard-coded.
        chunk_overlap: ``chunk_overlap`` passed to the splitter. Defaults to
            200, the value that used to be hard-coded.

    Returns:
        Whatever ``RecursiveCharacterTextSplitter.split_documents`` returns
        for ``documents``.
    """
    text_splitter = RecursiveCharacterTextSplitter(
        chunk_size=chunk_size,
        chunk_overlap=chunk_overlap,
    )
    return text_splitter.split_documents(documents)

# Load and process documents.
# PDFs are read from the kernel's current working directory. An earlier
# variant used `Path(__file__).parent`; presumably dropped because
# `__file__` is not available inside a notebook kernel.
pdf_folder = os.getcwd()
documents = load_pdfs(pdf_folder)
if not documents:
    # Raise instead of calling exit(): exit() is the interactive-shell helper
    # and, inside a notebook, takes the kernel down (losing all state) rather
    # than reporting what went wrong. An exception stops the cell, and a
    # "Run All", with a message that says where we looked.
    raise FileNotFoundError(f"No documents found in {pdf_folder}")
chunks = chunk_documents(documents)

# Convert text into embeddings and store in vector DB
embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
# NOTE(review): the store is persisted to ./chroma_db; re-running this cell
# presumably adds the same chunks to the existing collection again — confirm
# and clear or reuse the directory if duplicates matter.
vectorstore = Chroma.from_documents(chunks, embeddings, persist_directory="./chroma_db")

# Create retriever (asks the store for 3 results per query)
retriever = vectorstore.as_retriever(search_kwargs={"k": 3})

# Create llm
llm = ChatOllama(model=MODEL)

# Initialize memory; the key "chat_history" is where the chain looks up prior turns
memory = ConversationBufferMemory(memory_key="chat_history", return_messages=True)

# Create conversational chain
qa_chain = ConversationalRetrievalChain.from_llm(llm=llm, retriever=retriever, memory=memory)



Script Directory: /Users/ritesh/codebase/ritesh-codebase/workspaceGPT/apps/confluence-rag


In [30]:
# Run a sample query against the retriever and print every hit.
query = "hi"
retrieved_docs = retriever.invoke(query)

# The loop index was only needed by a print that had been commented out,
# so iterate over the documents directly.
for doc in retrieved_docs:
    # Fall back to a placeholder when the metadata carries no "title" entry.
    title = doc.metadata.get("title", "No Title")
    content = doc.page_content  # Extracts the document's text content

    print(f"Title: {title}")
    print(f"Content:\n{content}\n")
    print("=" * 80)  # Separator for readability

[Document(metadata={'author': '', 'creationDate': "D:20250217171142+00'00'", 'creationdate': '2025-02-17T17:11:42+00:00', 'creator': 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) HeadlessChrome/133.0.0.0 Safari/537.36', 'file_path': '/Users/ritesh/codebase/ritesh-codebase/workspaceGPT/apps/confluence-rag/B Test for Product Price Experimentation - How to Setup and Test-170225-171133.pdf', 'format': 'PDF 1.4', 'keywords': '', 'modDate': "D:20250217171142+00'00'", 'moddate': '2025-02-17T17:11:42+00:00', 'page': 1, 'producer': 'Skia/PDF m133', 'source': '/Users/ritesh/codebase/ritesh-codebase/workspaceGPT/apps/confluence-rag/B Test for Product Price Experimentation - How to Setup and Test-170225-171133.pdf', 'subject': '', 'title': 'A/B Test for Product Price Experimentation - How to Setup and Test - D2C - Confluence', 'total_pages': 2, 'trapped': ''}, page_content='Test\nTesting Organically\n1. Visit the mms storefront you wish to test on\n2. You will be given a 

In [44]:
def chat(question, history):
    """Produce the assistant's reply to one chat message.

    Plain greetings get a canned reply without calling the model; every
    other message is forwarded to the module-level ``qa_chain``.

    Args:
        question: The user's latest message text.
        history: Prior messages supplied by the chat UI. Not used here; the
            question alone is passed to the chain.

    Returns:
        The reply text: either the canned greeting or the chain's
        ``"answer"`` value.
    """
    # Normalise before matching so inputs such as "Hi ", "hello!" or
    # "  HELLO.\n" also take the shortcut instead of triggering a retrieval.
    normalized = question.strip().rstrip("!.,").strip().lower()
    if normalized in ("hi", "hello"):
        return "Hi, I am Workspace Assistant. How can I help you?"

    # The original, un-normalised question is what the chain receives.
    result = qa_chain.invoke({"question": question})
    return result["answer"]

# Serve `chat` through a Gradio chat UI.
# Keep the ChatInterface itself in `view`: chaining .launch() onto the
# constructor bound `view` to launch()'s return value instead, leaving no
# handle on the interface (e.g. for view.close()).
view = gr.ChatInterface(chat, type="messages")

# Trailing semicolon keeps the notebook from echoing launch()'s return value.
view.launch(inbrowser=True);

* Running on local URL:  http://127.0.0.1:7862

To create a public link, set `share=True` in `launch()`.


huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


In [None]:

# Custom Streamlit Callback
class StreamHandler(BaseCallbackHandler):
    """Callback handler that mirrors streamed LLM tokens into a UI container.

    Attributes are documented inline in ``__init__``.
    """

    def __init__(self, container):
        """Remember the render target and start with an empty transcript.

        Args:
            container: Object exposing a ``markdown(text)`` method; it is
                called with the full text every time a token arrives.
        """
        # Text accumulated from all tokens received so far.
        self.text = ""
        # Render target that gets overwritten on every token.
        self.container = container

    def on_llm_new_token(self, token: str, **kwargs):
        """Append ``token`` to the transcript and re-render the whole text.

        Args:
            token: The newly generated piece of text.
            **kwargs: Extra callback arguments; ignored.
        """
        accumulated = self.text + token
        self.text = accumulated
        self.container.markdown(accumulated)  # Update Streamlit container dynamically

# Streamlit UI
# NOTE(review): these st.* calls presumably only render when the code is run
# through `streamlit run`, not when executed as a notebook cell — confirm
# where this code is meant to live.
import streamlit as st  # was missing from the visible notebook, so every `st.` call raised NameError

# The retriever is backed by the Chroma vector store, not FAISS.
st.title("Chat with Chroma-powered RAG")
query = st.text_input("Ask a question:")

if query:
    with st.container():
        # Placeholder element that the handler overwrites as tokens arrive
        stream_handler = StreamHandler(st.empty())

        # Initialize LLM with streaming handler
        llm = ChatOllama(model=MODEL, callbacks=[stream_handler])

        # Initialize memory
        # NOTE(review): a fresh memory object is created for every submitted
        # query, so no chat history carries over between questions — confirm
        # whether it should be kept across reruns instead.
        memory = ConversationBufferMemory(memory_key="chat_history", return_messages=True)

        # Create conversational chain
        qa_chain = ConversationalRetrievalChain.from_llm(llm=llm, retriever=retriever, memory=memory)

        # Invoke chain; the answer is shown through the streaming handler
        qa_chain.invoke({"question": query})

## Convert text into embeddings and store them in a vector DB

In [8]:
from langchain_community.vectorstores import Chroma
from langchain_community.embeddings import HuggingFaceEmbeddings

In [9]:
# Embedding model used to vectorise the chunks.
# GPU variant, kept for reference:
# embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2", model_kwargs={"device": "cuda"})
embeddings = HuggingFaceEmbeddings(
    model_name="sentence-transformers/all-MiniLM-L6-v2",
)

# Embed the chunks and persist the resulting store under ./chroma_db.
vectorstore = Chroma.from_documents(
    documents=chunks,
    embedding=embeddings,
    persist_directory="./chroma_db",
)

  embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")


## Query the vector database

In [55]:
# Ask the store for up to five results for a sample question.
# (In the recorded run the index held only 3 elements, so the request was
# reduced to 3.)
query = "what is price experimentation"
retriever = vectorstore.as_retriever(search_kwargs=dict(k=5))
retrieved_docs = retriever.invoke(query)

# To inspect the hits:
# for doc in retrieved_docs:
#     print(doc.page_content)

Number of requested results 5 is greater than number of elements in index 3, updating n_results = 3


In [56]:
# Find how many dimensions each stored embedding vector has.
# NOTE(review): `_collection` is a private attribute of the vector store, so
# this may break across library versions — confirm before relying on it.

collection = vectorstore._collection
# Fetch a single record and ask for its embedding to be included in the result.
sample_embedding = collection.get(limit=1, include=["embeddings"])
# Length of the first returned embedding; as the cell's last expression it is
# what the notebook displays.
len(sample_embedding["embeddings"][0])

384

## Generate a response with Ollama, using conversation memory

In [60]:
from langchain_ollama import ChatOllama
from langchain.chains import ConversationalRetrievalChain
from langchain.memory import ConversationBufferMemory
from langchain.callbacks import StreamingStdOutCallbackHandler

# LLM whose output is echoed to stdout through the streaming callback.
llm = ChatOllama(model=MODEL, callbacks=[StreamingStdOutCallbackHandler()])

# Initialize memory
memory = ConversationBufferMemory(memory_key="chat_history", return_messages=True)

# Create QA chain
qa_chain = ConversationalRetrievalChain.from_llm(llm=llm, retriever=retriever, memory=memory)

# Question spelling fixed ("experimentaion" -> "experimentation") so it matches
# the sample query used against the retriever.
result = qa_chain.invoke({"question": "what is price experimentation"})


Number of requested results 5 is greater than number of elements in index 3, updating n_results = 3


Price experimentation, also known as A/B testing for product pricing, is a method used to determine whether changing the price of a product or service results in an increase or decrease in sales, revenue, or other desired metrics.

The goal of price experimentation is to identify the optimal price point for a product or service that maximizes revenue while still being competitive and appealing to customers. This can be particularly challenging when trying to balance profitability with customer demand and market conditions.

Price experimentation involves comparing two or more different pricing scenarios:

1. **Control group**: The current, existing price of the product or service.
2. **Treatment group**: A new, experimental price that is being tested.
3. **Target audience**: The specific customers who will be exposed to each pricing scenario.

By analyzing the performance of both groups over a statistically significant period, you can determine which price performs better and make data

## Gradio interface

In [62]:
def chat_generator(query):
    """Yield the ``"answer"`` value of each streamed chunk for ``query``.

    The question is sent to the module-level ``qa_chain`` via ``stream``;
    chunks that carry no ``"answer"`` key are skipped.
    """
    streamed_parts = qa_chain.stream({"question": query})
    yield from (part["answer"] for part in streamed_parts if "answer" in part)

In [None]:
# Launch the Streamlit app (./chat_interface.py) through a shell escape.
# NOTE(review): the recorded output stops at Streamlit's first-run email
# prompt, which presumably cannot be answered from a notebook cell — consider
# running this from a terminal, or check whether `--server.headless true`
# avoids the prompt.
! streamlit run ./chat_interface.py

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)



      👋 [1mWelcome to Streamlit![0m

      If you’d like to receive helpful onboarding emails, news, offers, promotions,
      and the occasional swag, please enter your email address below. Otherwise,
      leave this field blank.

      [34mEmail: [0m 

In [None]:
# (Scratch cell cleared: it held only the bare name `hi`, which is not defined
# in any visible cell and would raise NameError when run.)