Step-1 : Extract the PDF text

Step-2 : Chunk the extracted PDF text

Step-3 : Create a vector store with the PDF chunks

Step-4 : Create a retriever which returns the relevant chunks

Step-5 : Build context from the relevant chunk texts

Step-6 : Build the RAG chain from the RAG prompt, the LLM, and a string output parser.

Step-7 : Run the RAG chain to get the answer.

### Install and Import all the libraries

In [None]:
# !pip install -qU langchain langchain-community langchain-text-splitters
# !pip install -qU langchain-openai langchain-chroma pypdf

In [2]:
from langchain_community.document_loaders import PyPDFLoader
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_chroma import Chroma
from langchain.embeddings.huggingface import HuggingFaceEmbeddings
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnableLambda

  from .autonotebook import tqdm as notebook_tqdm


## Set up the LLM API keys

In [3]:
import os
from dotenv import load_dotenv, find_dotenv
# Load variables from the nearest .env file into the process environment.
_ = load_dotenv(find_dotenv())

# The OpenAI key is mandatory (the RAG chain below uses ChatOpenAI), so fail
# fast with a KeyError when it is missing.
openai_api_key = os.environ["OPENAI_API_KEY"]

# The Groq key is optional: the Groq LLM is only a commented-out alternative
# in this notebook, so a missing key must not abort the run (None if unset).
groq_api_key = os.environ.get("GROQ_API_KEY")

## Extract PDF Text

In [None]:
# Download the PDF file (skipped when a local copy already exists)
import os
import requests

pdf_url = 'https://arxiv.org/pdf/1706.03762'
pdf_path = 'attention_is_all_you_need.pdf'

if not os.path.exists(pdf_path):
    # A timeout keeps the cell from hanging forever on a stalled connection.
    response = requests.get(pdf_url, timeout=30)
    # Fail loudly on HTTP errors instead of saving an error page as the "PDF".
    response.raise_for_status()

    with open(pdf_path, 'wb') as file:
        file.write(response.content)

In [6]:
from typing import List
from langchain.schema import Document

def pdf_extract(pdf_path: str) -> List[Document]:
    """
    Load a PDF from disk and return its content as LangChain documents.

    Parameters:
    pdf_path (str): Location of the PDF file to read.

    Returns:
    List[Document]: The Document objects produced by PyPDFLoader.load().
    """
    return PyPDFLoader(pdf_path).load()
    
  

In [7]:
pdf_text=pdf_extract(pdf_path)


In [8]:
print(r"number of pages in pdf:", len(pdf_text))

number of pages in pdf: 15


## Chunk PDF Text

In [9]:
def pdf_chunk(documents: List[Document], chunk_size: int = 1000, chunk_overlap: int = 200) -> List[Document]:
    """
    Break documents into smaller, overlapping pieces.

    Parameters:
    documents (List[Document]): Documents whose text should be split.
    chunk_size (int): Upper bound on the length of a chunk, measured with len(). Default is 1000.
    chunk_overlap (int): Amount of text shared between neighbouring chunks, measured with len(). Default is 200.

    Returns:
    List[Document]: The chunks produced by RecursiveCharacterTextSplitter.
    """
    splitter_options = {
        "chunk_size": chunk_size,
        "chunk_overlap": chunk_overlap,
        "length_function": len,
    }
    return RecursiveCharacterTextSplitter(**splitter_options).split_documents(documents)

In [10]:
chunks=pdf_chunk(pdf_text)
print(r"number of chunks created:", len(chunks))

number of chunks created: 52


In [11]:
print(chunks[0])

page_content='Provided proper attribution is provided, Google hereby grants permission to
reproduce the tables and figures in this paper solely for use in journalistic or
scholarly works.
Attention Is All You Need
Ashish Vaswani∗
Google Brain
avaswani@google.com
Noam Shazeer∗
Google Brain
noam@google.com
Niki Parmar∗
Google Research
nikip@google.com
Jakob Uszkoreit∗
Google Research
usz@google.com
Llion Jones∗
Google Research
llion@google.com
Aidan N. Gomez∗ †
University of Toronto
aidan@cs.toronto.edu
Łukasz Kaiser∗
Google Brain
lukaszkaiser@google.com
Illia Polosukhin∗ ‡
illia.polosukhin@gmail.com
Abstract
The dominant sequence transduction models are based on complex recurrent or
convolutional neural networks that include an encoder and a decoder. The best
performing models also connect the encoder and decoder through an attention
mechanism. We propose a new simple network architecture, the Transformer,
based solely on attention mechanisms, dispensing with recurr

## Create Vector store

In [None]:
# Location of the on-disk Chroma database used by the rest of the notebook
import os

persist_directory = "vector_db/chroma_db_pdf"

# Make sure the folder (and any missing parents) is there; exist_ok=True
# makes re-running this cell harmless.
os.makedirs(name=persist_directory, exist_ok=True)




Vector DB directory created at: c:\Users\kumar\RAG\vector_db\chroma_db_pdf


In [19]:
def create_vector_store(chunks: List[Document], persist_directory: str) -> Chroma:
    """
    Embed the given chunks and store them in a Chroma vector store.

    Parameters:
    chunks (List[Document]): Document chunks to embed and index.
    persist_directory (str): Directory passed to Chroma as its persistence location.

    Returns:
    Chroma: The vector store built from the chunks with the "BAAI/bge-large-en" embedding model.
    """
    return Chroma.from_documents(
        documents=chunks,
        embedding=HuggingFaceEmbeddings(model_name="BAAI/bge-large-en"),
        persist_directory=persist_directory,
    )

In [20]:
db = create_vector_store(chunks, persist_directory)

## Retrieve the Relevant Chunks

In [None]:
# def retrieve_context(db: Chroma, query: str) -> List[Document]:
#     """
#     Retrieves relevant document chunks from the Chroma vector store based on a query.

#     Parameters:
#     db (Chroma): The Chroma vector store containing embedded documents.
#     query (str): The query string to search for relevant document chunks.

#     Returns:
#     List[Document]: A list of retrieved relevant document chunks.
#     """

#     retriever = db.as_retriever(search_type="similarity", search_kwargs={"k": 2})
#     print("Relevant chunks are retrieved...\n")
#     relevant_chunks = retriever.invoke(query)

#     return relevant_chunks

In [54]:
def retrieve_context(db, query, k=2):
    """
    Retrieves relevant document chunks from the Chroma vector store based on a query.

    Parameters:
    db (Chroma): The Chroma vector store containing embedded documents. Any
        object exposing ``as_retriever(search_type=..., search_kwargs=...)``
        works.
    query (str): The query string to search for relevant document chunks.
    k (int): Number of chunks to request from the similarity search.
        Defaults to 2, the value that used to be hard-coded.

    Returns:
    List[Document]: A list of retrieved relevant document chunks.

    Raises:
    ValueError: If ``k`` is smaller than 1.
    """
    if k < 1:
        raise ValueError(f"k must be a positive integer, got {k!r}")

    retriever = db.as_retriever(search_type="similarity", search_kwargs={"k": k})
    relevant_chunks = retriever.invoke(query)

    # Report success only after the retrieval has actually happened.
    print("Relevant chunks are retrieved...\n")

    return relevant_chunks

In [55]:
query = "Explain transformer model in one line"

relevant_chunks = retrieve_context(db, query)

Relevant chunks are retrieved...



In [56]:
print(f"Number of relevant chunks = {len(relevant_chunks)}")

Number of relevant chunks = 2


In [24]:
# Show every retrieved chunk under a numbered header, followed by blank lines.
for i, chunk in enumerate(relevant_chunks):
    print(f"Chunk-{i}", chunk, "\n", sep="\n")

Chunk-0
page_content='sequential nature precludes parallelization within training examples, which becomes critical at longer
sequence lengths, as memory constraints limit batching across examples. Recent work has achieved
significant improvements in computational efficiency through factorization tricks [21] and conditional
computation [32], while also improving model performance in case of the latter. The fundamental
constraint of sequential computation, however, remains.
Attention mechanisms have become an integral part of compelling sequence modeling and transduc-
tion models in various tasks, allowing modeling of dependencies without regard to their distance in
the input or output sequences [2, 19]. In all but a few cases [27], however, such attention mechanisms
are used in conjunction with a recurrent network.
In this work we propose the Transformer, a model architecture eschewing recurrence and instead
relying entirely on an attention mechanism to draw global dependencies between 

## Build context

In [37]:
def build_context(relevant_chunks: List[Document]) -> str:
    """
    Merge the text of the retrieved chunks into one context string.

    Parameters:
    relevant_chunks (List[Document]): Chunks whose page_content should be combined.

    Returns:
    str: The page_content of every chunk, in order, separated by newlines.
    """
    print("Context is build from the relevant chunks")
    return "\n".join(doc.page_content for doc in relevant_chunks)

In [38]:
context=build_context(relevant_chunks)
print(context)

Context is build from the relevant chunks
sequential nature precludes parallelization within training examples, which becomes critical at longer
sequence lengths, as memory constraints limit batching across examples. Recent work has achieved
significant improvements in computational efficiency through factorization tricks [21] and conditional
computation [32], while also improving model performance in case of the latter. The fundamental
constraint of sequential computation, however, remains.
Attention mechanisms have become an integral part of compelling sequence modeling and transduc-
tion models in various tasks, allowing modeling of dependencies without regard to their distance in
the input or output sequences [2, 19]. In all but a few cases [27], however, such attention mechanisms
are used in conjunction with a recurrent network.
In this work we propose the Transformer, a model architecture eschewing recurrence and instead
relying entirely on an attention mechanism to draw global d

## Combine All Steps into One Function

In [41]:
import os
from typing import Dict
from langchain.embeddings import OpenAIEmbeddings
from langchain.vectorstores import Chroma

def get_context(inputs: Dict[str, str]) -> Dict[str, str]:
    """
    Creates or loads a vector store for a given PDF file and extracts relevant chunks based on a query.

    An existing store is reused only when it actually contains documents.
    Checking that the directory exists is not sufficient: this notebook
    pre-creates the (empty) directory with os.makedirs, and opening an empty
    directory with Chroma yields a store with nothing to retrieve.

    Args:
        inputs (Dict[str, str]): A dictionary containing the following keys:
            - 'pdf_path' (str): Path to the PDF file.
            - 'query' (str): The user query.
            - 'db_path' (str): Path to the vector database.

    Returns:
        Dict[str, str]: A dictionary containing:
            - 'context' (str): Extracted relevant context.
            - 'query' (str): The user query.

    Raises:
        KeyError: If one of the required keys is missing from ``inputs``.
    """
    pdf_path, query, db_path = inputs['pdf_path'], inputs['query'], inputs['db_path']

    db = None

    # Try to reuse a previously persisted store, but only if it is non-empty.
    if os.path.isdir(db_path) and os.listdir(db_path):
        print("Loading the existing vector store\n")
        embedding_model = HuggingFaceEmbeddings(model_name="BAAI/bge-large-en")
        existing_db = Chroma(persist_directory=db_path, embedding_function=embedding_model)
        # Fetching a single id is a cheap way to find out whether anything is indexed.
        if existing_db.get(limit=1)["ids"]:
            db = existing_db

    # Nothing usable on disk: index the PDF from scratch.
    if db is None:
        print("Creating a new vector store...\n")
        pdf_text = pdf_extract(pdf_path)
        chunks = pdf_chunk(pdf_text)
        db = create_vector_store(chunks, db_path)

    relevant_chunks = retrieve_context(db, query)
    context = build_context(relevant_chunks)

    return {'context': context, 'query': query}

In [42]:
context= get_context({'pdf_path': pdf_path, 'query': query, 'db_path': persist_directory})
print(context)

Loading the existing vector store

Relevant chunks are retrieved...

Context is build from the relevant chunks
{'context': 'sequential nature precludes parallelization within training examples, which becomes critical at longer\nsequence lengths, as memory constraints limit batching across examples. Recent work has achieved\nsignificant improvements in computational efficiency through factorization tricks [21] and conditional\ncomputation [32], while also improving model performance in case of the latter. The fundamental\nconstraint of sequential computation, however, remains.\nAttention mechanisms have become an integral part of compelling sequence modeling and transduc-\ntion models in various tasks, allowing modeling of dependencies without regard to their distance in\nthe input or output sequences [2, 19]. In all but a few cases [27], however, such attention mechanisms\nare used in conjunction with a recurrent network.\nIn this work we propose the Transformer, a model architecture e

## Build RAG Chain

In [None]:
from langchain_openai import ChatOpenAI
from langchain_groq import ChatGroq

# Prompt text: the model is told to answer from the supplied context only and
# to use a fixed fallback sentence when the context does not hold the answer.
template = """ You are an AI model trained for question answering. You should answer the
  given question based on the given context only.
  Question : {query}
  \n
  Context : {context}
  \n
  If the answer is not present in the given context, respond as: The answer to this question is not available
  in the provided content.
  """

# Chat model and output parser (the Groq model is kept as a commented alternative).
#llm = ChatGroq(model="llama3-70b-8192")
llm = ChatOpenAI(model='gpt-4o-mini')
str_parser = StrOutputParser()

rag_prompt = ChatPromptTemplate.from_template(template)

# Pipeline: inputs -> get_context -> prompt -> LLM -> plain string.
rag_chain = RunnableLambda(get_context) | rag_prompt | llm | str_parser
    

## Query

In [45]:
# Write the query
query = 'Explain transformer model in one line'

In [52]:
answer = rag_chain.invoke({'pdf_path':pdf_path, 'query':query, 'db_path':persist_directory})

Loading the existing vector store

Relevant chunks are retrieved...

Context is build from the relevant chunks


In [53]:

print(f"Query:{query}\n")
print(f"Generated answer:{answer}")

Query:Explain transformer model in one line

Generated answer:The Transformer is a model architecture that relies entirely on an attention mechanism to draw global dependencies between input and output, without using recurrence.


In [51]:
# Simple question/answer loop on top of the RAG chain.
while True:
    # Strip surrounding whitespace so inputs such as "exit " still terminate the loop.
    question = input("Enter your question (or type 'exit' to quit): ").strip()

    if question.lower() == "exit":
        print("Exiting the program.")
        break

    # Ignore blank input instead of spending a retrieval + LLM call on it.
    if not question:
        continue

    response = rag_chain.invoke({
        "query": question,
        "pdf_path": pdf_path,
        "db_path": persist_directory
    })

    print("\nResponse:", response, "\n")

Loading the existing vector store

Relevant chunks are retrieved...

Context is build from the relevant chunks

Response: Self-attention, sometimes called intra-attention, is an attention mechanism relating different positions of a single sequence in order to compute a representation of the sequence. 

Loading the existing vector store

Relevant chunks are retrieved...

Context is build from the relevant chunks

Response: The answer to this question is not available in the provided content. 

Exiting the program.


## streamlit 

In [93]:
%%writefile rag_app1.py

import streamlit as st
import requests
import os
from typing import List
from dotenv import load_dotenv, find_dotenv
import json
from datetime import datetime
import sys

# LangChain imports
from langchain_community.document_loaders import PyPDFLoader
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_chroma import Chroma
from langchain_huggingface import HuggingFaceEmbeddings
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnableLambda
from langchain.schema import Document
from langchain_openai import ChatOpenAI

# Load environment variables
_ = load_dotenv(find_dotenv())
openai_api_key = os.environ.get("OPENAI_API_KEY")

# NOTE: no Streamlit element is emitted here on purpose. main() calls
# st.set_page_config(), which many Streamlit versions require to be the first
# Streamlit command of the script; an st.error() at import time would make
# that call fail and hide the actual problem. main() reports a missing
# OPENAI_API_KEY itself, right after configuring the page.

class RAGSystem:
    """Retrieval-augmented QA pipeline over the 'Attention Is All You Need' PDF.

    The class downloads the PDF, builds (or reloads) a persistent Chroma
    vector store from its chunks, and exposes a LangChain chain that answers
    questions from the retrieved context. Progress and errors are reported
    through Streamlit status elements.
    """

    def __init__(self):
        self.pdf_path = 'attention_is_all_you_need.pdf'
        self.persist_directory = "vector_db/chroma_db_pdf"
        self.db = None
        self.rag_chain = None
        self.initialized = False
        # Created lazily and reused: instantiating the embedding model is
        # expensive and must not happen again on every query.
        self.embedding_model = None
        # Path the cached ``self.db`` was opened from.
        self._db_path = None

    def _get_embedding_model(self):
        """Return the shared embedding model, creating it on first use."""
        if self.embedding_model is None:
            self.embedding_model = HuggingFaceEmbeddings(
                model_name="BAAI/bge-large-en",
                model_kwargs={'device': 'cpu'}
            )
        return self.embedding_model

    def _load_existing_store(self, db_path: str):
        """Open the store persisted at ``db_path``; return None if it is missing or empty."""
        # An empty directory is not a store: setup() creates the directory up
        # front, so its mere existence says nothing about indexed content.
        if not os.path.isdir(db_path) or not os.listdir(db_path):
            return None

        db = Chroma(
            persist_directory=db_path,
            embedding_function=self._get_embedding_model()
        )
        # Fetching a single id is a cheap way to detect a store without documents.
        if not db.get(limit=1)["ids"]:
            return None
        return db

    def download_pdf(self):
        """Download the PDF file if it does not exist yet.

        Returns:
            bool: True when the file is available locally, False when the
            download failed (the error is shown via ``st.error``).
        """
        try:
            if not os.path.exists(self.pdf_path):
                st.info("Downloading PDF file...")
                pdf_url = 'https://arxiv.org/pdf/1706.03762'
                response = requests.get(pdf_url, timeout=30)
                response.raise_for_status()
                with open(self.pdf_path, 'wb') as file:
                    file.write(response.content)
                st.success("PDF downloaded successfully!")
            return True
        except Exception as e:
            st.error(f"Error downloading PDF: {e}")
            return False

    def pdf_extract(self, pdf_path: str) -> List[Document]:
        """Extract text from the PDF at ``pdf_path``.

        Returns:
            List[Document]: Documents loaded by PyPDFLoader, or an empty list
            when the file is missing or loading fails.
        """
        try:
            if not os.path.exists(pdf_path):
                st.error(f"PDF file not found at {pdf_path}")
                return []

            loader = PyPDFLoader(pdf_path)
            documents = loader.load()
            st.info(f"Successfully loaded {len(documents)} pages from PDF")
            return documents
        except Exception as e:
            st.error(f"Error loading PDF: {e}")
            return []

    def pdf_chunk(self, documents: List[Document], chunk_size: int = 1000, chunk_overlap: int = 200) -> List[Document]:
        """Split documents into overlapping chunks.

        Args:
            documents: Documents to split.
            chunk_size: Maximum chunk length, measured with ``len``.
            chunk_overlap: Overlap between neighbouring chunks, measured with ``len``.

        Returns:
            List[Document]: The chunks, or an empty list when there is nothing
            to split or splitting fails.
        """
        if not documents:
            st.warning("No documents to chunk")
            return []

        try:
            text_splitter = RecursiveCharacterTextSplitter(
                chunk_size=chunk_size,
                chunk_overlap=chunk_overlap,
                length_function=len
            )
            chunks = text_splitter.split_documents(documents)
            st.info(f"Created {len(chunks)} chunks from documents")
            return chunks
        except Exception as e:
            st.error(f"Error chunking documents: {e}")
            return []

    def create_vector_store(self, chunks: List[Document], persist_directory: str) -> Chroma:
        """Create a Chroma vector store from ``chunks``, persisted in ``persist_directory``.

        Returns:
            Chroma: The new vector store, or None when creation fails.
        """
        try:
            vectordb = Chroma.from_documents(
                documents=chunks,
                embedding=self._get_embedding_model(),
                persist_directory=persist_directory
            )
            st.success("Vector store created successfully!")
            return vectordb
        except Exception as e:
            st.error(f"Error creating vector store: {e}")
            return None

    def retrieve_context(self, db, query: str) -> List[Document]:
        """Retrieve the two most similar chunks for ``query`` from ``db``.

        Returns:
            List[Document]: The retrieved chunks, or an empty list on error.
        """
        try:
            retriever = db.as_retriever(search_type="similarity", search_kwargs={"k": 2})
            relevant_chunks = retriever.invoke(query)
            return relevant_chunks
        except Exception as e:
            st.error(f"Error retrieving context: {e}")
            return []

    def build_context(self, relevant_chunks: List[Document]) -> str:
        """Join the text of the chunks with newlines, or return a placeholder if there are none."""
        if not relevant_chunks:
            return "No relevant context found."

        context = "\n".join([chunk.page_content for chunk in relevant_chunks])
        return context

    def get_context(self, inputs: dict) -> dict:
        """Resolve the vector store and return the retrieval context for a query.

        Args:
            inputs: Mapping with the keys 'pdf_path', 'query' and 'db_path'.

        Returns:
            dict: ``{'context': str, 'query': str}``. On failure the context
            holds a short explanatory message instead of retrieved text.
        """
        try:
            pdf_path, query, db_path = inputs['pdf_path'], inputs['query'], inputs['db_path']

            if self.db is not None and self._db_path == db_path:
                # Reuse the store opened by an earlier query.
                db = self.db
            else:
                db = self._load_existing_store(db_path)
                if db is None:
                    # No usable store on disk: index the PDF from scratch.
                    st.info("Creating a new vector store...")
                    pdf_text = self.pdf_extract(pdf_path)
                    if not pdf_text:
                        return {'context': 'No PDF content available.', 'query': query}
                    chunks = self.pdf_chunk(pdf_text)
                    if not chunks:
                        return {'context': 'No chunks created from PDF.', 'query': query}
                    db = self.create_vector_store(chunks, db_path)
                    if db is None:
                        return {'context': 'Failed to create vector store.', 'query': query}
                self.db, self._db_path = db, db_path

            relevant_chunks = self.retrieve_context(db, query)
            context = self.build_context(relevant_chunks)

            return {'context': context, 'query': query}
        except Exception as e:
            st.error(f"Error in get_context: {e}")
            # .get(): the failure may be the missing 'query' key itself.
            return {'context': f'Error retrieving context: {e}', 'query': inputs.get('query', '')}

    def initialize_rag_chain(self):
        """Build the chain: context lookup -> prompt -> LLM -> string parser.

        Returns:
            bool: True on success, False when the API key is missing or
            construction fails.
        """
        try:
            if not openai_api_key:
                st.error("OpenAI API key not found. Please check your .env file.")
                return False

            template = """You are an AI model trained for question answering. You should answer the
            given question based on the given context only.
            Question: {query}
            
            Context: {context}
            
            If the answer is not present in the given context, respond as: The answer to this question is not available
            in the provided content.
            """

            rag_prompt = ChatPromptTemplate.from_template(template)
            llm = ChatOpenAI(
                model='gpt-4o-mini', 
                openai_api_key=openai_api_key, 
                temperature=0,
                max_retries=2
            )
            str_parser = StrOutputParser()

            self.rag_chain = (
                RunnableLambda(self.get_context)
                | rag_prompt
                | llm
                | str_parser
            )
            return True
        except Exception as e:
            st.error(f"Error initializing RAG chain: {e}")
            return False

    def setup(self):
        """Download the PDF, prepare the store directory and build the chain.

        Returns:
            bool: True when the system is ready to answer questions.
        """
        try:
            # Download PDF
            if not self.download_pdf():
                return False

            # Create directory for vector store. The store itself is built
            # lazily by get_context(), which treats an empty directory as
            # "not indexed yet".
            os.makedirs(self.persist_directory, exist_ok=True)

            # Initialize RAG chain
            if self.initialize_rag_chain():
                self.initialized = True
                st.success("RAG System initialized successfully!")
                return True
            else:
                st.error("Failed to initialize RAG chain")
                return False

        except Exception as e:
            st.error(f"Error during setup: {e}")
            return False

def save_qa_history(question, answer, context=""):
    """Append a timestamped question/answer record to the session's QA history.

    The 'qa_history' list is created in st.session_state on first use.
    """
    if 'qa_history' not in st.session_state:
        st.session_state.qa_history = []

    recorded_at = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
    st.session_state.qa_history.append(
        dict(timestamp=recorded_at, question=question, answer=answer, context=context)
    )

def export_qa_history():
    """Return the session's QA history as a JSON string, or None when it is missing or empty."""
    if 'qa_history' not in st.session_state:
        return None

    history = st.session_state.qa_history
    return json.dumps(history, indent=2) if history else None

def main():
    """Render the Streamlit UI: setup, question form, answer display and QA history.

    Streamlit re-executes this function on every user interaction, so
    expensive results (the RAG system and the PDF statistics) are kept in
    ``st.session_state``.
    """
    st.set_page_config(
        page_title="RAG System with Transformers Paper",
        page_icon="📚",
        layout="wide"
    )

    st.title("🤖 RAG System with 'Attention Is All You Need' Paper")
    st.markdown("Ask questions about the famous Transformer paper!")

    # Check for OpenAI API key
    if not openai_api_key:
        st.error("❌ OpenAI API key not found. Please make sure you have a .env file with OPENAI_API_KEY=your_key")
        st.info("Create a .env file in the same directory with: OPENAI_API_KEY=your_key_here")
        return

    # Initialize RAG system
    if 'rag_system' not in st.session_state:
        st.session_state.rag_system = RAGSystem()
        with st.spinner("Initializing RAG system... This may take a few minutes."):
            success = st.session_state.rag_system.setup()
            if not success:
                # Drop the half-initialized object so the next rerun retries
                # the setup instead of being stuck until the app is restarted.
                del st.session_state['rag_system']
                st.error("Failed to initialize RAG system. Please check the errors above.")
                return

    rag_system = st.session_state.rag_system

    # Initialize QA history if not exists
    if 'qa_history' not in st.session_state:
        st.session_state.qa_history = []

    # Check if system is properly initialized
    if not getattr(rag_system, 'initialized', False):
        st.error("RAG system not properly initialized. Please restart the app.")
        return

    # Display PDF info
    with st.expander("📄 PDF Information", expanded=False):
        if os.path.exists(rag_system.pdf_path):
            # Parsing and chunking the PDF is slow; compute the counts once
            # per session instead of on every rerun.
            if 'pdf_stats' not in st.session_state:
                pdf_text = rag_system.pdf_extract(rag_system.pdf_path)
                if pdf_text:
                    chunks = rag_system.pdf_chunk(pdf_text)
                    st.session_state.pdf_stats = (len(pdf_text), len(chunks))

            if 'pdf_stats' in st.session_state:
                page_count, chunk_count = st.session_state.pdf_stats
                st.write(f"✅ Number of pages in PDF: {page_count}")
                st.write(f"✅ Number of chunks created: {chunk_count}")
            else:
                st.error("❌ Failed to load PDF content")
        else:
            st.error("❌ PDF file not found")

    # Query section
    st.subheader("💬 Ask a Question")

    # Example questions
    col1, col2, col3 = st.columns(3)
    with col1:
        if st.button("Explain transformer model", use_container_width=True):
            st.session_state.question = "Explain transformer model in one line"
    with col2:
        if st.button("What is attention mechanism?", use_container_width=True):
            st.session_state.question = "What is attention mechanism?"
    with col3:
        if st.button("Key contributions", use_container_width=True):
            st.session_state.question = "What are the key contributions of this paper?"

    # Question input (pre-filled with the last example question, if any)
    question = st.text_input(
        "Enter your question:",
        value=getattr(st.session_state, 'question', ''),
        placeholder="e.g., Explain transformer model in one line"
    )

    # Process question
    if st.button("Get Answer", type="primary") and question:
        if not rag_system.rag_chain:
            st.error("RAG chain not available. Please check if the system initialized correctly.")
            return

        with st.spinner("Searching for answer... This may take a few seconds."):
            try:
                inputs = {
                    'pdf_path': rag_system.pdf_path,
                    'query': question,
                    'db_path': rag_system.persist_directory
                }
                answer = rag_system.rag_chain.invoke(inputs)

                st.subheader("📝 Answer:")
                st.success(answer)

                # The chain returns only the answer text, so the context is
                # retrieved once more for display and for the history.
                context_info = rag_system.get_context(inputs)
                context = context_info['context']

                # Save question and answer to history
                save_qa_history(question, answer, context)

                # Show retrieved context (optional)
                with st.expander("🔍 View Retrieved Context"):
                    st.text_area("Retrieved Context:", context, height=200, key="context_view")

            except Exception as e:
                st.error(f"Error processing question: {str(e)}")
                st.info("This might be due to: 1) PDF not loading properly, 2) Vector store issues, 3) API limits")

    # Display QA History
    if st.session_state.qa_history:
        st.subheader("📚 Question & Answer History")

        # Export functionality
        history_json = export_qa_history()
        if history_json:
            st.download_button(
                label="📥 Export QA History as JSON",
                data=history_json,
                file_name=f"qa_history_{datetime.now().strftime('%Y%m%d_%H%M%S')}.json",
                mime="application/json",
                use_container_width=True
            )

        # Display history in reverse order (newest first)
        for i, qa in enumerate(reversed(st.session_state.qa_history)):
            # st.expander is called without `key`, and the context is shown
            # directly rather than in a nested expander: neither is accepted
            # by every Streamlit version.
            with st.expander(f"Q: {qa['question'][:50]}... - {qa['timestamp']}"):
                st.write(f"**Question:** {qa['question']}")
                st.write(f"**Answer:** {qa['answer']}")
                st.write(f"**Time:** {qa['timestamp']}")
                st.text_area("Context",
                             qa['context'],
                             height=150,
                             key=f"context_text_{i}")

        # Clear all history
        if st.button("🗑️ Clear All History", key="clear_all", use_container_width=True):
            st.session_state.qa_history = []
            st.rerun()
    else:
        st.info("No questions asked yet. Ask a question above to see the history here!")

    # System information
    with st.expander("⚙️ System Information", expanded=False):
        st.write(f"✅ PDF Path: {rag_system.pdf_path}")
        st.write(f"✅ Vector DB Path: {rag_system.persist_directory}")
        st.write("✅ Embedding Model: BAAI/bge-large-en")
        st.write("✅ LLM Model: gpt-4o-mini")
        st.write(f"✅ Total QA pairs saved: {len(st.session_state.qa_history)}")
        st.write(f"✅ System Initialized: {rag_system.initialized}")

if __name__ == "__main__":
    main()

Writing rag_app1.py


In [94]:
# Run Streamlit in the background  #
#streamlit run rag_app1.py --logger.level=debug
# ! streamlit run rag_app1.py & npx localtunnel --port 8500

! streamlit run rag_app1.py


  You can now view your Streamlit app in your browser.

  Local URL: http://localhost:8501
  Network URL: http://192.168.1.3:8501



## streamlit sample

In [95]:
%%writefile test_app.py
import streamlit as st

st.title("Simple Test App")
st.write("If this works, then Streamlit is running properly")
name = st.text_input("Enter your name")
if name:
    st.write(f"Hello, {name}!")

Writing test_app.py


In [97]:
! streamlit run test_app.py

^C


In [75]:

# # Run Streamlit in the background
# ! streamlit run rag_app.py & npx localtunnel --port 8501