In [7]:
%pip install pdfplumber

Collecting pdfplumber
  Downloading pdfplumber-0.11.6-py3-none-any.whl.metadata (42 kB)
Collecting pdfminer.six==20250327 (from pdfplumber)
  Downloading pdfminer_six-20250327-py3-none-any.whl.metadata (4.1 kB)
Collecting pypdfium2>=4.18.0 (from pdfplumber)
  Downloading pypdfium2-4.30.1-py3-none-win_amd64.whl.metadata (48 kB)
Collecting cryptography>=36.0.0 (from pdfminer.six==20250327->pdfplumber)
  Downloading cryptography-44.0.2-cp39-abi3-win_amd64.whl.metadata (5.7 kB)
Collecting cffi>=1.12 (from cryptography>=36.0.0->pdfminer.six==20250327->pdfplumber)
  Downloading cffi-1.17.1-cp312-cp312-win_amd64.whl.metadata (1.6 kB)
Collecting pycparser (from cffi>=1.12->cryptography>=36.0.0->pdfminer.six==20250327->pdfplumber)
  Using cached pycparser-2.22-py3-none-any.whl.metadata (943 bytes)
Downloading pdfplumber-0.11.6-py3-none-any.whl (60 kB)
Downloading pdfminer_six-20250327-py3-none-any.whl (5.6 MB)
   ---------------------------------------- 0.0/5.6 MB ? eta -:--:--
   -------------

In [1]:
# Load settings (presumably the OpenAI API key used by the model clients below)
# from a local .env file into the process environment, so no key is hard-coded here.
# NOTE(review): override=True is expected to let values from .env replace variables
# that are already set in the environment — confirm against the python-dotenv docs.
from dotenv import load_dotenv

load_dotenv(override=True)

True

In [2]:
from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder
from langchain_community.chat_message_histories import ChatMessageHistory
from langchain_core.runnables.history import RunnableWithMessageHistory
from langchain_openai import ChatOpenAI
from langchain_core.output_parsers import StrOutputParser

# Conversation template. Message order: the fixed system instruction first,
# then the earlier turns of the session, then the newest user question.
prompt = ChatPromptTemplate.from_messages(
    [
        ("system", "You are a Question-Answering chatbot. Please provide an answer to the given question."),
        # The placeholder name must stay "chat_history": it has to match the
        # history_messages_key given to RunnableWithMessageHistory further down.
        MessagesPlaceholder(variable_name="chat_history"),
        # The user's text is substituted for {question}.
        ("human", "#Question:\n{question}"),
    ]
)

# Chat model created with the library defaults (no model or temperature is specified).
llm = ChatOpenAI()

# Pipeline: fill in the prompt, call the model, reduce the reply to a plain string.
chain = prompt | llm | StrOutputParser()

In [3]:
# Registry of chat histories, keyed by session ID.
# Re-running this cell rebinds it to an empty dict, discarding every stored history.
store = {}


def get_session_history(session_ids):
    """Return the chat history registered for a session, creating it on first use.

    Args:
        session_ids: Key identifying the conversation session.

    Returns:
        The object stored in ``store`` under ``session_ids``. When the key is not
        present yet, a new ``ChatMessageHistory`` is created, stored and returned.

    Side effects:
        Prints the session ID on every call.
    """
    print(f"[Conversation Session ID]: {session_ids}")

    # Known session: hand back the history that is already registered.
    if session_ids in store:
        return store[session_ids]

    # Unknown session: register a fresh, empty history.
    history = ChatMessageHistory()
    store[session_ids] = history
    return history

# Wrap the chain so every call reads and updates a per-session chat history.
#   - get_session_history supplies the history object for the session being served
#   - "question" is the input key that carries the user's message
#   - "chat_history" is the prompt variable that receives the earlier messages
chain_with_history = RunnableWithMessageHistory(
    chain,
    get_session_history,
    input_messages_key="question",
    history_messages_key="chat_history",
)

### Process the initial input

In [4]:
# First turn of session "abc123": state a fact (the user's name) to be recalled later.
# The session_id under "configurable" selects which stored history this call uses.
chain_with_history.invoke(
    {"question": "My name is Kevin."},
    config={"configurable": {"session_id": "abc123"}},
)

[Conversation Session ID]: abc123


'Hello, Kevin! How can I assist you today?'

### Handle a follow-up query

In [5]:
# Second turn: reuse session "abc123" so the history from the previous call is
# available when the model is asked to recall the name.
chain_with_history.invoke(
    {"question": "What is my name?"},
    config={"configurable": {"session_id": "abc123"}},
)

[Conversation Session ID]: abc123


'Your name is Kevin.'

## Combined with RAG

In [2]:
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_community.document_loaders import PDFPlumberLoader
from langchain_community.vectorstores import FAISS
from langchain_core.output_parsers import StrOutputParser
from langchain_core.prompts import PromptTemplate
from langchain_openai import ChatOpenAI, OpenAIEmbeddings
from langchain_community.chat_message_histories import ChatMessageHistory
from langchain_core.runnables.history import RunnableWithMessageHistory
from operator import itemgetter

# --- Indexing pipeline: PDF -> chunks -> embeddings -> FAISS index -> retriever ---

# Parse the source PDF into documents.
loader = PDFPlumberLoader("documents/HowtoWriteNGSSLessonPlans.pdf")
docs = loader.load()

# Split the documents into chunks (chunk_size=1000, chunk_overlap=50).
text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=50)
split_documents = text_splitter.split_documents(docs)

# Embed the chunks and build the FAISS vector store from them.
embeddings = OpenAIEmbeddings()
vectorstore = FAISS.from_documents(split_documents, embeddings)

# Expose the vector store as a retriever, using its default settings.
retriever = vectorstore.as_retriever()

# Single-string RAG prompt with three template variables:
#   {chat_history} - earlier turns of the session (filled via history_messages_key)
#   {question}     - the current user question
#   {context}      - whatever the retriever returns for the question
# The template text is whitespace-sensitive (some lines end with a space); edit with care.
prompt = PromptTemplate.from_template(
    """You are an assistant for question-answering tasks. 
Use the following pieces of retrieved context to answer the question. 
If you don't know the answer, just say that you don't know.

#Previous Chat History:
{chat_history}

#Question: 
{question} 

#Context: 
{context} 

#Answer:"""
)

# Answering model: gpt-4o with temperature 0.
llm = ChatOpenAI(model_name="gpt-4o", temperature=0)

# Map the incoming payload onto the three prompt variables.
# The question is used twice: as the retriever query (producing "context") and verbatim.
# The retriever output goes into {context} as-is; there is no explicit formatting step.
rag_inputs = {
    "context": itemgetter("question") | retriever,
    "question": itemgetter("question"),
    "chat_history": itemgetter("chat_history"),
}

# Pipeline: gather inputs, fill in the prompt, call the model, return plain text.
chain = rag_inputs | prompt | llm | StrOutputParser()

CropBox missing from /Page, defaulting to MediaBox
CropBox missing from /Page, defaulting to MediaBox
CropBox missing from /Page, defaulting to MediaBox
CropBox missing from /Page, defaulting to MediaBox
CropBox missing from /Page, defaulting to MediaBox
CropBox missing from /Page, defaulting to MediaBox
CropBox missing from /Page, defaulting to MediaBox
CropBox missing from /Page, defaulting to MediaBox
CropBox missing from /Page, defaulting to MediaBox
CropBox missing from /Page, defaulting to MediaBox
CropBox missing from /Page, defaulting to MediaBox


In [3]:
# Registry of chat histories, keyed by session ID.
# Re-running this cell rebinds it to an empty dict, discarding every stored history.
store = {}


def get_session_history(session_ids):
    """Return the chat history registered for a session, creating it on first use.

    Args:
        session_ids: Key identifying the conversation session.

    Returns:
        The object stored in ``store`` under ``session_ids``. When the key is not
        present yet, a new ``ChatMessageHistory`` is created, stored and returned.

    Side effects:
        Prints the session ID on every call.
    """
    print(f"[Conversation Session ID]: {session_ids}")

    # Known session: hand back the history that is already registered.
    if session_ids in store:
        return store[session_ids]

    # Unknown session: register a fresh, empty history.
    history = ChatMessageHistory()
    store[session_ids] = history
    return history

# Wrap the RAG chain so every call reads and updates a per-session chat history.
#   - get_session_history supplies the history object for the session being served
#   - "question" is the input key that carries the user's question
#   - "chat_history" is the prompt variable that receives the earlier messages
rag_with_history = RunnableWithMessageHistory(
    chain,
    get_session_history,
    input_messages_key="question",
    history_messages_key="chat_history",
)

In [4]:
# First turn of session "rag123": a question answered from the indexed PDF.
# The session_id under "configurable" selects which stored history this call uses.
rag_with_history.invoke(
    {"question": "What is important in building an NGSS aligned unit?"},
    config={"configurable": {"session_id": "rag123"}},
)

[Conversation Session ID]: rag123


'In building an NGSS-aligned unit, it is important to focus on several key aspects:\n\n1. **Performance Expectations (PEs):** Start with a Performance Expectation to guide what students should know by the end of a topic. This helps in developing lessons and activities that align with these expectations.\n\n2. **Three Dimensions of NGSS:** Incorporate the three dimensions into lesson plans: \n   - **Science and Engineering Practices:** Engage students in practices to explore phenomena.\n   - **Crosscutting Concepts:** Use these concepts to support understanding of core ideas.\n   - **Disciplinary Core Ideas:** Focus on the core ideas that students need to understand.\n\n3. **Student Ideas and Prior Knowledge:** Consider commonly-held student ideas and prior concepts that are necessary for understanding the core ideas. Build on these ideas during instruction.\n\n4. **Flexibility and Adaptation:** There is flexibility in preparing lesson plans, allowing adaptation to state, school distric

In [5]:
# Second turn: reuse session "rag123" so "the previous answer" can be resolved
# from the stored chat history.
rag_with_history.invoke(
    {"question": "Please translate the previous answer into Spanish."},
    config={"configurable": {"session_id": "rag123"}},
)

[Conversation Session ID]: rag123


'En la construcción de una unidad alineada con NGSS, es importante centrarse en varios aspectos clave:\n\n1. **Expectativas de Desempeño (PEs):** Comienza con una Expectativa de Desempeño para guiar lo que los estudiantes deben saber al final de un tema. Esto ayuda a desarrollar lecciones y actividades que se alineen con estas expectativas.\n\n2. **Tres Dimensiones de NGSS:** Incorpora las tres dimensiones en los planes de lecciones:\n   - **Prácticas de Ciencia e Ingeniería:** Involucra a los estudiantes en prácticas para explorar fenómenos.\n   - **Conceptos Transversales:** Utiliza estos conceptos para apoyar la comprensión de ideas centrales.\n   - **Ideas Centrales Disciplinarias:** Enfócate en las ideas centrales que los estudiantes necesitan entender.\n\n3. **Ideas y Conocimientos Previos de los Estudiantes:** Considera las ideas comúnmente sostenidas por los estudiantes y los conceptos previos necesarios para entender las ideas centrales. Construye sobre estas ideas durante la 