In [2]:
import os

# Required setting: a missing GCP_PROJECT_ID raises KeyError right here.
GCP_PROJECT_ID = os.environ['GCP_PROJECT_ID']

# Project name and tracing switch exported through the process environment
# (presumably read by LangSmith/LangChain tracing -- confirm against their docs).
os.environ.update(
    LANGCHAIN_PROJECT="obot",
    LANGSMITH_TRACING="true",
)


In [3]:
from langchain_google_firestore import FirestoreVectorStore
from langchain_google_vertexai import VertexAIEmbeddings

# Embedding model handed to the vector store below.
embedding_model = VertexAIEmbeddings(model_name="text-embedding-004", project=GCP_PROJECT_ID)

# Vector store over the "vector_index" collection; documents keep their text in
# the 'content' field and their vector in the 'embedding' field.
vector_store = FirestoreVectorStore(
    collection="vector_index",
    embedding_service=embedding_model,
    content_field='content',
    embedding_field='embedding',
)

# Retriever view of the store; k=5 is passed through as a search kwarg
# (presumably the number of documents returned per query -- confirm).
retriever = vector_store.as_retriever(search_kwargs=dict(k=5))

In [4]:
from langchain_google_genai import ChatGoogleGenerativeAI
from google.generativeai.types.safety_types import HarmBlockThreshold, HarmCategory

# Every listed harm category gets the same BLOCK_ONLY_HIGH threshold.
safety_settings = {
    harm_category: HarmBlockThreshold.BLOCK_ONLY_HIGH
    for harm_category in (
        HarmCategory.HARM_CATEGORY_DANGEROUS_CONTENT,
        HarmCategory.HARM_CATEGORY_HATE_SPEECH,
        HarmCategory.HARM_CATEGORY_HARASSMENT,
        HarmCategory.HARM_CATEGORY_SEXUALLY_EXPLICIT,
    )
}

# Settings shared by both chat models; only temperature and max_tokens differ.
_shared_llm_kwargs = dict(
    model="gemini-1.5-flash-001",
    max_retries=6,
    safety_settings=safety_settings,
)

# Short-output model (used later to reformulate questions for retrieval).
instruct_llm = ChatGoogleGenerativeAI(temperature=0.5, max_tokens=256, **_shared_llm_kwargs)

# Longer-output model (used later to write the final answer).
qa_llm = ChatGoogleGenerativeAI(temperature=0.7, max_tokens=1024, **_shared_llm_kwargs)

  from .autonotebook import tqdm as notebook_tqdm


In [5]:
from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder
from langchain_core.chat_history import InMemoryChatMessageHistory
from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain.chains import create_history_aware_retriever, create_retrieval_chain
from langchain_core.runnables.history import RunnableWithMessageHistory

class SafetyException(Exception):
    """Signals that a model response was flagged as unsafe."""
def llm_output_safety_filter(ai_message):
    """Pass a model message through unless it was stopped for safety reasons.

    Args:
        ai_message: Object exposing a ``response_metadata`` mapping.

    Returns:
        The same ``ai_message`` object, unchanged.

    Raises:
        SafetyException: If ``response_metadata['finish_reason']`` is ``'SAFETY'``.
            The message includes the reported ``safety_ratings`` (``None`` when
            the metadata carries no ratings).
    """
    # Read the metadata defensively: a message without 'finish_reason' (or without
    # 'safety_ratings') must not fail with a KeyError that hides the real outcome.
    metadata = getattr(ai_message, "response_metadata", None) or {}
    if metadata.get('finish_reason') == 'SAFETY':
        raise SafetyException("Chatbot thinks this message is unsafe\n{}".format(metadata.get('safety_ratings')))
    return ai_message

# Instruction text placed after the chat history and the latest user input; it
# asks the model to rewrite that input as a standalone question and not to answer it.
contextualize_question_instruction_prompt = """Given the chat history and the latest user question above \
which might reference context in the chat history, formulate a standalone question \
which can be understood without the chat history. The standalone question will be used to retrieve relevant information. \
Do NOT answer the question, just reformulate it if needed and otherwise return it as is.\
"""
# Message order: prior turns, then the user's input, then the rewrite instruction.
contextualize_question_prompt = ChatPromptTemplate.from_messages(
    [
        MessagesPlaceholder("chat_history"),
        ("human", "{input}"),
        ("human", contextualize_question_instruction_prompt),
    ]
)

# Retriever fed by instruct_llm's output, which first passes through the safety filter.
history_aware_retriever = create_history_aware_retriever(instruct_llm | llm_output_safety_filter, retriever, contextualize_question_prompt)     # Inputs must provide the "input" and "chat_history" keys

# System prompt for the answering model; {context} is a template variable
# (presumably filled with the retrieved documents by the stuff-documents chain -- confirm).
qa_system_prompt = """You are a helpful assistant named Obot that answers questions about Oberlin College. \
Please be polite and appropriate. Answer the question in details. Please use the following context to answer the question:
==============
{context}
"""
# Message order: system prompt, prior turns, then the user's input.
qa_prompt = ChatPromptTemplate.from_messages(
    [
        ("system", qa_system_prompt),
        MessagesPlaceholder("chat_history"),
        ("human", "{input}")
    ]
)

# Answering chain: qa_llm's output passes through the safety filter.
qa_chain = create_stuff_documents_chain(qa_llm | llm_output_safety_filter, qa_prompt)

# Full RAG chain: history-aware retrieval followed by the answering chain.
rag_chain = create_retrieval_chain(history_aware_retriever, qa_chain)

# In-memory registry of chat histories, keyed by session id.
session_store = {}

def get_session_history(session_id):
    """Return the chat history for ``session_id``, creating it on first use."""
    try:
        return session_store[session_id]
    except KeyError:
        new_history = InMemoryChatMessageHistory()
        session_store[session_id] = new_history
        return new_history

# Wrap the RAG chain with per-session message history. The key arguments name
# the fields used for the user input ("input"), the model answer ("answer"),
# and the history passed to the prompts ("chat_history").
conversational_rag_chain = RunnableWithMessageHistory(
    rag_chain,
    get_session_history,
    input_messages_key="input",
    output_messages_key="answer",
    history_messages_key="chat_history",
)

In [6]:
import langchain
# Enable LangChain's global debug tracing through the supported setter rather
# than assigning the legacy ``langchain.debug`` module attribute directly.
from langchain.globals import set_debug

set_debug(True)

# Ask a question in session "abc123"; the bare last expression displays the
# chain's full result dict.
conversational_rag_chain.invoke(
    {"input": "what is the requirement of oberlin college"},
    config={
        "configurable": {"session_id": "abc123"}
    },
)

[32;1m[1;3m[chain/start][0m [1m[chain:RunnableWithMessageHistory] Entering Chain run with input:
[0m{
  "input": "what is the requirement of oberlin college"
}
[32;1m[1;3m[chain/start][0m [1m[chain:RunnableWithMessageHistory > chain:insert_history] Entering Chain run with input:
[0m{
  "input": "what is the requirement of oberlin college"
}
[32;1m[1;3m[chain/start][0m [1m[chain:RunnableWithMessageHistory > chain:insert_history > chain:RunnableParallel<chat_history>] Entering Chain run with input:
[0m{
  "input": "what is the requirement of oberlin college"
}
[32;1m[1;3m[chain/start][0m [1m[chain:RunnableWithMessageHistory > chain:insert_history > chain:RunnableParallel<chat_history> > chain:load_history] Entering Chain run with input:
[0m{
  "input": "what is the requirement of oberlin college"
}
[36;1m[1;3m[chain/end][0m [1m[chain:RunnableWithMessageHistory > chain:insert_history > chain:RunnableParallel<chat_history> > chain:load_history] [16ms] Exiting Chain 

{'input': 'what is the requirement of oberlin college',
 'chat_history': [],
 'context': [Document(metadata={'reference': {'path': 'vector_index/2CA0EcSMbDIzPowjVv7z', 'firestore_type': 'document_reference'}, 'embedding': {'values': [0.04578891023993492, 0.027997978031635284, -0.05038174241781235, -0.05254003033041954, 0.027686048299074173, 0.010768449865281582, 0.011005670763552189, -0.012983220629394054, -0.025223473086953163, 0.021929912269115448, 0.018495788797736168, 0.009444654919207096, 0.0518554225564003, 0.02525728940963745, -0.0038061433006078005, -0.02461967244744301, 0.024024616926908493, -0.004065008368343115, -0.08157593011856079, 0.024516506120562553, 0.027512667700648308, 0.0017605304019525647, 0.04893380403518677, -0.06614363193511963, -0.002507320838049054, 0.0011841300874948502, 0.034971341490745544, 0.00011326910316711292, -0.017499906942248344, -0.014841449446976185, 0.0014301813207566738, 0.04042982682585716, 0.017249051481485367, -0.005820282269269228, 0.03369298

In [33]:
# Print every message stored for session "abc123", one per line.
for message in session_store['abc123'].messages:
    print(message)

content='what is the requirement for piano major'
content='The Oberlin College Performance Major: Piano Concentration has several requirements:\n\n**Course Requirements:**\n\n* **Music Theory and Aural Skills:**  All BM students must complete a core of music theory and aural skills classes, including Music Theory I-II (MUTH 131 and 132), two additional music theory courses at the 200 level (MUTH 250-299), and Aural Skills I-IV (MUTH 101, 102, 201, 202).\n* **Specific Piano Courses:**  The specific courses required are detailed in a grid, which is not provided in the context you provided. You would need to refer to the Oberlin College course catalog for this information.\n* **Accompanying Courses:** Piano majors can take the following accompanying courses:\n    * APST 112 - Keyboard Accompanying (Vocal)\n    * APST 113 - Keyboard Accompanying (Instrumental)\n    * APST 204 - Interpretation of Art Song\n    * MLIT 220 - The Lied\n    * MLIT 221 - The Mlodie\n\n**Performance Requirements:

In [27]:
import os
from langchain_google_firestore import FirestoreVectorStore
from langchain_google_vertexai import VertexAIEmbeddings
from langchain_google_genai import ChatGoogleGenerativeAI
from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder
from langchain_core.chat_history import InMemoryChatMessageHistory
from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain.chains import create_history_aware_retriever, create_retrieval_chain
from langchain_core.runnables.history import RunnableWithMessageHistory
from langchain_core.messages import HumanMessage, AIMessage
from google.generativeai.types.safety_types import HarmBlockThreshold, HarmCategory

# Required setting: a missing GCP_PROJECT_ID raises KeyError right here.
GCP_PROJECT_ID = os.environ["GCP_PROJECT_ID"]

# Project name and tracing switch exported through the process environment
# (presumably read by LangSmith/LangChain tracing -- confirm against their docs).
os.environ.update(
    LANGCHAIN_PROJECT="obot",
    LANGSMITH_TRACING="true",
)

class ConversationalRAG:
    """Retrieval-augmented chat chain with per-session, in-memory message history.

    Construction wires together a Firestore vector-store retriever, two Gemini
    chat models (one to reformulate the question, one to answer it) and a
    message-history wrapper keyed by session id.

    Attributes set by ``__init__``:
        contextualize_question_prompt: Prompt used to rewrite the latest user
            input as a standalone question.
        session_store: Dict mapping session id -> ``InMemoryChatMessageHistory``.
        conversational_rag_chain: The history-aware chain invoked by
            ``get_completion``.
    """

    class SafetyException(Exception):
        """Raised when a model response reports ``finish_reason == 'SAFETY'``.

        Defined on the class (not inside ``__init__``) so callers can write
        ``except ConversationalRAG.SafetyException``.
        """

    @staticmethod
    def llm_output_safety_filter(ai_message):
        """Pass a model message through unless it was stopped for safety reasons.

        Args:
            ai_message: Object exposing a ``response_metadata`` mapping.

        Returns:
            The same ``ai_message`` object, unchanged.

        Raises:
            ConversationalRAG.SafetyException: If the metadata's
                ``finish_reason`` is ``'SAFETY'``; the message includes the
                reported ``safety_ratings`` (``None`` when absent).
        """
        # Read the metadata defensively so a missing key cannot surface as a
        # KeyError that hides the real outcome.
        metadata = getattr(ai_message, "response_metadata", None) or {}
        if metadata.get('finish_reason') == 'SAFETY':
            raise ConversationalRAG.SafetyException(
                "Chatbot thinks this message is unsafe\n{}".format(metadata.get('safety_ratings'))
            )
        return ai_message

    def __init__(self):
        """Build the retriever, the two chat models and the history-aware chain.

        Reads the module-level ``GCP_PROJECT_ID`` for the embedding model.
        """
        embedding_model = VertexAIEmbeddings(
            model_name="text-embedding-004",
            project=GCP_PROJECT_ID,
        )

        vector_store = FirestoreVectorStore(
            collection="vector_index",
            embedding_service=embedding_model,
            content_field='content',
            embedding_field='embedding',
        )

        retriever = vector_store.as_retriever(search_kwargs={"k": 5})

        safety_settings = {
            HarmCategory.HARM_CATEGORY_DANGEROUS_CONTENT: HarmBlockThreshold.BLOCK_ONLY_HIGH,
            HarmCategory.HARM_CATEGORY_HATE_SPEECH: HarmBlockThreshold.BLOCK_ONLY_HIGH,
            HarmCategory.HARM_CATEGORY_HARASSMENT: HarmBlockThreshold.BLOCK_ONLY_HIGH,
            HarmCategory.HARM_CATEGORY_SEXUALLY_EXPLICIT: HarmBlockThreshold.BLOCK_ONLY_HIGH,
        }

        # Model that reformulates the latest question for retrieval.
        instruct_llm = ChatGoogleGenerativeAI(
            model="gemini-1.5-flash-001",
            temperature=0.5,
            max_tokens=256,
            max_retries=6,
            safety_settings=safety_settings
        )

        # Model that writes the final answer.
        qa_llm = ChatGoogleGenerativeAI(
            model="gemini-1.5-flash-001",
            temperature=0.7,
            max_tokens=1024,
            max_retries=6,
            safety_settings=safety_settings
        )

        llm_output_safety_filter = self.llm_output_safety_filter

        # Built from adjacent literals so the method's source indentation does
        # not leak into the prompt text sent to the model.
        contextualize_question_instruction_prompt = (
            "Given the chat history and the latest user question above "
            "which might reference context in the chat history, formulate a standalone question "
            "which can be understood without the chat history. "
            "The standalone question will be used to retrieve relevant information. "
            "Do NOT answer the question, just reformulate it if needed and otherwise return it as is."
        )

        # Message order: prior turns, the user's input, then the rewrite instruction.
        self.contextualize_question_prompt = ChatPromptTemplate.from_messages(
            [
                MessagesPlaceholder("chat_history"),
                ("human", "{input}"),
                ("human", contextualize_question_instruction_prompt),
            ]
        )

        history_aware_retriever = create_history_aware_retriever(
            instruct_llm | llm_output_safety_filter,
            retriever,
            self.contextualize_question_prompt,
        )

        # Same reasoning as above: no stray indentation inside the prompt.
        qa_system_prompt = (
            "You are a helpful assistant named Obot that answers questions about Oberlin College. "
            "Please be polite and appropriate. Answer the question in details. "
            "Please use the following context to answer the question:\n"
            "==============\n"
            "{context}\n"
        )
        qa_prompt = ChatPromptTemplate.from_messages(
            [
                ("system", qa_system_prompt),
                MessagesPlaceholder("chat_history"),
                ("human", "{input}")
            ]
        )

        qa_chain = create_stuff_documents_chain(qa_llm | llm_output_safety_filter, qa_prompt)

        rag_chain = create_retrieval_chain(history_aware_retriever, qa_chain)

        self.session_store = {}

        def get_session_history(session_id):
            # Create the history lazily the first time a session id is seen.
            if session_id not in self.session_store:
                self.session_store[session_id] = InMemoryChatMessageHistory()
            return self.session_store[session_id]

        self.conversational_rag_chain = RunnableWithMessageHistory(
            rag_chain,
            get_session_history,
            input_messages_key="input",
            output_messages_key="answer",
            history_messages_key="chat_history",
        )

    def get_completion(self, input, session_id="default"):
        """Run the chain for one user message and return its ``"answer"`` value.

        Args:
            input: The user's message. (The name shadows the builtin but is kept
                so existing keyword callers continue to work.)
            session_id: Key selecting which stored chat history to use.

        Returns:
            The ``"answer"`` entry of the chain's result.
        """
        return self.conversational_rag_chain.invoke(
            {"input": input},
            config={
                "configurable": {"session_id": session_id}
            },
        )["answer"]

    def get_chat_history(self, session_id="default"):
        """Return the stored conversation as a list of role/content dicts.

        Args:
            session_id: Key of the session whose history is requested.

        Returns:
            A list of ``{"role": ..., "content": ...}`` dicts in stored order,
            with role ``"user"`` for ``HumanMessage`` and ``"assistant"`` for
            ``AIMessage``; other message types are skipped. Empty when the
            session id is unknown.
        """
        history = self.session_store.get(session_id)
        if history is None:
            return []

        messages = []
        for message in history.messages:
            if isinstance(message, HumanMessage):
                role = "user"
            elif isinstance(message, AIMessage):
                role = "assistant"
            else:
                continue
            messages.append({"role": role, "content": message.content})
        return messages

In [28]:
import langchain
# Turn LangChain's global debug tracing off through the supported setter rather
# than assigning the legacy ``langchain.debug`` module attribute directly.
from langchain.globals import set_debug

set_debug(False)

# Build the class-based chain (constructs the embedding model, vector store and LLM clients).
conversational_rag = ConversationalRAG()

In [33]:
# In the recorded run this call raised SafetyException (see the output below).
conversational_rag.get_completion("is piano major only for rich people?")

SafetyException: Chatbot thinks this message is unsafe
[{'category': 'HARM_CATEGORY_SEXUALLY_EXPLICIT', 'probability': 'NEGLIGIBLE', 'blocked': False}, {'category': 'HARM_CATEGORY_HATE_SPEECH', 'probability': 'MEDIUM', 'blocked': False}, {'category': 'HARM_CATEGORY_HARASSMENT', 'probability': 'MEDIUM', 'blocked': False}, {'category': 'HARM_CATEGORY_DANGEROUS_CONTENT', 'probability': 'NEGLIGIBLE', 'blocked': False}]

In [32]:
# Show the stored chat history for the "default" session (the default session_id of get_completion).
print(conversational_rag.session_store['default'])

Human: what is the requirement of piano major
AI: The Oberlin College Performance Major: Piano Concentration has several requirements:

**Course Requirements:**

* **Music Theory and Aural Skills:** All BM students complete a core of music theory and aural skills classes, including Music Theory I-II (MUTH 131 and 132), two additional music theory courses at the 200 level, and Aural Skills I-IV (MUTH 101, 102, 201, 202). 
* **Piano Concentration Specific Courses:** The specific courses required for the piano concentration are not detailed in the provided text. You can find this information on the official Oberlin College website or in their course catalog.

**Performance Requirements:**

* **Junior Recital:** Students are required to perform a junior recital.
* **Senior Recital:**  Students are required to perform a senior recital, unless they are in the Honors program.
* **Accompanying Courses:**  Piano majors are required to complete three semesters of accompanying courses (APST 112, 