In [None]:
import os

from langchain.callbacks.manager import CallbackManager
from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler
from langchain.chains import (
    RetrievalQA,
    ConversationalRetrievalChain,
    RetrievalQAWithSourcesChain,
)
from langchain.memory import ConversationBufferMemory
from langchain_community.llms import LlamaCpp
from langchain_openai.llms import OpenAI
from langchain.retrievers import EnsembleRetriever
from langchain_core.prompts import PromptTemplate

import pandas as pd
from ragatouille import RAGPretrainedModel
from semantic_router import Route, RouteLayer
from semantic_router.encoders import HuggingFaceEncoder

import gradio as gr
from json2html import Json2Html

In [None]:
# Model identification and on-disk locations shared by the cells below

model = "mistral-7b-instruct-v0.2"
# NOTE(review): absolute, user-specific path -- it will not resolve on another machine.
model_path = (
    "/Users/arnewman/.cache/lm-studio/models/TheBloke/"
    "Mistral-7B-Instruct-v0.2-GGUF/mistral-7b-instruct-v0.2.Q4_K_M.gguf"
)

rootdir = ".."
persist_directory = "./../embeddings"
index_root = f"{rootdir}/../colbert_index/"
colbert_path = f"{rootdir}/../colbertv2.0/"

# The three ColBERT indexes share one parent folder under index_root
index_path = f"{index_root}colbert/indexes/documents/"
transcript_path = f"{index_root}colbert/indexes/transcripts/"
combined_path = f"{index_root}colbert/indexes/combined/"

In [None]:
# Load the ColBERT model backed by the combined index
RAG = RAGPretrainedModel.from_index(index_path=combined_path)

# Read the docid -> metadata map stored next to the index. After the transpose,
# reset_index() moves the row labels (presumably the document ids -- confirm
# against the JSON file) into a regular "index" column.
df_rag = (
    pd.read_json(f"{combined_path}docid_metadata_map.json")
    .T
    .reset_index()
)

def filter_pids(df, search_term):
    """Return the ``index`` values of rows whose ``course_number`` equals ``search_term``.

    Args:
        df: DataFrame with at least an ``index`` and a ``course_number`` column.
        search_term: Value compared for equality against ``course_number``.

    Returns:
        A list of the matching ``index`` values, in row order; empty when
        nothing matches.
    """
    matches = df["course_number"] == search_term
    return list(df.loc[matches, "index"])

In [None]:
# MADS course numbers and titles.
# Keys are the course numbers as strings, values are the course titles. The
# route-building cell below turns every entry into a "SIADS <number>" route and
# derives its utterances from both the number and the title.
courses = {
    "501": "Being a Data Scientist",
    "502": "Math Methods I",
    "503": "Data Science Ethics",
    "505": "Data Manipulation",
    "511": "SQL and Databases",
    "515": "Efficient Data Processing",
    "516": "Big Data: Scalable Data Processing",
    "521": "Visual Exploration of Data",
    "522": "Information Visualization I",
    "523": "Communicating Data Science Results",
    "524": "Presenting Uncertainty",
    "532": "Data Mining I",
    "542": "Supervised Learning",
    "543": "Unsupervised Learning",
    "571": "Business SQL",  # No syllabus for this one :(
    "593": "Milestone I",
    "601": "Qualitative Inquiry for Data Scientists",
    "602": "Math Methods II",
    "611": "Database Architecture & Technology",
    "622": "Information Visualization II",
    "630": "Causal Inference",
    "631": "Experiment Design and Analysis",
    "632": "Data Mining II",
    "642": "Deep Learning I",
    "643": "Machine Learning Pipelines",
    "644": "Reinforcement Learning Algorithms",
    "652": "Network Analysis",
    "655": "Applied Natural Language Processing",
    "673": "Cloud Computing",
    "680": "Learning Analytics and Educational Data Science",
    "681": "Health Analytics",
    "682": "Social Media Analytics",
    "685": "Search and Recommender Systems",
    "687": "Introduction to Sports Analytics",
    "688": "Data Science for Social Good",
    "696": "Milestone II",
    "699": "Capstone",
}

In [None]:
# Build one semantic route per course. Each route is seeded with the course
# code (with and without the space), the title, and a few phrasings of both.
routes = []
for num, name in courses.items():
    route_name = f"SIADS {num}"
    code, title = route_name.lower(), name.lower()
    route_utterances = [
        code,
        code.replace(" ", ""),
        title,
        f"{title} class",
        f"{title} course",
        f"who teaches {code}",
        f"who teaches {title}",
    ]
    routes.append(Route(name=route_name, utterances=route_utterances))

# Select local encoder and build route layer
# NOTE(review): the model location is passed as `str=`; semantic-router's
# HuggingFaceEncoder appears to take the model through `name=`, in which case
# this path may be silently ignored and a default model loaded -- confirm
# before relying on UAE-Large-V1 being the encoder in use.
encoder = HuggingFaceEncoder(
    str="/Users/arnewman/.cache/huggingface/hub/models--sentence-transformers--UAE-Large-V1/",
    device="mps",
)
rl = RouteLayer(encoder=encoder, routes=routes)

In [None]:
# Use evaluation questions to test accuracy
# From: https://github.com/aurelio-labs/semantic-router/blob/main/docs/06-threshold-optimization.ipynb
#
# Each entry is (question, expected route name). The expected name must match a
# route built from `courses` ("SIADS <number>"); None marks a general question
# that should not be routed to any single course.

test_data = [
    ("Which class involves time series analysis?", "SIADS 632"),
    ("Who teaches the SQL and Databases class?", "SIADS 511"),
    ("What are the prerequisites for Data Science for Social Good?", "SIADS 688"),
    ("When are the office hours for the Math Methods course?", "SIADS 502"),
    # Milestone II is course 696 in `courses`; 699 is Capstone
    ("Are there any weekly readings for Milestone II?", "SIADS 696"),
    ("What are the outcomes of Qualitative Inquiry?", "SIADS 601"),
    ("What textbook is required for SIADS 505?", "SIADS 505"),
    ("What textbook is required for Data Manipulation?", "SIADS 505"),
    ("Which week of unsupervised learning covers DBSCAN?", "SIADS 543"),
    ("How many credits are required to complete the MADS program?", None),
    ("How long do students have to complete the MADS program start to finish?", None),
    ("How many points is the comprehensive oral exam worth in SIADS 593?", "SIADS 593"),
    ("What is the penalty for late submission in SIADS 630?", "SIADS 630"),
    ("How do I get accommodations for a class?", None),
    ("What is a backpack?", None),
    ("When is the latest I can drop a course?", None),
    ("How do I get an override to take a class?", None),
    ("How do I take a leave of absence from the MADS program?", None),
    ("What are the prerequisites for Search and Recommender Systems?", "SIADS 685")
]

# Unpack the (question, expected route) pairs into parallel sequences:
# X holds the questions, y the expected route names (or None).
X, y = zip(*test_data)

# Baseline: score the route layer before fit() is called
accuracy = rl.evaluate(X=X, y=y)
print(f"Original Accuracy: {accuracy*100:.2f}%")

# Check defaults
# route_thresholds = rl.get_thresholds()
# print("Default route thresholds:", route_thresholds)

# Fit the route layer on the labelled questions (threshold optimization,
# per the semantic-router notebook linked above)
rl.fit(X=X, y=y)

# Updated thresholds
# route_thresholds = rl.get_thresholds()
# print("Updated route thresholds:", route_thresholds)

# Re-score after fitting. This uses the same examples that were passed to
# fit(), so the figure is an in-sample accuracy, not a held-out one.
accuracy = rl.evaluate(X=X, y=y)
print(f"Revised Accuracy: {accuracy*100:.2f}%")

In [None]:
# Load model - two options

# Option 1 (active): assumes that we are running an OpenAI-compatible
# endpoint, in this case on the local host. We can do this with
# llama-cpp-python. This has the advantage of simulating the use
# of a remote LLM.
# The model name comes from the shared `model` variable so the name sent to
# the endpoint and the name shown in the UI header cannot drift apart.
# NOTE(review): "hello" is presumably a placeholder key for the local
# endpoint -- replace with a real secret source if this ever targets a
# hosted API.
llm_open = OpenAI(openai_api_base = "http://localhost:7999/v1",
                  model = model,
                  openai_api_key = "hello",
                  temperature = 0.1,
                  top_p = 1,
                  max_tokens = 1024,
                  callback_manager = CallbackManager([StreamingStdOutCallbackHandler()])
                 )

# Option 2 (disabled): also uses llama-cpp-python but starts up the model
# within the notebook. This does not require spinning up a separate
# endpoint, although the notebook will necessarily use more memory.
# llm_open = LlamaCpp(model_path=model_path,
#                     n_ctx=32768,
#                     n_gpu_layers=-1,
#                     temperature=0.1,
#                     top_p=1,
#                     top_k=40,
#                     repeat_penalty=1.1,
#                     max_tokens=1024,
#                     callback_manager = CallbackManager([StreamingStdOutCallbackHandler()]),
#                     #stream=True,
#                    )

# Conversation buffer shared by every chain that get_chain() builds.
# output_key names the chain output to record ("result").
# NOTE(review): the prompt template only has {context} and {question} slots,
# so the stored chat_history does not appear to reach the LLM -- confirm intent.
memory = ConversationBufferMemory(
    memory_key="chat_history",
    output_key="result",
    return_messages=True,
)


# Prompt template for the question-answering chain.
# {context} and {question} are the two placeholders; get_chain() declares them
# as the PromptTemplate input variables.
template = '''
Use only the following pieces of context to answer the question at the end. 
Keep your answers concise and do not provide additional explanations or interpretations. 
If the answer cannot be deduced from the context, just say that you don't know the answer, don't try to make up an answer.

Context: {context}

Question: {question}
Helpful Answer:
'''

In [None]:
# Route the query to a course, then build a retriever limited to that course's
# documents. If no relevant documents can be identified, search all documents.

def get_chain(query, k=5):
    """Build a RetrievalQA chain whose retriever is scoped by the query's route.

    Args:
        query: The user's question; it is passed to the route layer ``rl``.
        k: Number of documents the retriever is asked to return.

    Returns:
        A ``RetrievalQA`` "stuff" chain using ``llm_open``, the shared
        ``memory`` and the module-level prompt ``template``, configured to
        return source documents. A new chain is built on every call.
    """
    route = rl(query)

    # No matched route, or a route with no matching rows in df_rag, means the
    # retriever is left unrestricted.
    # NOTE(review): route names look like "SIADS 501"; this only narrows the
    # search if df_rag.course_number stores the same format -- confirm.
    doc_ids = filter_pids(df_rag, route.name) if route.name else []
    retriever_kwargs = {"k": k}
    if doc_ids:
        retriever_kwargs["doc_ids"] = doc_ids
    retriever = RAG.as_langchain_retriever(**retriever_kwargs)

    if route.name:
        print(f"Selected route: {route.name}")
    else:
        print("Selected route: None. Using all documents.")

    prompt = PromptTemplate(
        template=template,
        input_variables=["context", "question"],
    )
    return RetrievalQA.from_chain_type(
        llm=llm_open,
        chain_type="stuff",
        memory=memory,
        retriever=retriever,
        return_source_documents=True,
        verbose=True,
        chain_type_kwargs={"prompt": prompt},
    )

In [None]:
# Shared json2html converter; create_source_html() calls j.convert() on it
j = Json2Html()

def create_source_html(response, n = 5):
    """Convert the first ``n`` source documents of a chain response for display.

    Args:
        response: Mapping with a ``"source_documents"`` sequence; each document
            exposes ``metadata`` and ``page_content`` attributes.
        n: Maximum number of documents to include. Fewer are used when the
            response holds fewer than ``n``.

    Returns:
        Whatever ``j.convert`` returns for the list of
        ``{"metadata": ..., "page_content": ...}`` dicts (the module-level
        json2html converter).
    """
    docs = response["source_documents"]
    limit = min(n, len(docs))
    rows = [
        {"metadata": doc.metadata, "page_content": doc.page_content}
        for doc in docs[:limit]
    ]
    return j.convert(rows)

In [None]:
# Chat UI: a header, the chat window, a text box for questions, a collapsible
# panel with the retrieved source documents, and a button that clears all three.
with gr.Blocks(theme=gr.themes.Glass()) as demo:
    gr.Markdown(
        f"""

        # MADS-RAG: A Helpful Chatbot for Master's of Applied Data Science
        # Students at the University of Michigan

        ## Current model running is {model}.

        Source data includes class syllabi, the MADS Student Handbook,
        MADS Advising FAQs, and transcripts of class videos. Some source
        data may be out of date.

        Please note that answers provided by this chatbot prototype may not
        be correct and should be verified through other means.
        """
    )
    chatbot = gr.Chatbot()
    msg = gr.Textbox()
    with gr.Accordion("Source Documents", open=False):
        html = gr.HTML(label="Source Documents", show_label=True)
    clear = gr.ClearButton(value="Clear", components=[msg,chatbot,html])

    def user(user_message, chat_history):
        """Answer a submitted question and refresh the chat and source panels.

        Args:
            user_message: Text submitted from the ``msg`` textbox.
            chat_history: Current ``chatbot`` value; the new
                (question, answer) pair is appended to it in place.

        Yields:
            A 3-tuple matching the ``[msg, chatbot, html]`` outputs: an update
            that empties the textbox, the chat history, and the HTML for the
            top three source documents.
        """
        # A blank submission has nothing to route or retrieve: clear the box
        # and leave the chat and source panel as they are, without calling
        # the LLM.
        if not user_message or not user_message.strip():
            yield gr.update(value=""), chat_history, gr.update()
            return

        qa_chain = get_chain(user_message)
        response = qa_chain(user_message)
        chat_history.append((user_message, response["result"]))
        res_html = create_source_html(response, n=3)
        yield gr.update(value=""), chat_history, res_html

    msg.submit(user, [msg, chatbot], [msg, chatbot, html], queue=False)

# Launch the app. The login can be supplied through the MADS_RAG_USER and
# MADS_RAG_PASSWORD environment variables so real credentials do not have to
# live in the notebook; the literals are only fallbacks that keep the previous
# behaviour when the variables are unset.
# NOTE(review): per the Gradio docs, share=True publishes a public link, and
# server_name="0.0.0.0" listens on all interfaces -- set the environment
# variables (or drop the fallbacks) before exposing this beyond a demo.
demo.launch(debug=True, share=True,
            auth=(os.environ.get("MADS_RAG_USER", "madsuser"),
                  os.environ.get("MADS_RAG_PASSWORD", "winter2024")),
            server_name="0.0.0.0", server_port=5678)

In [None]:
# Function to display results
# def process_llm_response(llm_response):
#     print(f"\nQuestion: {llm_response['query']}")
#     print(f"\nAnswer: {llm_response['result']}")
#     print("\n\nSources:")
#     for i, source in enumerate(llm_response["source_documents"]):
#         m = source.metadata
#         try:
#             print(f"{i + 1}. {m['course_title']} ({m['course_number']}): {m['heading']}")
#         except:
#             print(f"{i + 1}. {m['course_title']} ({m['course_number']}): {m['source']}")

In [None]:
# llm_response = qa_chain(query)
# process_llm_response(llm_response)