In [None]:
from jupyter_chat import *
from langchain_openai import ChatOpenAI
from langchain_core.messages import HumanMessage, SystemMessage
from langchain import hub
from langchain_chroma import Chroma
from langchain_community.document_loaders import WebBaseLoader
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnablePassthrough
from langchain_openai import OpenAIEmbeddings
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_huggingface import HuggingFaceEmbeddings
import bs4
import re
from dotenv import load_dotenv
import os
# Pull variables from a local .env file (if one exists) into the process environment.
load_dotenv()

# Fail fast with a readable message: without this check a missing key would
# surface as an opaque TypeError when None is assigned into os.environ.
api_key = os.getenv("OPENAI_API_KEY")
if not api_key:
    raise RuntimeError(
        "OPENAI_API_KEY is not set; define it in the environment or in a .env file."
    )

os.environ["OPENAI_API_KEY"] = api_key

In [None]:
# Build the corpus that the retriever will search.

# Fetch the source page (a single Wikipedia article) over HTTP.
loader = WebBaseLoader(
    "https://en.wikipedia.org/wiki/2024_United_States_presidential_election"
)
docs = loader.load()

# Cut the page into chunks of up to 1000 characters, with 200 characters of
# overlap between neighbouring chunks.
text_splitter = RecursiveCharacterTextSplitter(
    chunk_overlap=200,
    chunk_size=1000,
)
splits = text_splitter.split_documents(docs)

In [None]:
# Turn every chunk into a vector with the "all-MiniLM-L6-v2" embedding model,
# index the vectors in a Chroma store, and expose that store as a retriever.
embeddings = HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2")
vectorstore = Chroma.from_documents(
    embedding=embeddings,
    documents=splits,
)
retriever = vectorstore.as_retriever()

In [None]:
# Define a function that combines a set of document parts into a single string, also removing excessive whitespace

def documents_to_text(docs):
    """Merge document chunks into one block of text.

    Every run of whitespace inside a chunk's ``page_content`` is collapsed
    to a single space; the cleaned chunks are then joined with a blank line
    between them.

    Args:
        docs: iterable of objects exposing a ``page_content`` string.

    Returns:
        The combined text, or an empty string when ``docs`` is empty.
    """
    cleaned_chunks = []
    for chunk in docs:
        # Collapse, do not strip: a leading/trailing run becomes one space.
        cleaned_chunks.append(re.sub(r'\s+', ' ', chunk.page_content))
    return "\n\n".join(cleaned_chunks)

In [None]:
# System-prompt skeleton; slot {0} is filled via str.format with the
# retrieved context text.
prompt_template = (
    "You are a helpful assistant. You know the following information:\n"
    "\n"
    "-----\n"
    "{0}\n"
    "-----\n"
)

In [None]:
# Define chatbot 

class MyChatbot:
    """Chatbot that answers with the help of retrieved document chunks.

    The conversation is kept in ``self.history``. On every ``answer`` call
    the system prompt is rebuilt from the chunks that the module-level
    ``retriever`` returns for the current question.
    """

    def __init__(self):
        """Create the chat model and start with an empty conversation."""
        self.model = ChatOpenAI(model_name="gpt-4o-mini", temperature=0.7)
        self.systemPrompt = ""
        self.reset()

    def prompt(self):
        """Return the message list for the model: system prompt, then history."""
        return [SystemMessage(content=self.systemPrompt)] + self.history

    def question(self, question):
        """Append the user's question to the history as a human message."""
        self.history.append(HumanMessage(content=question))

    def answer(self, question):
        """Generate the model's reply and append it to the history.

        ``question`` is used only to retrieve relevant chunks and rebuild the
        system prompt; it is not added to the history here. Presumably the
        chat driver calls ``question()`` first - confirm against jupyter_chat.
        """
        # `invoke` is the supported retriever entry point; the older
        # `get_relevant_documents` is deprecated in langchain-core.
        docs = retriever.invoke(question)
        information = documents_to_text(docs)
        self.systemPrompt = prompt_template.format(information)
        ai_message = self.model.invoke(self.prompt())
        self.history.append(ai_message)

    def reset(self):
        """Discard the conversation so far."""
        self.history = []

In [None]:
# Start chat

# Create one chatbot instance; its conversation history lives on this object.
chatbot = MyChatbot()

# start_chat is not defined in this notebook; presumably it comes from the
# jupyter_chat star-import and drives the chat UI with `chatbot` - confirm
# against jupyter_chat.
start_chat(chatbot)