In [1]:
import os

# Set a USER_AGENT value in the process environment before any other library is imported.
# NOTE(review): presumably consumed by one of the loaders used later — confirm which one reads it.
os.environ.update(USER_AGENT='doge')

In [2]:
# Instantiate OpenAI client
!pip install -qU langchain_openai
from langchain_openai import ChatOpenAI

llm = ChatOpenAI(model="gpt-4o-mini")

In [3]:
# Load Wikipedia Article
# Takes quite a bit of time due to the length of the Wiki article

!pip install -qU langchain_community
from langchain_community.document_loaders import WikipediaLoader

# Can sometimes error stating no explicit parser was identified
# Most likely an internal error with WikipediaLoader, since there 
# isn't any parameters that let you pass in a parser.
docs = WikipediaLoader("Department_of_Government_Efficiency").load()

In [4]:
# Split the article into smaller chunks for better indexing
!pip install -qU langchain
from langchain_text_splitters import RecursiveCharacterTextSplitter

text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=0)
splits = text_splitter.split_documents(docs)

In [10]:
# Inject splits into vector database
!pip install -qU langchain_chroma
from langchain import hub
from langchain_chroma import Chroma
from langchain_openai import OpenAIEmbeddings
from langchain_core.runnables import RunnablePassthrough
from langchain_core.output_parsers import StrOutputParser

database = Chroma.from_documents(documents=splits, embedding=OpenAIEmbeddings())
retriever = database.as_retriever()
prompt = hub.pull("rlm/rag-prompt")

def format_docs(docs):
    """Concatenate the ``page_content`` of every document into one string.

    Args:
        docs: Iterable of objects exposing a ``page_content`` attribute.

    Returns:
        The contents joined in order, separated by a blank line; an empty
        string when ``docs`` is empty.
    """
    separator = "\n\n"
    contents = [document.page_content for document in docs]
    return separator.join(contents)

# Basic RAG pipeline; it is invoked with a plain question string.
#   1. "context" runs the retriever and flattens its hits with format_docs,
#      while "question" forwards the input unchanged.
#   2. The prompt is filled from those two keys and sent to the LLM.
#   3. The model's message is reduced to a plain string.
rag_chain = (
    {
        "context": retriever | format_docs,
        "question": RunnablePassthrough(),
    }
    | prompt
    | llm
    | StrOutputParser()
)



In [12]:
# Smoke test: the basic RAG chain takes a plain question string and returns a string answer.
rag_chain.invoke("What is DOGE?")

'DOGE stands for the Department of Government Efficiency, officially the U.S. DOGE Service Temporary Organization. It is an initiative of the second Trump administration, informally led by Elon Musk, aimed at modernizing federal technology and implementing federal spending cuts and deregulation. The organization is set to conclude on July 4, 2026.'

In [13]:
# Ask about a specific person to check that retrieval surfaces detail beyond the summary.
rag_chain.invoke("Was Vivek part of DOGE?")

'Vivek Ramaswamy was initially announced as a co-leader of DOGE alongside Elon Musk but stepped away from the project before it began. He did this to prepare for running for governor in Ohio. Therefore, he was not part of DOGE when it launched.'

In [14]:
# Ask the model to describe its retrieved context; the same topic is asked again
# later through the conversational chain.
rag_chain.invoke("What does the retrieved context say about DOGE's involvement with the US government?")

'The retrieved context indicates that DOGE\'s actions have led to significant controversy, including accusations of violating the U.S. Constitution and causing a potential constitutional crisis. Some Democratic members criticized DOGE for lacking authority, describing its actions as a "takeover." Meanwhile, the White House and Republican Party defended DOGE and Musk, asserting that they are in full compliance with regulations.'

In [24]:
from langchain.retrievers import BM25Retriever, EnsembleRetriever

keyword_retriever = BM25Retriever.from_documents(splits)
keyword_retriever.k = 3

ensemble_retriever = EnsembleRetriever(retrievers=[retriever, keyword_retriever], weights=[0.3, 0.7])

In [38]:
# --- Cohere Contextual Compression Integration ---
!pip install -qU langchain-cohere
from langchain.retrievers.contextual_compression import ContextualCompressionRetriever
from langchain_cohere import CohereRerank
from :class:`~langchain_cohere import Cohere`

# Initialize Cohere LLM for compression (set temperature=0 for deterministic results)
cohere_llm_for_compression = Cohere(temperature=0)
# Instantiate the Cohere reranker compressor using the specified model
compressor = CohereRerank(model="rerank-english-v3.0")

# Wrap the ensemble retriever with the ContextualCompressionRetriever
compression_retriever = ContextualCompressionRetriever(
    base_compressor=compressor,
    base_retriever=ensemble_retriever
)


SyntaxError: invalid syntax (2238201209.py, line 5)

In [28]:
from langchain.chains import create_history_aware_retriever
from langchain_core.prompts import MessagesPlaceholder
from langchain_core.prompts import ChatPromptTemplate

# System instruction for the question-rewriting step: turn a follow-up that
# leans on chat history into a standalone question, without answering it.
contextualize_q_system_prompt = (
    "Given a chat history and the latest user question which might "
    "reference context in the chat history, formulate a standalone "
    "question which can be understood without the chat history. "
    "Do NOT answer the question, just reformulate it if needed and "
    "otherwise return it as is."
)

# Prompt for the rewriting step: system instruction, then the running chat
# history, then the latest user input.
contextualize_q_prompt = ChatPromptTemplate.from_messages(
    [
        ("system", contextualize_q_system_prompt),
        MessagesPlaceholder(variable_name="chat_history"),
        ("human", "{input}"),
    ]
)

# Retriever that runs the rewriting prompt through the LLM and searches the
# ensemble retriever with the result.
history_aware_retriever = create_history_aware_retriever(
    llm=llm,
    retriever=ensemble_retriever,
    prompt=contextualize_q_prompt,
)

In [29]:
from langchain.chains import create_retrieval_chain
from langchain.chains.combine_documents import create_stuff_documents_chain

# System instruction for the answering step; {context} is the placeholder that
# receives the retrieved documents.
system_prompt = (
    "You are an assistant for question-answering tasks. "
    "Use the following pieces of retrieved context to answer the question. "
    "If you don't know the answer, say that you don't know. "
    "Use three sentences maximum and keep the answer concise."
    "\n\n{context}"
)

# Answering prompt: system instruction (with the retrieved context), the chat
# history so far, then the latest user input.
qa_prompt = ChatPromptTemplate.from_messages(
    [
        ("system", system_prompt),
        MessagesPlaceholder(variable_name="chat_history"),
        ("human", "{input}"),
    ]
)

# Chain that places the retrieved documents into the prompt and calls the LLM.
question_answer_chain = create_stuff_documents_chain(llm=llm, prompt=qa_prompt)

# Full conversational pipeline: history-aware retrieval followed by answering.
# This rebinds `rag_chain`; from here on it is called with a dict containing
# "input", not with the bare string the earlier chain accepted.
rag_chain = create_retrieval_chain(
    retriever=history_aware_retriever,
    combine_docs_chain=question_answer_chain,
)

In [30]:
from langchain_core.chat_history import BaseChatMessageHistory
from langchain_core.runnables.history import RunnableWithMessageHistory
from langchain_community.chat_message_histories import ChatMessageHistory

# In-memory registry of chat histories, keyed by session id.
store = dict()


def get_session_history(session_id: str) -> BaseChatMessageHistory:
    """Return the chat history for ``session_id``, creating it on first use.

    Args:
        session_id: Key identifying the conversation in ``store``.

    Returns:
        The history object kept in ``store`` for that session; a new
        ``ChatMessageHistory`` is stored and returned when the id is unseen.
    """
    try:
        return store[session_id]
    except KeyError:
        history = ChatMessageHistory()
        store[session_id] = history
        return history


# Wrap the retrieval chain with per-session message history. The key names tell
# the wrapper where the user input, the injected history and the answer live.
conversational_rag_chain = RunnableWithMessageHistory(
    runnable=rag_chain,
    get_session_history=get_session_history,
    input_messages_key="input",
    history_messages_key="chat_history",
    output_messages_key="answer",
)

In [31]:
# First turn of the "doge-session" conversation. The chain returns a dict; only
# its "answer" entry is displayed.
conversational_rag_chain.invoke(
    {"input": "What does the retrieved context say about DOGE in the US government?"},
    config={
        "configurable": {"session_id": "doge-session"}
    },
)["answer"]

'DOGE has been linked to Trump\'s campaign promises regarding federal spending and efficiency, with proposals for a department aimed at streamlining government. Its actions have faced criticism for potentially violating the Constitution, leading to accusations of a "takeover" and raising conflict of interest concerns for Musk\'s companies. The White House and Republican Party have defended DOGE and Musk, asserting compliance with government regulations.'

In [32]:
# Follow-up turn in the same session: the question does not name its subject,
# so it depends on the history recorded under "doge-session".
conversational_rag_chain.invoke(
    {"input": "How many lawsuits?"},
    config={"configurable": {"session_id": "doge-session"}},
)["answer"]

'Three lawsuits had been filed in the United States District Court for the District of Columbia regarding DOGE as of January 21.'