In [1]:
import getpass
import os

# Prompt for the OpenAI key only when the environment does not already hold a
# non-empty value, so the secret is never written into the notebook itself.
if os.environ.get("OPENAI_API_KEY", "") == "":
    os.environ["OPENAI_API_KEY"] = getpass.getpass("Enter API key for OpenAI: ")


In [2]:
from langchain.chat_models import init_chat_model
from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler
from langchain_core.messages import HumanMessage,SystemMessage

# Chat model shared by the cells below. Streaming is switched on and a stdout
# callback handler is attached so the reply is visible while it is generated.
llm = init_chat_model(
    "gpt-4o-mini",
    model_provider="openai",
    callbacks=[StreamingStdOutCallbackHandler()],
    streaming=True,
)

In [None]:
# Quick sanity check of the chat model with a question unrelated to CLASS.
# No system prompt, retrieval context, or conversation memory is involved here.
# As the last expression of the cell, the returned message is also displayed.
llm.invoke([HumanMessage(content="What's something interesting about whales?")])

In [4]:
from langchain_openai import OpenAIEmbeddings
from langchain_core.vectorstores import InMemoryVectorStore
from langchain_community.document_loaders import PyPDFLoader
from langchain_text_splitters import RecursiveCharacterTextSplitter

from langchain.memory import ChatMessageHistory
from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler


# Retrieval side: embedding model plus the in-memory index that is built on it.
embeddings = OpenAIEmbeddings(model="text-embedding-3-large")
vector_store = InMemoryVectorStore(embedding=embeddings)

# Conversation side: running transcript that later cells append to and replay.
memory = ChatMessageHistory()

In [None]:
# Load the CLASS manual, split it into overlapping chunks, and index the chunks.
file_path = "../class-data/CLASS_MANUAL.pdf"
loader = PyPDFLoader(file_path)
docs = loader.load()

# Number of Document objects produced by the loader.
print(len(docs))

# 1000-character chunks with a 200-character overlap between neighbours.
text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
all_splits = text_splitter.split_documents(docs)

# Index chunks.
# Rebuild the store first: add_documents() appends to whatever is already
# indexed, so re-running this cell on a live kernel would otherwise store every
# chunk again and similarity search would return duplicated passages.
vector_store = InMemoryVectorStore(embeddings)
_ = vector_store.add_documents(documents=all_splits)

In [6]:
# System prompt for the CLASS retrieval-augmented assistant. Later cells pass it
# as the `system` argument of invoke().
# NOTE(review): the identifier is misspelled ("instuctions"); it is kept as-is
# because later cells reference it by exactly this name.
Classy_instuctions = """You are a retrieval-augmented assistant for the CLASS code, specifically focused on solving Einstein-Boltzmann equations. Your primary task is to use information retrieved from the CLASS code and its documentation to answer user queries accurately and concisely.

Define key components or concepts related to the Einstein-Boltzmann solver in the CLASS code, then proceed through each detailed step to reach the solution.

1. **Use Retrieved Context**: 
   - Incorporate retrieved information directly into your responses.
   - Ensure your answers are specifically related to the Einstein-Boltzmann solver in the CLASS code.

2. **Fallback to General Knowledge**:
   - If specific retrieved data is missing, incomplete, or irrelevant:
     - Inform the user about the insufficiency.
     - Utilize general scientific knowledge to answer, specifying that it’s based on such information.

3. **Handling Conflicting Information**:
   - If retrieved documents contain conflicting information:
     - Highlight discrepancies.
     - Cite each source and provide a balanced response.

4. **Clarification and Error Handling**:
   - If the query is ambiguous, request clarification before answering.

# Steps

1. **Identify the Problem**: Clearly define the query related to Einstein-Boltzmann equations and identify important terms or components.
2. **Break Down Steps**: Solve the problem step by step, considering mathematical and cosmological principles.
3. **Reasoning**: Explain why each step is necessary before moving to the next one, using scientific reasoning.
4. **Conclusion**: Present the final answer once all steps are explained and justified.

# Output Format

Provide concise, accurate responses in a scientific explanatory format. Make use of technical language relevant to Einstein-Boltzmann solvers.

# Notes

- Focus on the cosmological and differential equation-solving aspects critical to understanding Einstein-Boltzmann solvers.
- Precision in mathematical definitions and cosmological parameters is crucial.
- Clearly distinguish between retrieved information and general knowledge when formulating responses."""

In [7]:
def invoke(context, question, system):
    """Assemble the chat messages for one retrieval-augmented turn.

    Args:
        context: Retrieved material. Accepts a plain string, a list/tuple of
            items, or a dict with a "context" key (the shape returned by
            ``retrieve``). Items exposing ``page_content`` contribute that
            text; any other item is converted with ``str()``.
        question: The user's question for this turn.
        system: System prompt text.

    Returns:
        list: ``[SystemMessage, *history, HumanMessage]`` where ``history`` is
        the content of the notebook-level ``memory`` and the final human
        message carries both the context text and the question.
    """
    # Unwrap the {"context": [...]} dict so the prompt carries the chunk text
    # rather than the repr of a dict of Document objects.
    if isinstance(context, dict) and "context" in context:
        context = context["context"]
    if isinstance(context, (list, tuple)):
        context_text = "\n\n".join(
            str(getattr(item, "page_content", item)) for item in context
        )
    else:
        context_text = str(context)

    system_message = SystemMessage(content=system)
    human_message = HumanMessage(
        content=f"Context:\n{context_text}\n\nQuestion:\n{question}"
    )

    # The calling cells record the question in `memory` before calling this
    # function. Drop that trailing copy so the model does not receive the same
    # question twice (once bare from history, once with context attached).
    history = list(memory.messages)
    if (
        history
        and isinstance(history[-1], HumanMessage)
        and history[-1].content == question
    ):
        history.pop()

    return [system_message] + history + [human_message]

In [8]:
def retrieve(question, k=None):
    """Fetch the indexed chunks most similar to ``question``.

    Args:
        question: Query text handed to the vector store's similarity search.
        k: Optional number of chunks to return. When omitted, the vector
            store's own default is used, exactly as before this parameter
            existed.

    Returns:
        dict: ``{"context": docs}`` where ``docs`` is whatever
        ``vector_store.similarity_search`` returned.
    """
    # Only forward `k` when the caller asked for it, so the library default
    # stays in force otherwise.
    search_kwargs = {} if k is None else {"k": k}
    retrieved_docs = vector_store.similarity_search(question, **search_kwargs)
    return {"context": retrieved_docs}

In [11]:
# calls:
# First turn: fetch supporting chunks for the question and log the question
# in the conversation history.
question = "Can you give me an example of the temperature Cls in LCDM?"
context = retrieve(question)
memory.add_user_message(question)

In [None]:
# Display the retrieval result for the current question (the dict produced by
# retrieve()) so the retrieved chunks can be inspected before calling the model.
context

In [None]:
# Send system prompt + history + context-augmented question to the model,
# then store the reply text in the conversation history for the next turn.
response = llm.invoke(
    invoke(context=context, question=question, system=Classy_instuctions)
)
memory.add_ai_message(response.content)

In [13]:
# calls:
# Follow-up turn: "this" in the question refers to the previous answer, which
# reaches the model through the conversation history.
question = "Can you give me a classy example in python of how to plot this?"
context = retrieve(question)
memory.add_user_message(question)

In [None]:
# Same call pattern as the first turn: build the message list, query the model,
# and append the reply text to the conversation history.
response = llm.invoke(
    invoke(context=context, question=question, system=Classy_instuctions)
)
memory.add_ai_message(response.content)