In [15]:
import logging
import os
import pandas as pd
import re
import warnings
import inspect
import openai
from openai import OpenAI

from lsst.summit.utils.utils import getCurrentDayObs_int, getSite
from lsst.utils.iteration import ensure_iterable
from IPython.display import display, Markdown

In [10]:
import langchain  # noqa: E402
import langchain_community
from langchain_openai import ChatOpenAI
from langchain.callbacks import get_openai_callback  # noqa: E402
from langchain_experimental.agents import create_pandas_dataframe_agent

In [25]:
def setApiKey(filename="~/.openaikey.txt"):
    """Set the OpenAI API key from a file.

    Set the OpenAI API key from a file. The file should contain a single line
    with the API key. The file name can be specified as an argument. If the
    API key is already set, it will be overwritten, with a warning issues.

    Parameters
    ----------
    filename : `str`
        Name of the file containing the API key.
    """

    currentKey = os.getenv('OPENAI_API_KEY')

    filename = os.path.expanduser(filename)
    with open(filename, 'r') as file:
        apiKey = file.readline().strip()

    openai.api_key = apiKey
    os.environ["OPENAI_API_KEY"] = apiKey

In [21]:
def getObservingData(dayObs=None):
    """Get the observing metadata for the current or a past day.
    """
    currentDayObs = getCurrentDayObs_int()
    if dayObs is None:
        dayObs = currentDayObs
    isCurrent = dayObs == currentDayObs

    site = getSite()

    filename = None
    if site == 'summit':
        filename = f"/project/rubintv/sidecar_metadata/dayObs_{dayObs}.json"
    elif site in ['rubin-devl']:
        filename = f"/sdf/home/m/mfl/u/rubinTvDataProducts/sidecar_metadata/dayObs_{dayObs}.json"
    elif site in ['staff-rsp']:
        filename = f"/home/m/mfl/u/rubinTvDataProducts/sidecar_metadata/dayObs_{dayObs}.json"
    else:
        raise RuntimeError(f"Observing metadata not available for site {site}")

    # check the file exists, and raise if not
    if not os.path.exists(filename):
        return pd.DataFrame()

    table = pd.read_json(filename).T
    table = table.sort_index()

    # remove all the columns which start with a leading underscore, as these
    # are used by the backend to signal how specific cells should be colored
    # on RubinTV, and for nothing else.
    table = table.drop([col for col in table.columns if col.startswith('_')], axis=1)

    return table

In [29]:
setApiKey()
dayObs = 20230131
data = getObservingData(dayObs)

In [30]:
client = OpenAI()
  
assistant = client.beta.assistants.create(
    name="Astronomy Assistant",
    description="Assists with fetching and analyzing observational data for astronomy.",
    instructions="Help with astronomy calculations and data formatting.",
    tools=[{"type": "code_interpreter"}],
    model="gpt-4o",
)

In [31]:
thread = client.beta.threads.create()

In [35]:
message = client.beta.threads.messages.create(
    thread_id=thread.id,
    role="user",
    content=f"What is the total darktime for Image type = bias in {data}"
)

In [36]:
run = client.beta.threads.runs.create_and_poll(
  thread_id=thread.id,
  assistant_id=assistant.id,
  instructions="Provide steps."
)

In [38]:
if run.status == 'completed': 
  messages = client.beta.threads.messages.list(
    thread_id=thread.id
  )
  print(messages)
else:
  print(run.status)

SyncCursorPage[Message](data=[Message(id='msg_w0oUbEzex3jeHORAuuNrSTTJ', assistant_id='asst_pfdWkQWNgje3wTRXcxxcos4c', attachments=[], completed_at=None, content=[TextContentBlock(text=Text(annotations=[], value='The total dark time for images of type "bias" is 0.015542 seconds.'), type='text')], created_at=1717769674, incomplete_at=None, incomplete_details=None, metadata={}, object='thread.message', role='assistant', run_id='run_3OPhrZc9VhhFTrWb1rPKPjAx', status=None, thread_id='thread_eio3cZBPWeHpkQZ4nli6NHtn'), Message(id='msg_5IDsyGLxwIMwf9jO26gkLoAl', assistant_id='asst_pfdWkQWNgje3wTRXcxxcos4c', attachments=[], completed_at=None, content=[TextContentBlock(text=Text(annotations=[], value='To find the total dark time for images of type "bias," we need to filter the data for rows where the "Image type" is "bias" and then sum the "Darktime" column for these filtered rows. Here are the steps:\n\n1. **Filter Rows**: Select rows where the "Image type" is "bias".\n2. **Sum Darktime**: Ca