In [20]:
_DEFAULT_RAG_INSTRUCTIONS = """
You are an intelligent assistant designed to help users track and reflect on their daily lives through their conversation history. You have access to prior conversation summaries stored in JSON files (e.g., "conversation_250111.json"). These files contain key points from the user's day-to-day conversations, including activities, preferences, feelings, and notable events.

Your primary goal is to:
1. Retrieve relevant information from past conversation files to provide meaningful context during the current conversation.
2. Proactively bring up recurring patterns, habits, or topics the user often mentions, and ask thoughtful questions or make observations based on this context (e.g., "You’ve been eating spicy food a lot lately, is your stomach feeling okay?").
3. Offer a personalized and engaging conversational experience by remembering and referencing specific details about the user.

Keep your tone friendly, empathetic, and natural. If no relevant context exists in the conversation history, ask open-ended questions to learn more about the user's recent experiences.
"""

In [5]:
import json

def show_json(obj):
    display(json.loads(obj.model_dump_json()))

In [2]:
from openai import OpenAI
import os
from dotenv import load_dotenv

load_dotenv()
OPENAI_KEY = os.getenv("OPENAI_KEY")

# OpenAI 클라이언트 초기화
client = OpenAI(api_key=OPENAI_KEY)

assistant = client.beta.assistants.create(
name="diary writing assistant",
instructions="""
너는 사용자와 친근하게 대화하는 어시스턴트 "melissa"야. 
        
대화 방식:
        1. 현재 사용자의 말에 먼저 자연스럽게 반응해
        2. 만약 과거 대화 내용과 연관성이 있다면, 그 내용을 자연스럽게 언급해
        3. 현재 대화 주제나 사용자의 관심사를 고려해서 새로운 질문을 해
        
주의사항:
        - 반말로 대화해
        - 과거 대화는 있을 때만 언급하고, 없으면 현재 대화에만 집중해
        - 날짜 있으면 "며칠 전에", "저번 주에" 같이 자연스럽게 표현해
        - 답변은 간결하게 하되, 기계적이지 않게 해
        - 항상 흥미로운 새 질문으로 마무리해
""",
model="gpt-4o-mini",
tools=[{"type": "file_search"}],
)

In [7]:
import time
def wait_on_run(run, thread):
    while run.status == "queued" or run.status == "in_progress":
        run = client.beta.threads.runs.retrieve(
            thread_id=thread.id,
            run_id=run.id
        )
        time.sleep(0.5)
    return run

In [3]:
vector_store = client.beta.vector_stores.create(name="DIARY_ASSISTANT")

file_paths = ["summary/summary_250111.json", "summary/summary_250112.json", "summary/summary_250113.json"]
file_streams = [open(path, "rb") for path in file_paths]

file_batch = client.beta.vector_stores.file_batches.upload_and_poll(
vector_store_id=vector_store.id, files=file_streams
)

print(file_batch.status)
print(file_batch.file_counts)

completed
FileCounts(cancelled=0, completed=3, failed=0, in_progress=0, total=3)


In [4]:
assistant = client.beta.assistants.update(
assistant_id=assistant.id,
tool_resources={"file_search": {"vector_store_ids": [vector_store.id]}},
)

In [12]:
message_file = client.files.create(
file=open("summary/summary_250111.json", "rb"), purpose="assistants"
)

In [17]:
thread = client.beta.threads.create(
messages=[
  {
    "role": "user",
    "content": "나 오늘 딸기 빵 먹었어어",
    "attachments": [
      { "file_id": message_file.id, "tools": [{"type": "file_search"}] }
    ],
  }
]
)

# The thread now has a vector store with that file in its tool resources.
print(thread.tool_resources.file_search)

ToolResourcesFileSearch(vector_store_ids=['vs_CUyapbSBOtIA8Gygxi78GmNl'])


In [21]:
from typing_extensions import override
from openai import AssistantEventHandler, OpenAI

client = OpenAI(api_key=OPENAI_KEY)

class EventHandler(AssistantEventHandler):
  @override
  def on_text_created(self, text) -> None:
      print(f"\nassistant > ", end="", flush=True)

  @override
  def on_tool_call_created(self, tool_call):
      print(f"\nassistant > {tool_call.type}\n", flush=True)

  @override
  def on_message_done(self, message) -> None:
      # print a citation to the file searched
      message_content = message.content[0].text
      annotations = message_content.annotations
      citations = []
      for index, annotation in enumerate(annotations):
          message_content.value = message_content.value.replace(
              annotation.text, f"[{index}]"
          )
          if file_citation := getattr(annotation, "file_citation", None):
              cited_file = client.files.retrieve(file_citation.file_id)
              citations.append(f"[{index}] {cited_file.filename}")

      print(message_content.value)
      print("\n".join(citations))


# Then, we use the stream SDK helper
# with the EventHandler class to create the Run
# and stream the response.

with client.beta.threads.runs.stream(
  thread_id=thread.id,
  assistant_id=assistant.id,
  instructions=_DEFAULT_RAG_INSTRUCTIONS,
  event_handler=EventHandler(),
) as stream:
  stream.until_done()


assistant > file_search


assistant > 최근에 딸기가 들어간 크림 모카번을 좋아한다고 하셨는데, 오늘 먹은 딸기 빵도 그런 종류였나요? 딸기가 많이 들어갔나요, 아니면 크림이랑 함께 들어간 빵이었나요? 🍓[0]
[0] summary_250112.json
