In [1]:
from pathlib import Path

from openai import OpenAI
from dotenv import dotenv_values, set_key

# Load the API key from the .env file.
# dotenv_values returns an empty mapping when the file does not exist, so a
# wrong path would otherwise surface only as an unexplained KeyError.
env_path = Path('../.env')
config = dotenv_values(env_path)

api_key = config.get('OPENAI_API_KEY')
if not api_key:
    # Same exception type as the plain dict lookup, but it names the file
    # that was actually consulted.
    raise KeyError(
        f"OPENAI_API_KEY is missing or empty in {env_path.resolve()}"
    )

# Client used by every later cell that talks to the OpenAI API.
client = OpenAI(api_key=api_key)

Create a description and instructions for the assistant. The description should be a brief summary of what the bot does and must be at most 512 characters. It's only used for display in the OpenAI portal, so it is not too important.

The 'instructions', however, are critical. They are where 'prompt engineering' comes in: finding the best instructions to give the bot to make it work how you want. Because it's easier to format and edit the instructions text in its own file, they are stored in a separate file, 'Data/tutor_prompt.md', and loaded from there.

In [None]:
# Longest description the check below accepts (the bound is inclusive).
MAX_DESCRIPTION_LENGTH = 512

# Short summary of the bot; passed as `description` when the assistant is created.
description = "A Chemistry Tutor bot specializing in Quantum Chemistry, utilizing the textbook QUANTUM CHEMISTRY SECOND EDITION by Donald A. McQuarrie. Bot will assist students after they have completed exams questions by providing feedback and guidance on where their answers could be improved."

print(f"Description length: {len(description)}.")
# The comparison is inclusive, so the message says "at most" rather than
# "less than" to match what is actually enforced.
assert len(description) <= MAX_DESCRIPTION_LENGTH, (
    f"Description must be at most {MAX_DESCRIPTION_LENGTH} characters."
)

In [None]:
# Path to the prompt file
prompt_path = Path('Data/tutor_prompt.md')

# Longest instructions text the Assistants API documents as acceptable.
MAX_INSTRUCTIONS_LENGTH = 256_000

# Read the prompt from the file. The encoding is given explicitly so the
# text does not depend on the platform's default locale encoding.
instructions = prompt_path.read_text(encoding='utf-8')
print(f"Instructions length: {len(instructions)}.")

# An empty or oversized prompt is almost certainly a mistake; fail here
# rather than when (or after) the assistant is created.
assert instructions.strip(), f"Prompt file {prompt_path} is empty."
assert len(instructions) <= MAX_INSTRUCTIONS_LENGTH, (
    f"Instructions must be at most {MAX_INSTRUCTIONS_LENGTH} characters."
)

We now need to create an OpenAI "Assistant". Our API call returns an Assistant object that contains a unique id for that assistant. We use this id when dispatching messages to the assistant. As with files uploaded to OpenAI, we only need to do this once. We will also need to associate a vector store with the assistant; that happens further below, once the vector store has been created.

In [None]:
# Create the tutor agent using the beta OpenAI Assistants API.
# The settings are gathered first so they can be inspected before the call.
assistant_settings = dict(
    model="gpt-4-turbo",
    name="QM Professor Tutor",
    description=description,
    instructions=instructions,
    tools=[{"type": "file_search"}],  # enables searching attached files
    temperature=0.2,
)
tutor_agent = client.beta.assistants.create(**assistant_settings)

Now we need to add knowledge to our assistant in the form of files that the assistant can search using its "file_search" tool. Upload the knowledge base to OpenAI using their API. The upload returns an OpenAI [file object](https://platform.openai.com/docs/api-reference/files) that has a unique "id" attribute. Once we have uploaded the file, we can afterwards just use this unique id attribute when creating a vector store from these files.

In [None]:
# Path to the knowledge file
knowledge_file_path = Path('KnowledgeBase/QUANTUM_CHEMISTRY_SECOND_EDITION.pdf')

# Upload the knowledge file to OpenAI. The file is opened in binary mode and
# closed again as soon as the upload call returns.
with knowledge_file_path.open(mode='rb') as pdf_stream:
    openai_file_obj = client.files.create(file=pdf_stream, purpose='assistants')

Now we need to create a [Vector Store](https://platform.openai.com/docs/assistants/tools/file-search/vector-stores) to hold an embedding of the files we want in our knowledge base for the bot.

In [None]:
# Create the vector store; no files are attached to it at this point.
vector_store_name = 'quantum-chemistry'
vector_store = client.beta.vector_stores.create(name=vector_store_name)

Parsing and embedding the file into the store is an async process that can take some time, so it is best to poll for completion before continuing.

In [None]:
# Attach the uploaded file to the vector store and wait until OpenAI has
# finished processing it.
embedded_file = client.beta.vector_stores.files.create_and_poll(
    vector_store_id=vector_store.id,
    file_id=openai_file_obj.id,
)

# Polling ends on any terminal status, not only success, so a failed or
# cancelled ingestion has to be detected here instead of going unnoticed.
if embedded_file.status != "completed":
    raise RuntimeError(
        f"Embedding file {openai_file_obj.id} into vector store {vector_store.id} "
        f"ended with status '{embedded_file.status}': {embedded_file.last_error}"
    )

Now we have to update our assistant to give it the id of the vector store that holds the newly embedded data.

In [None]:
# Point the assistant's file_search tool at the vector store, and keep the
# updated assistant object returned by the API.
file_search_resources = {"file_search": {"vector_store_ids": [vector_store.id]}}
tutor_agent = client.beta.assistants.update(
    assistant_id=tutor_agent.id,
    tool_resources=file_search_resources,
)

Now save the secret OpenAI ids into the .env file so they won't leak accidentally.

In [None]:
# Persist the tutor agent id, the vector store id and the knowledge file id
# in the .env file.
new_ids = {
    'TUTOR_AGENT_ID': tutor_agent.id,
    'VECTOR_STORE_ID': vector_store.id,
    'KNOWLEDGE_FILE_ID': openai_file_obj.id,
}

# Keep the in-memory config in sync with what is written to disk.
config.update(new_ids)

# set_key updates (or appends) one entry at a time and leaves every other
# line of the file as it was. Regenerating the whole file from the parsed
# config would drop comments and quoting, write "KEY=None" for keys that
# have no value, and truncate the file holding the API key before rewriting.
for key, value in new_ids.items():
    set_key(env_path, key, value)