## installation and set up

In [None]:
!pip install openai

In [None]:
# setup

from google.colab import drive, userdata
from openai import OpenAI

# Fetch the API key by name via Colab's userdata helper instead of hard-coding it.
OPENAI_API_KEY = userdata.get('GROUP1_OPENAI_API_KEY')

# Mount Google Drive at /content/drive.
drive.mount('/content/drive')

# Shared client used by every later cell.
client = OpenAI(api_key=OPENAI_API_KEY)

Mounted at /content/drive


### define assistants

For this tutorial we assume that you have already created your assistants in the OpenAI Assistants playground (you can also test them out there). Once you've done that, add them to the dictionary below and choose the one(s) you'd like to use.

In [None]:
# Register your assistants here (or hard-code a single assistant ID if you only need one).
from google.colab import userdata

# Look up each assistant ID by the name it is stored under in Colab userdata.
neanderthal_id = userdata.get('neanderthal_assistant_id')
proto_indo_european_id = userdata.get('proto_indo_european_assistant_id')
rising_star_id = userdata.get('rising_star_assistant_id')
sanxingdui_id = userdata.get('sanxingdui_assistant_id')

# Short, readable name -> assistant ID.
group1_assistants = dict(
    neanderthal=neanderthal_id,
    proto_indo_european=proto_indo_european_id,
    rising_star=rising_star_id,
    sanxingdui=sanxingdui_id,
)

# The assistant that the rest of the notebook talks to.
assistant_id = group1_assistants["sanxingdui"]


## thread cycle

### create

In [None]:
# Start a fresh conversation thread.
thread = client.beta.threads.create()

# Add the opening user message to that thread.
message = client.beta.threads.messages.create(
    thread_id=thread.id,
    role="user",
    content=(
        "can you tell me how you can help me, with reference to a quotation "
        "from a text you are able to retrieve from? (Please be sure to include "
        "at least one quoted passage from the texts you can see, and explain "
        "why that might be useful to me)"
    ),
)

print(thread)


### create the "run" which includes a message and the final prompt (PROMPT A)

### run the thread

In [None]:
# Start a run of the chosen assistant on the thread created above.
run = client.beta.threads.runs.create(thread_id=thread.id, assistant_id=assistant_id)

In [None]:
import json

# Pretty-print the freshly created run.
# model_dump() is the Pydantic v2 replacement for the deprecated .dict().
print(json.dumps(run.model_dump(), indent=4))

### check on the thread

Poll the run and wait for completion (this could take a minute or so).

In [None]:
import time
import json

def wait_for_completion(client, thread_id, run_id, poll_interval=5, timeout=None):
    """Poll a run until it completes and return its final state.

    Args:
        client: Client object exposing ``beta.threads.runs.retrieve``.
        thread_id: ID of the thread the run belongs to.
        run_id: ID of the run to poll.
        poll_interval: Seconds to sleep between status checks.
        timeout: Optional maximum number of seconds to keep polling;
            ``None`` (the default) means no limit.

    Returns:
        The run object from the last ``retrieve`` call, with status "completed".

    Raises:
        RuntimeError: If the run ends in a status that can never become "completed".
        TimeoutError: If ``timeout`` elapses before the run completes.
    """
    # A run in one of these statuses will not progress any further, so polling
    # again would spin forever.
    dead_end_statuses = {"failed", "cancelled", "expired", "incomplete"}
    deadline = None if timeout is None else time.monotonic() + timeout

    while True:
        run_info = client.beta.threads.runs.retrieve(thread_id=thread_id, run_id=run_id)
        print(f"Current run status: {run_info.status}")

        if run_info.status == "completed":
            print("Run is complete.")
            return run_info

        if run_info.status in dead_end_statuses:
            # Include the error details when the run object carries them.
            last_error = getattr(run_info, "last_error", None)
            raise RuntimeError(
                f"Run {run_id} ended with status '{run_info.status}': {last_error}"
            )

        if deadline is not None and time.monotonic() >= deadline:
            raise TimeoutError(
                f"Run {run_id} did not complete within {timeout} seconds "
                f"(last status: {run_info.status})"
            )

        # Wait for a short period before checking again.
        print(f"not done yet, status is {run_info.status}")
        time.sleep(poll_interval)

# Block until the run has finished, then keep its final state.
run_info = wait_for_completion(client, thread.id, run.id)

# model_dump() is the Pydantic v2 replacement for the deprecated .dict().
print(json.dumps(run_info.model_dump(), indent=4))

### retrieve the output and print

plus new logic that sorts through ALL messages and only returns the last-produced JSON string.

In [None]:
import json
import textwrap

# List the messages currently on the thread.
messages = client.beta.threads.messages.list(thread_id=thread.id)

# FOR ALL MESSAGES UNCOMMENT THIS

# for message in messages.data:
#     message_info = client.beta.threads.messages.retrieve(
#         thread_id=thread.id,
#         message_id=message.id
#     )
#     print(json.dumps(message_info.model_dump(), indent=4))
#     message_text = message_info.content[0].text.value
#     print(textwrap.fill(message_text, width=50))

# JUST MOST RECENT MESSAGE
# (assumes the listing is returned newest-first, so index 0 is the latest message; TODO confirm)

message_info = client.beta.threads.messages.retrieve(
    thread_id=thread.id,
    message_id=messages.data[0].id
)

# model_dump() is the Pydantic v2 replacement for the deprecated .dict().
print(json.dumps(message_info.model_dump(), indent=4))
message_text = message_info.content[0].text.value
print(textwrap.fill(message_text, width=50))

In [None]:
def process_citations(client, message_content, width=50):
    """Replace annotation markers with numbered footnotes and wrap the text.

    Args:
        client: Client object exposing ``files.retrieve(file_id)``; the
            returned object must have a ``filename`` attribute.
        message_content: Object with a ``value`` string and, optionally, an
            ``annotations`` iterable. Each annotation has a ``text`` attribute
            and either a ``file_citation`` or a ``file_path`` attribute
            carrying a ``file_id``.
        width: Maximum line width of the returned text.

    Returns:
        The message text with each annotation replaced by `` [n]``, followed
        by a blank line and one citation per line, all wrapped to ``width``.
    """
    modified_content = message_content.value
    # Treat a missing or None annotations attribute as "no annotations".
    annotations = getattr(message_content, 'annotations', None) or []
    citations = []

    for index, annotation in enumerate(annotations):
        # Replace the text with a footnote marker
        modified_content = modified_content.replace(annotation.text, f' [{index}]')

        # Process file citations and paths
        if (file_citation := getattr(annotation, 'file_citation', None)):
            cited_file = client.files.retrieve(file_citation.file_id)
            # The citation may not carry a quote; fall back to the filename alone.
            quote = getattr(file_citation, 'quote', None)
            if quote:
                citations.append(f'[{index}] {quote} from {cited_file.filename}')
            else:
                citations.append(f'[{index}] {cited_file.filename}')
        elif (file_path := getattr(annotation, 'file_path', None)):
            cited_file = client.files.retrieve(file_path.file_id)
            citations.append(f'[{index}] Click <here> to download {cited_file.filename}')
            # Placeholder for actual file download link or method

    # Append gathered citations after a blank line, one per line
    if citations:
        modified_content += '\n\n' + '\n'.join(citations)

    # textwrap.fill() turns every newline into a space, so wrap each line on
    # its own to keep paragraph breaks and the one-citation-per-line layout.
    return '\n'.join(
        textwrap.fill(line, width=width) for line in modified_content.split('\n')
    )

# process_citations reads `.value` and `.annotations`, so pass it the text
# object of the most recent message rather than the bare string.
original_message_content = message_info.content[0].text

# Use the function to process and wrap your content, adjust function parameters as needed
wrapped_and_processed_content = process_citations(client, original_message_content)

# Print the wrapped and processed content
print(wrapped_and_processed_content)