# File Search

## Step 1: Create a new Assistant with File Search Enabled

In [1]:
from openai import OpenAI
from dotenv import load_dotenv
import os
#from tools import TOOLS
TOOLS = [
    {
        'type': 'file_search'
    },

    {
        'type': 'function',
        'function': {
            'name': 'get_events',
            'description': 'Gets events from a syllabus',
            'parameters': {
                'type': 'object',
                'properties': {
                    'course name': {
                        'type': 'string',
                        'description': 'name of the course'
                    },
                    'events': {
                        'type': 'array',
                        'items': {
                            'type': 'object',
                            'description': 'Each calendar event',
                            'properties': {
                                'name': {
                                    'type': 'string',
                                    'description': 'name of event',
                                },
                                'begin': {
                                    'type': 'string',
                                    'description': ('start date, format should be'
                                                   'YYYY-MM-DD 00:00:00')
                                },
                                'end': {
                                    'type': 'string',
                                    'description': ('end date, format should be'
                                                   'YYYY-MM-DD 00:00:00')
                                },
                                'description': {
                                    'type': 'string',
                                    'description': 'description of the event'
                                },
                                'location': {
                                    'type': 'string',
                                    'description': 'location of the event'
                                }
                            }
                        }
                    }
                },
                'required': ['course name', 'events']
            },

        }
    }

]

load_dotenv()
api_key = os.environ.get('OPENAI_API_KEY')

client = OpenAI(api_key=api_key)

assistant = client.beta.assistants.create(
        name='Calendar File Generator',
        instructions=('Your job is to generate calendar files. You do this by'
                     'only using the get_events function'),
        temperature=0.00001, # to make results as deterministic as possible
        model='gpt-4o',
        tools=TOOLS
    )

## Step 2: Upload files and add them to a Vector Store

In [2]:
vector_store = client.beta.vector_stores.create(name='syllabi')

input_folder = 'input'
file_streams = [open(os.path.join(input_folder, file_name), 'rb')
               for file_name in os.listdir(input_folder)]

file_batch = client.beta.vector_stores.file_batches.upload_and_poll(
    vector_store_id=vector_store.id, files=file_streams
)

## Step 3: Update the assistant to use the new Vector Store

In [3]:
assistant = client.beta.assistants.update(
    assistant_id = assistant.id,
    tool_resources={'file_search': {'vector_store_ids': [vector_store.id]}}
)

## Step 4: Create a thread

In [4]:
message_file=client.files.create(
    file=file_streams[0], purpose='assistants'
)
prompt = ('Read my attached pdf and get the events for it by using this'
         'get_events function. You only want to fill out Midterms, Final Exams,'
         ' Assignments, Quizes, and labs as events. I want you to open the file to get'
         'the information')

thread = client.beta.threads.create(
            messages=[
            {
            'role': 'user',
            'content': prompt,
            'attachments': [
                {'file_id': message_file.id, 'tools': [{'type': 'file_search'}]}
            ],
        }
    ]
)

## Step 5: Create a run and check the output

In [5]:
run = client.beta.threads.runs.create_and_poll(
    thread_id=thread.id, assistant_id=assistant.id,
    #tool_choice={'type': 'function', 'function': {'name': 'get_events'}}
)

messages=list(client.beta.threads.messages.list(thread_id=thread.id, run_id=run.id))

#message_content = messages[0].content[0].text
#annotations = message_content.annotations
#citations =[]
#for index, annotation in enumerate(annotations):
#    message_content.value = message_content.value.replace(annotation.text,f'[{index}]')
#    if file_citation != getattr(annotation, 'file_citation', None):
#        cited_file = client.files.retrieve(file_citation.file_id)
#        citations.append(f"[{index}] {cited_file.filename}")

#from ics import Calendar

#calendar = Calendar(message_content.value)
'''
for event in calendar.events:
    print(f"Event: {event.name}")
    print(f"Start: {event.begin}")
    print(f"End: {event.end}")
    print(f"Description: {event.description}")
    print(f"Location: {event.location}")
    print("-" * 20) 
'''

'\nfor event in calendar.events:\n    print(f"Event: {event.name}")\n    print(f"Start: {event.begin}")\n    print(f"End: {event.end}")\n    print(f"Description: {event.description}")\n    print(f"Location: {event.location}")\n    print("-" * 20) \n'

In [6]:
messages

[]

In [7]:
run

Run(id='run_IW2MmVPCgh5GPLS3XvhFXlRB', assistant_id='asst_ARowiEbjuUTPRubn8f5tancJ', cancelled_at=None, completed_at=None, created_at=1721104963, expires_at=1721105563, failed_at=None, incomplete_details=None, instructions='Your job is to generate calendar files. You do this byonly using the get_events function', last_error=None, max_completion_tokens=None, max_prompt_tokens=None, metadata={}, model='gpt-4o', object='thread.run', parallel_tool_calls=True, required_action=RequiredAction(submit_tool_outputs=RequiredActionSubmitToolOutputs(tool_calls=[RequiredActionFunctionToolCall(id='call_FmbbGX7LsB3O6IRl1mzMUiTs', function=Function(arguments='{"course name":"PHIL 120: Symbolic Logic I","events":[{"name":"Midterm 1","begin":"2023-09-27 13:00:00","end":"2023-09-27 13:50:00","description":"Midterm 1 covering initial topics of the course.","location":"Classroom"},{"name":"Midterm 2","begin":"2023-10-25 13:00:00","end":"2023-10-25 13:50:00","description":"Midterm 2 covering subsequent topic

In [8]:
run.required_action

RequiredAction(submit_tool_outputs=RequiredActionSubmitToolOutputs(tool_calls=[RequiredActionFunctionToolCall(id='call_FmbbGX7LsB3O6IRl1mzMUiTs', function=Function(arguments='{"course name":"PHIL 120: Symbolic Logic I","events":[{"name":"Midterm 1","begin":"2023-09-27 13:00:00","end":"2023-09-27 13:50:00","description":"Midterm 1 covering initial topics of the course.","location":"Classroom"},{"name":"Midterm 2","begin":"2023-10-25 13:00:00","end":"2023-10-25 13:50:00","description":"Midterm 2 covering subsequent topics of the course.","location":"Classroom"},{"name":"Midterm 3","begin":"2023-11-22 13:00:00","end":"2023-11-22 13:50:00","description":"Midterm 3 covering later topics of the course.","location":"Classroom"},{"name":"Final Exam","begin":"2023-12-14 14:00:00","end":"2023-12-14 16:00:00","description":"Final exam covering all course material.","location":"Classroom"}]}', name='get_events'), type='function')]), type='submit_tool_outputs')

In [13]:
json_string = run.required_action.submit_tool_outputs.tool_calls[0].function.arguments

In [14]:
import json

In [16]:
data_dict = json.loads(json_string)

In [17]:
data_dict

{'course name': 'PHIL 120: Symbolic Logic I',
 'events': [{'name': 'Midterm 1',
   'begin': '2023-09-27 13:00:00',
   'end': '2023-09-27 13:50:00',
   'description': 'Midterm 1 covering initial topics of the course.',
   'location': 'Classroom'},
  {'name': 'Midterm 2',
   'begin': '2023-10-25 13:00:00',
   'end': '2023-10-25 13:50:00',
   'description': 'Midterm 2 covering subsequent topics of the course.',
   'location': 'Classroom'},
  {'name': 'Midterm 3',
   'begin': '2023-11-22 13:00:00',
   'end': '2023-11-22 13:50:00',
   'description': 'Midterm 3 covering later topics of the course.',
   'location': 'Classroom'},
  {'name': 'Final Exam',
   'begin': '2023-12-14 14:00:00',
   'end': '2023-12-14 16:00:00',
   'description': 'Final exam covering all course material.',
   'location': 'Classroom'}]}