In [23]:
# !pip install --upgrade openai --quiet

In [1]:
import json
import os
from typing import TextIO

from openai import OpenAI
from pandas import DataFrame

In [2]:
# Shared OpenAI client used by every API call in this notebook. The key is read from the
# TRAVEL_PLANNER_API_KEY environment variable; os.getenv returns None when it is unset.
# NOTE(review): confirm how OpenAI() behaves when api_key is None (fallback vs. error).
client = OpenAI(api_key=os.getenv("TRAVEL_PLANNER_API_KEY"))

### Travel Planner Defined Tools

In [3]:
import os
import sys
# Make the parent of the current working directory importable so that the
# `tools.*` packages imported in the next cell can be resolved.
sys.path.append(
    os.path.abspath(
        os.path.join(os.getcwd(), os.pardir)
    )
)

In [4]:
from tools.accommodations.apis import Accommodations
from tools.attractions.apis import Attractions
from tools.cities.apis import Cities
from tools.flights.apis import Flights
from tools.restaurants.apis import Restaurants
from tools.googleDistanceMatrix.apis import GoogleDistanceMatrix
from tools.cities.apis import Cities

  from .autonotebook import tqdm as notebook_tqdm


In [5]:
# Flight lookup tool; constructing it prints "Flights API loaded." (see the cell output).
flight_api = Flights()

Flights API loaded.


In [6]:
# Remaining lookup tools, one instance each. Every constructor prints a
# "<name> loaded." line, which is why the cell output lists them in this order.
accommodation_api = Accommodations()
attraction_api = Attractions()
restaurant_api = Restaurants()
google_dist_api = GoogleDistanceMatrix()
city_api = Cities()

Accommodations loaded.
Attractions loaded.
Restaurants loaded.
GoogleDistanceMatrix loaded.
Cities loaded.


In [7]:
# Dispatch table: function name requested by the assistant -> local API object that
# serves it. EventHandler.handle_requires_action looks tool calls up here by
# `tool.function.name` and then calls `run_for_mobi(**args)` on the result, so a
# name missing from this dict raises KeyError at dispatch time.
FUNC_NAME2API = {
    "FlightSearch": flight_api,
    "AccommodationSearch": accommodation_api,
    "AttractionSearch":  attraction_api,
    "RestaurantSearch": restaurant_api,
    "CitySearch": city_api,
    "DistanceMatrix": google_dist_api
}

### Travel Planner Query Data

In [8]:
from datasets import load_dataset
from typing import Dict, List, Tuple, Union

In [9]:
# Split names accepted by get_query_data_list.
DATA_SPLIT = ("train", "validation", "test")


def get_query_data_list(data_split: str) -> List[Dict]:
    """Load one split of the 'osunlp/TravelPlanner' dataset.

    `data_split` is used twice: as the second argument to `load_dataset` and as
    the key selecting the split from the object that call returns.

    Args:
        data_split: One of the names in `DATA_SPLIT`.

    Returns:
        Whatever `load_dataset(...)[data_split]` yields.
        NOTE(review): annotated as List[Dict]; confirm the concrete type.

    Raises:
        AssertionError: If `data_split` is not listed in `DATA_SPLIT`.
    """
    assert data_split in DATA_SPLIT
    splits = load_dataset('osunlp/TravelPlanner', data_split)
    return splits[data_split]

In [10]:
# Training split of the query dataset; later cells read its 'query' column.
train_data = get_query_data_list("train")

In [11]:
# Preview the first ten natural-language queries.
train_data['query'][:10]

['Please help me plan a trip from St. Petersburg to Rockford spanning 3 days from March 16th to March 18th, 2022. The travel should be planned for a single person with a budget of $1,700.',
 'Can you provide a travel plan for 1 person departing from Kansas City to Pensacola for 3 days, from March 27th to March 29th, 2022, with a budget of $900?',
 'Please design a travel plan departing from Las Vegas and heading to Stockton for 3 days, from March 3rd to March 5th, 2022, for one person, with a budget of $1,400.',
 'Craft a travel plan for me to depart from New Orleans and head to Louisville for 3 days, from March 12th to March 14th, 2022. I will be travelling alone with a budget of $1,900.',
 'Please provide me with a travel plan that departs from Las Vegas to Denver for 3 days, from March 12th to March 14th, 2022. The budget for this solo trip is $1,600.',
 'Could you assist in crafting a travel itinerary for a 5-day, single-person trip departing from Orlando and touring 2 cities in Te

### Create Assistant

In [12]:
# assistant = client.beta.assistants.create(
#   name="mobi-fork-travelplanner",
#   description="You are a helpful travel planner",
#   model="gpt-4o",
#   response_format= { "type": "json_object" },
#   tools=[
#     {
#       "type": "function",
#       "function": {
#         "name": "FlightSearch",
#         "description": "A flight information retrieval tool. For exmaple FlightSearch('New York', 'London', '2022-10-01') would fetch flights from New York to London on October 1 , 2022.",
#         "parameters": {
#           "type": "object",
#           "properties": {
#             "origin": {
#               "type": "string",
#               "description": "The city you 'll be flying out from."
#             },
#             "destination": {
#               "type": "string",
#               "description": "The city you aim to reach ."
#             },
#             "departure_date": {
#               "type": "string",
#               "description": "The date of your travel in YYYY-MM-DD format."
#             }
#           },
#           "required": ["origin", "destination", "departure_date"]
#         }
#       }
#     },
#     {
#       "type": "function",
#       "function": {
#         "name": "DistanceMatrix",
#         "description": "Estimate the distance, time and cost between two cities. For example, Example : DistanceMatrix('Paris', 'Lyon', 'self-driving') would provide driving distance, time and cost between Paris and Lyon .",
#         "parameters": {
#           "type": "object",
#           "properties": {
#             "origin": {
#               "type": "string",
#               "description": "The departure city of your journey"
#             },
#             "destination": {
#               "type": "string",
#               "description": " The destination city of your journey"
#             },
#             "mode": {
#               "type": "string",
#               "enum": ["Self-driving", "Taxi"],
#               "description": "The method of transportation."
#             }
#           },
#           "required": ["origin", "destination", "mode"]
#         }
#       }
#     },
#     {
#       "type": "function",
#       "function": {
#         "name": "AccommodationSearch",
#         "description": "Discover accommodations in your desired city. For example, AccommodationSearch('Rome') would present a list of hotel rooms in Rome.",
#         "parameters": {
#           "type": "object",
#           "properties": {
#             "city": {
#               "type": "string",
#               "description": "The name of the city where you're seeking accommodation."
#             }
#           },
#           "required": ["city"]
#         }
#       }
#     },
#     {
#       "type": "function",
#       "function": {
#         "name": "RestaurantSearch",
#         "description": "Explore dining options in a city of your choice. For example, RestaurantSearch('Tokyo') would show a curated list of restaurants in Tokyo.",
#         "parameters": {
#           "type": "object",
#           "properties": {
#             "city": {
#               "type": "string",
#               "description": "The name of the city where you're seeking restaurant."
#             }
#           },
#           "required": ["city"]
#         }
#       }
#     },
#     {
#       "type": "function",
#       "function": {
#         "name": "AttractionSearch",
#         "description": "Find attractions in a city of your choice. For example, AttractionSearch('London') would return attractions in London.",
#         "parameters": {
#           "type": "object",
#           "properties": {
#             "city": {
#               "type": "string",
#               "description": "The name of the city where you're seeking attractions."
#             }
#           },
#           "required": ["city"]
#         }
#       }
#     },
#     {
#       "type": "function",
#       "function": {
#         "name": "CitySearch",
#         "description": "Find cities in a state of your choice. For example, CitySearch('California') would return cities in California.",
#         "parameters": {
#           "type": "object",
#           "properties": {
#             "state": {
#               "type": "string",
#               "description": "The name of the state where you're seeking cities."
#             }
#           },
#           "required": ["state"]
#         }
#       }
#     },
#     # {
#     #   "type": "function",
#     #   "function": {
#     #     "name": "NotebookWrite",
#     #     "description": "Writes a new data entry into the Notebook tool with a short description. This tool should be used immediately after FlightSearch, AccommodationSearch, AttractionSearch, RestaurantSearch, or DistanceMatrix. Only the data stored in Notebook can be seen by Planner. So you should write all the information you need into Notebook. For example, NotebookWrite('Flights from Rome to Paris in 2022-02-01') would store the information of flights from Rome to Paris in 2022-02-01 in the Notebook.",
#     #     "parameters": {
#     #       "type": "object",
#     #       "properties": {
#     #         "short_description": {
#     #           "type": "string",
#     #           "description": "A brief description or label for the stored data. You don't need to write all the information in the description. The data you've searched for will be automatically stored in the Notebook."
#     #         }
#     #       },
#     #       "required": ["short_description"]
#     #     }
#     #   }
#     # },
#     # {
#     #   "type": "function",
#     #   "function": {
#     #     "name": "Planner",
#     #     "description": "A smart planning tool that crafts detailed plans based on user input and the information stored in Notebook. For example, Planner('Give me a 3-day trip plan from Seattle to New York') would return a detailed 3-day trip plan.",
#     #     "parameters": {
#     #       "type": "object",
#     #       "properties": {
#     #         "query": {
#     #           "type": "string",
#     #           "description": "The query from user."
#     #         }
#     #       },
#     #       "required": ["query"]
#     #     }
#     #   }
#     # }
#   ],

# )

In [13]:
# with open("./assistants.jsonl", 'a') as f:
#     f.writelines([assistant.model_dump_json()])

In [14]:
# assistant.id, assistant.description

('asst_UjHPGpQZk2sQqQT7QuvbPOkq', 'You are a helpful travel planner')

### Create a stream thread and run

In [24]:
def create_prompt(query):
    """Wrap a user query in the planning instructions and an example itinerary.

    The result is the query, a newline, the instructions (solve step by step,
    one function call at a time, summarise at the end) and a two-day example
    plan delimited by ``-----EXAMPLE-----`` / ``-----EXAMPLE END-----``.

    Args:
        query: The travel request. It is interpolated through ``str.format``,
            so non-string values are converted the same way an f-string would.

    Returns:
        The complete prompt text, ending with a newline.
    """
    # One tuple entry per prompt line. Trailing spaces inside some literals are
    # part of the prompt text and are kept on purpose; the final empty entry
    # yields the terminating newline.
    prompt_lines = (
        "{}",
        " Please solve the problem step by step. ",
        "Make only one function call at a time. ",
        "Please collect all the information and make descisions at first, and then make a summarize of the plan at the end.",
        "Please follow the example format when you make summarize:",
        "-----EXAMPLE-----",
        "Day 1:",
        "Current City: from Ithaca to Charlotte",
        "Transportation: Flight Number : F3633413 , from Ithaca to Charlotte , Departure Time : 05:38 , Arrival Time: 07:46",
        "Breakfast: Nagaland 's Kitchen , Charlotte",
        "Attraction: The Charlotte Museum of History , Charlotte",
        "Lunch: Cafe Maple Street , Charlotte",
        "Dinner: Bombay Vada Pav , Charlotte",
        "Accommodation: Affordable Spacious Refurbished Room in Bushwick Charlotte",
        "",
        "Day 2:",
        "Current City : Charlotte",
        "Transportation : -",
        "Breakfast : Olive Tree Cafe , Charlotte",
        "Attraction : The Mint Museum , Charlotte ; Romare Bearden Park , Charlotte .",
        "Lunch : Birbal Ji Dhaba , Charlotte",
        "Dinner : Pind Balluchi , Charlotte",
        "Accommodation : Affordable Spacious Refurbished Room in Bushwick Charlotte ",
        "-----EXAMPLE END-----",
        "",
    )
    return "\n".join(prompt_lines).format(query)

In [25]:
# prompt = """
# Please use function calls to gather information and help me crafting a travel itinerary. 
# Please stick to the information that you get from function calls.
# Use the following format as return (do not include any markdown format):

# Day 1:
# Current City: from Ithaca to Charlotte
# Transportation: Flight Number : F3633413 , from Ithaca to Charlotte , Departure Time : 05:38 ,
# Arrival Time: 07:46
# Breakfast: Nagaland 's Kitchen , Charlotte
# Attraction: The Charlotte Museum of History , Charlotte
# Lunch: Cafe Maple Street , Charlotte
# Dinner: Bombay Vada Pav , Charlotte
# Accommodation: Affordable Spacious Refurbished Room in Bushwick Charlotte
# Day 2:
# Current City : Charlotte
# Transportation : -
# Breakfast : Olive Tree Cafe , Charlotte
# Attraction : The Mint Museum , Charlotte ; Romare Bearden Park , Charlotte .
# Lunch : Birbal Ji Dhaba , Charlotte
# Dinner : Pind Balluchi , Charlotte
# Accommodation : Affordable Spacious Refurbished Room in Bushwick Charlotte 

# Okay, now my query is:
# """

In [26]:
# ID of the assistant to run against. It is hard-coded here and the assistant object
# is fetched from the API instead of being re-created by the (commented-out) cell above.
ASSISTANT_ID = "asst_lxFE0Q9nFmuN7tF54vHFXe65"
# NOTE(review): presumably an alternative assistant kept for reference — confirm or remove.
# ASSISTANT_ID = "asst_aMNP0aphAxBoJ1hIDkRfQuIP"
assistant = client.beta.assistants.retrieve(ASSISTANT_ID)

In [27]:
from typing_extensions import override
from openai import AssistantEventHandler

In [28]:
class EventHandler(AssistantEventHandler):
    """Stream handler that serves the assistant's tool calls from the local travel APIs.

    When a run reaches ``thread.run.requires_action``, every requested tool call
    is dispatched through ``FUNC_NAME2API``, the call and its result are written
    to ``log_file``, and all outputs are submitted back in a single request. The
    follow-up stream gets a fresh ``EventHandler`` sharing the same log file, so
    further tool calls in that stream are handled the same way.

    Args:
        log_file: Object with a ``write(str)`` method. It receives the function
            call log and the assistant text streamed after tool outputs are
            submitted.
    """

    def __init__(self, log_file):
        self.log_file = log_file
        super().__init__()

    @override
    def on_event(self, event):
        """React only to ``thread.run.requires_action``; all other events are ignored here."""
        if event.event == 'thread.run.requires_action':
            run_id = event.data.id  # Retrieve the run ID from the event data
            self.handle_requires_action(event.data, run_id)

    def handle_requires_action(self, data, run_id):
        """Execute every pending tool call locally and submit the collected outputs.

        Args:
            data: Event payload; its ``required_action.submit_tool_outputs.tool_calls``
                lists the calls to serve.
            run_id: ID of the run that requested the calls.

        Raises:
            KeyError: If a requested function name is not in ``FUNC_NAME2API``.
            TypeError: If an API returns something other than ``None``, a
                ``DataFrame`` or a ``dict``.
            json.JSONDecodeError: If the call arguments are not valid JSON.
        """
        tool_outputs = []

        for tool in data.required_action.submit_tool_outputs.tool_calls:
            # The arguments arrive as a JSON-encoded string produced by the model.
            # Parse them as data: eval() would execute arbitrary expressions and
            # also fails on the JSON literals true / false / null.
            args = json.loads(tool.function.arguments or "{}")

            api = FUNC_NAME2API[tool.function.name]
            api_res = api.run_for_mobi(**args)

            # `result` goes back to the assistant, `result_str` goes to the log.
            result, result_str = {"tool_call_id": tool.id}, ''
            if api_res is None:
                result["output"] = "no valid information"
                result_str = "no valid information"
            elif isinstance(api_res, DataFrame):
                result["output"] = api_res.to_json()
                result_str = api_res.to_markdown()
            elif isinstance(api_res, dict):
                result["output"] = json.dumps(api_res, indent=4)
                result_str = api_res
            else:
                # Raised explicitly (not via assert) so the check survives `python -O`,
                # and reporting the offending API value rather than the output dict.
                raise TypeError(
                    f"call {tool.function.name} with argument {args}. "
                    f"return of api should be either DataFrame, dict or None, "
                    f"but got {type(api_res)}, {api_res}\n"
                )
            self.log_file.write(f"\n\n```\n[function call]: {tool.function.name}(**{args})\n```\napi_response:\n{result_str}\n\n")
            tool_outputs.append(result)

        # Submit all tool_outputs at the same time
        self.submit_tool_outputs(tool_outputs, run_id)

    def submit_tool_outputs(self, tool_outputs, run_id):
        """Send the tool outputs back and echo the assistant's streamed reply.

        Args:
            tool_outputs: List of ``{"tool_call_id": ..., "output": ...}`` dicts.
            run_id: Accepted for symmetry with ``handle_requires_action``; the
                thread and run IDs actually sent are read from ``self.current_run``.
        """
        # Use the submit_tool_outputs_stream helper
        with client.beta.threads.runs.submit_tool_outputs_stream(
            thread_id=self.current_run.thread_id,
            run_id=self.current_run.id,
            tool_outputs=tool_outputs,
            # A new handler on the same log file serves tool calls made in this stream.
            event_handler=EventHandler(self.log_file),
        ) as stream:
            for text in stream.text_deltas:
                print(text, end="", flush=True)
                self.log_file.write(text)
            self.log_file.write('\n')

In [29]:
def run_thread(log_file: TextIO, query: str):
    """Run one query through the assistant on a fresh thread.

    Creates a new thread, posts ``create_prompt(query)`` as the user message and
    streams a run of ``assistant`` with an ``EventHandler`` bound to `log_file`,
    blocking until the stream is done.

    Args:
        log_file: Open, writable text file. It is handed to ``EventHandler``,
            which calls ``write`` on it (it is a file handle, not a path string).
        query: The raw user query; it is wrapped by ``create_prompt``.

    NOTE(review): this function only drains the stream with ``until_done()``;
    printing and logging of assistant text happens in
    ``EventHandler.submit_tool_outputs``, i.e. after tool outputs are submitted.
    Confirm that text emitted before the first tool call is not needed.
    """
    thread = client.beta.threads.create()
    # The created message object is not used afterwards, so it is not bound to a name.
    client.beta.threads.messages.create(
        thread_id=thread.id,
        role="user",
        content=create_prompt(query),
    )

    with client.beta.threads.runs.stream(
        thread_id=thread.id,
        assistant_id=assistant.id,
        event_handler=EventHandler(log_file),
    ) as stream:
        stream.until_done()

In [30]:
import time 

In [31]:
# Half-open index range [s, e) of training queries to run.
s, e = 0, len(train_data)

# Each batch run logs into its own directory named after the start time (epoch seconds).
run_timestamp = str(int(time.time()))
print(f"run time stampe: {run_timestamp}\n start idx: {s}\t end idx: {e}\n")
run_log_dir = os.path.join("run_log/train/", run_timestamp)
# exist_ok=False: fail instead of mixing logs from two runs in one directory.
os.makedirs(run_log_dir, exist_ok=False)

# Look the column up once instead of repeating the lookup on every iteration.
queries = train_data['query']

for i in range(s, e):
    query_id = f"train_{i}"
    try:
        query = queries[i]
        # Explicit UTF-8 so non-ASCII text in queries or model output cannot fail
        # on platforms whose default encoding is not UTF-8.
        with open(os.path.join(run_log_dir, f"{query_id}.md"), "a", encoding="utf-8") as log_file:
            log_file.write(f"### query_id:\n{query_id}\n### query:\n{query}\n\n")
            run_thread(log_file, query)
    # Bound to `exc`, not `e`: `except ... as e` would overwrite the end index `e`
    # and then delete the name when the handler exits.
    # Best-effort batch: a failing query is reported and the loop moves on.
    except Exception as exc:
        print(f"{query_id}: {exc}")

run time stampe: 1716839838
 start idx: 0	 end idx: 45

The flight information for travel from St. Petersburg to Rockford on March 16th, 2022 is as follows:

- **Flight Number:** F3573659
- **Price:** $474
- **Departure Time:** 15:40
- **Arrival Time:** 17:04
- **Duration:** 2 hours 24 minutes
- **Distance:** 1049 miles

Next, let's look for accommodations in Rockford. I will search for accommodations for two nights, from March 16th to March 18th.Here are the accommodation options available in Rockford with their respective details:

1. **Spacious 3BDR Prime Location!**
   - Price: $1030 (2-night total)
   - Room Type: Entire home/apt
   - House Rules: No smoking
   - Minimum Nights: 2
   - Max Occupancy: 9
   - Review Rate: 2

2. **Private Room in a two bedroom apt.**
   - Price: $210 (2-night total)
   - Room Type: Private room
   - House Rules: No visitors & No smoking
   - Minimum Nights: 1
   - Max Occupancy: 2
   - Review Rate: 4

3. **Private rooms And Matchless Location**
   - 

### Upload Files to OpenAI (deprecated)

In [11]:
DATA_ROOT = "../database/" # download from google drive

file_paths = [os.path.join(DATA_ROOT, f) for f in ("accommodations/clean_accommodations_2022.csv",
                                        "attractions/attractions.csv",
                                        "flights/clean_Flights_2022.csv",
                                        "googleDistanceMatrix/distance.csv",
                                        "restaurants/clean_restaurant_2022.csv")]
file_paths[0]

'../database/accommodations/clean_accommodations_2022.csv'

In [14]:
# Upload every local CSV and remember, per local path, the JSON dump of the
# file object returned by the API.
local_file2openai = {}
for fp in file_paths:
  # `with` closes the handle even if the upload raises; the original passed a
  # bare open(...) and never closed it.
  with open(fp, "rb") as fh:
    openai_file = client.files.create(
      file=fh,
      purpose='assistants'
    )
  local_file2openai[fp] = openai_file.model_dump_json()

In [19]:
# Decode each stored JSON string back into a plain object, keyed by the same local path.
local_file2openai_json = {
    local_path: json.loads(file_json)
    for local_path, file_json in local_file2openai.items()
}

In [22]:
# Persist the local-path -> uploaded-file mapping next to the notebook.
with open("./local_file2openai_json.json", "w") as mapping_file:
    mapping_file.write(json.dumps(local_file2openai_json, indent=4))