In [23]:
# !pip install --upgrade openai --quiet

In [8]:
import json
import os
from typing import TextIO

from openai import OpenAI
from pandas import DataFrame

In [9]:
# Build the shared OpenAI client. The key is read from the TRAVEL_PLANNER_API_KEY
# environment variable; if that variable is unset, None is passed as api_key.
client = OpenAI(api_key=os.environ.get("TRAVEL_PLANNER_API_KEY"))

### Travel Planner Defined Tools

In [10]:
import os
import sys

# Put the parent of the current working directory on the import path so that the
# `tools.*` packages imported in the next cell can be resolved.
parent_dir = os.path.abspath(os.path.join(os.getcwd(), os.pardir))
sys.path.append(parent_dir)

In [11]:
from tools.accommodations.apis import Accommodations
from tools.attractions.apis import Attractions
from tools.cities.apis import Cities
from tools.flights.apis import Flights
from tools.restaurants.apis import Restaurants
from tools.googleDistanceMatrix.apis import GoogleDistanceMatrix
from tools.cities.apis import Cities

  from .autonotebook import tqdm as notebook_tqdm


In [12]:
# Instantiate the flight-search tool (class imported from tools.flights.apis).
# The recorded cell output shows "Flights API loaded." being printed on construction.
flight_api = Flights()

Flights API loaded.


In [13]:
# Instantiate the remaining TravelPlanner tools, one object per tool class.
# The recorded cell output shows one "... loaded." line per constructor, in this order.
accommodation_api = Accommodations()
attraction_api = Attractions()
restaurant_api = Restaurants()
google_dist_api = GoogleDistanceMatrix()
city_api = Cities()

Accommodations loaded.
Attractions loaded.
Restaurants loaded.
GoogleDistanceMatrix loaded.
Cities loaded.


In [14]:
# Dispatch table: tool/function name requested by the assistant -> the API object
# that serves it. EventHandler looks tool calls up here by `tool.function.name`.
FUNC_NAME2API = dict(
    FlightSearch=flight_api,
    AccommodationSearch=accommodation_api,
    AttractionSearch=attraction_api,
    RestaurantSearch=restaurant_api,
    CitySearch=city_api,
    DistanceMatrix=google_dist_api,
)

### Travel Planner Query Data

In [15]:
from datasets import load_dataset
from typing import Dict, List, Tuple, Union

In [16]:
# Split names accepted by get_query_data_list.
DATA_SPLIT = ("train", "validation", "test")


def get_query_data_list(data_split: str) -> List[Dict]:
    """Load one split of the 'osunlp/TravelPlanner' dataset.

    Args:
        data_split: One of the names in DATA_SPLIT. The same value is passed to
            `load_dataset` as its second argument and used to index the result.

    Returns:
        Whatever `load_dataset(...)[data_split]` yields for that split.
        NOTE(review): the annotation says List[Dict]; the actual object type
        comes from the `datasets` library -- confirm before relying on list
        semantics.

    Raises:
        AssertionError: If `data_split` is not one of DATA_SPLIT.
    """
    assert data_split in DATA_SPLIT
    dataset = load_dataset('osunlp/TravelPlanner', data_split)
    return dataset[data_split]

In [17]:
# Load the "train" split; later cells read the user queries from its 'query' column.
train_data = get_query_data_list("train")

In [18]:
# Preview the first ten training queries.
train_data["query"][0:10]

['Please help me plan a trip from St. Petersburg to Rockford spanning 3 days from March 16th to March 18th, 2022. The travel should be planned for a single person with a budget of $1,700.',
 'Can you provide a travel plan for 1 person departing from Kansas City to Pensacola for 3 days, from March 27th to March 29th, 2022, with a budget of $900?',
 'Please design a travel plan departing from Las Vegas and heading to Stockton for 3 days, from March 3rd to March 5th, 2022, for one person, with a budget of $1,400.',
 'Craft a travel plan for me to depart from New Orleans and head to Louisville for 3 days, from March 12th to March 14th, 2022. I will be travelling alone with a budget of $1,900.',
 'Please provide me with a travel plan that departs from Las Vegas to Denver for 3 days, from March 12th to March 14th, 2022. The budget for this solo trip is $1,600.',
 'Could you assist in crafting a travel itinerary for a 5-day, single-person trip departing from Orlando and touring 2 cities in Te

### Create a stream thread and run

In [22]:
def start_prompt():
    """Return the fixed planner instruction that opens every prompt.

    The wording (including its spacing around punctuation) is part of the prompt
    sent to the model and is kept verbatim.
    """
    instruction = """You are a proficient planner . Based on the provided information and query , please give me
a detailed plan, including specifics such as flight numbers (e.g., F0123456), restaurant
names, and hotel names. Note that all the information in your plan should be derived from
the provided data. You must adhere to the response format given in the example. Additionally, all
details should align with common sense. Attraction visits and meals are expected to be
diverse . The symbol '-' indicates that information is unnecessary . For example , in the
provided sample, you do not need to plan after returning to the departure city . When you
travel to two cities in one day , you should note it in the 'Current City' section as in
the example (i.e., from A to B)."""
    return instruction

In [23]:
def cot_prompt():
    """Return the step-by-step instruction that also limits the model to one function call at a time."""
    instruction = "Please solve the problem step by step. Make only one function call at a time."
    return instruction

In [24]:
def example_and_query_prompt(query):
    """Return the one-shot example followed by the user's query.

    Args:
        query: The travel request to plan for; it is interpolated after "Query: ".

    Returns:
        A string holding the worked example (delimited by the
        '***** Example *****' / '***** Example Ends *****' markers), then
        'Query: <query>' and a trailing 'Travel Plan:' line for the model to
        complete.

    NOTE(review): the example query says "3 days" but gives March 8th to
    March 14th; the text is kept verbatim because it is part of the prompt.
    """
    worked_example = """
***** Example *****
Query: Could you create a travel plan for 7 people from Ithaca to Charlotte spanning 3 days, from March 8th to March 14th, 2022, with a budget of $30,200?
Travel Plan:
Day 1:
Current City: from Ithaca to Charlotte
Transportation: Flight Number: F3633413, from Ithaca to Charlotte, Departure Time: 05:38, Arrival Time: 07:46
Breakfast: Nagaland's Kitchen, Charlotte
Attraction: The Charlotte Museum of History, Charlotte
Lunch: Cafe Maple Street, Charlotte
Dinner: Bombay Vada Pav, Charlotte
Accommodation: Affordable Spacious Refurbished Room in Bushwick!, Charlotte

Day 2:
Current City: Charlotte
Transportation: -
Breakfast: Olive Tree Cafe, Charlotte
Attraction: The Mint Museum, Charlotte;Romare Bearden Park, Charlotte.
Lunch: Birbal Ji Dhaba, Charlotte
Dinner: Pind Balluchi, Charlotte
Accommodation: Affordable Spacious Refurbished Room in Bushwick!, Charlotte

Day 3:
Current City: from Charlotte to Ithaca
Transportation: Flight Number: F3786167, from Charlotte to Ithaca, Departure Time: 21:42, Arrival Time: 23:26
Breakfast: Subway, Charlotte
Attraction: Books Monument, Charlotte.
Lunch: Olive Tree Cafe, Charlotte
Dinner: Kylin Skybar, Charlotte
Accommodation: -
***** Example Ends *****
"""
    return f"{worked_example}\nQuery: {query}\nTravel Plan:"

In [25]:
def create_prompt(query):
    """Assemble the full user message for one query.

    The message is the planner instruction, the step-by-step instruction, and the
    example-plus-query section, joined by single newlines in that order.
    """
    sections = (start_prompt(), cot_prompt(), example_and_query_prompt(query))
    return "\n".join(sections)

In [26]:
# ID of the existing OpenAI assistant that every run in this notebook uses.
ASSISTANT_ID = "asst_lxFE0Q9nFmuN7tF54vHFXe65"
# Alternate assistant ID kept for reference; uncomment to switch assistants.
# ASSISTANT_ID = "asst_aMNP0aphAxBoJ1hIDkRfQuIP"
# Fetch the assistant object; run_thread reads `assistant.id` when it starts a run.
assistant = client.beta.assistants.retrieve(ASSISTANT_ID)

In [27]:
from typing_extensions import override
from openai import AssistantEventHandler

In [28]:
class EventHandler(AssistantEventHandler):
    """Streaming event handler that answers the assistant's tool calls.

    When a run reports 'thread.run.requires_action', every requested tool call
    is dispatched to the matching object in FUNC_NAME2API, the results are
    submitted back in one batch, and the streamed text that follows is echoed to
    stdout and appended to the log file.

    Args:
        log_file: An open, writable text file object; tool calls, API responses
            and streamed assistant text are written to it.
    """

    def __init__(self, log_file):
        self.log_file = log_file
        super().__init__()

    @override
    def on_event(self, event):
        """React to run events; only 'thread.run.requires_action' is handled."""
        if event.event == 'thread.run.requires_action':
            run_id = event.data.id  # Retrieve the run ID from the event data
            self.handle_requires_action(event.data, run_id)

    def handle_requires_action(self, data, run_id):
        """Execute every pending tool call and submit the collected outputs.

        Args:
            data: The event payload; its
                `required_action.submit_tool_outputs.tool_calls` is iterated.
            run_id: ID of the run that requested the tool calls.

        Raises:
            AssertionError: If a tool returns something other than None, a
                DataFrame or a dict.
        """
        tool_outputs = []

        for tool in data.required_action.submit_tool_outputs.tool_calls:
            # The arguments arrive as JSON text generated by the model. Parse
            # them as JSON rather than eval(): eval fails on JSON literals such
            # as true/false/null and would execute arbitrary expressions.
            args = json.loads(tool.function.arguments)

            api = FUNC_NAME2API[tool.function.name]
            api_res = api.run_for_mobi(**args)

            # `result` is what gets submitted to the run; `result_str` is the
            # human-readable form written to the log.
            result, result_str = {"tool_call_id": tool.id}, ''
            if api_res is None:
                result["output"] = "no valid information"
                result_str = "no valid information"
            elif isinstance(api_res, DataFrame):
                result["output"] = api_res.to_json()
                result_str = api_res.to_markdown()
            elif isinstance(api_res, dict):
                result["output"] = json.dumps(api_res, indent=4)
                result_str = api_res
            else:
                # Raised explicitly (not `assert False`) so the check survives
                # `python -O`, and reports the offending API return value.
                raise AssertionError(
                    f"call {tool.function.name} with argument {args}. "
                    f"return of api should be either DataFrame, dict or None, "
                    f"but got {type(api_res)}, {api_res}\n"
                )
            self.log_file.write(f"\n\n```\n[function call]: {tool.function.name}(**{args})\n```\napi_response:\n{result_str}\n\n")
            tool_outputs.append(result)

        # Submit all tool_outputs at the same time
        self.submit_tool_outputs(tool_outputs, run_id)

    def submit_tool_outputs(self, tool_outputs, run_id):
        """Send the tool outputs back and stream the assistant's continuation.

        A fresh EventHandler sharing the same log file is attached to the new
        stream so that further tool calls in the continuation are handled too.

        Args:
            tool_outputs: List of {"tool_call_id": ..., "output": ...} dicts.
            run_id: Accepted for symmetry with handle_requires_action; the
                thread and run IDs actually used come from `self.current_run`.
        """
        # Use the submit_tool_outputs_stream helper
        with client.beta.threads.runs.submit_tool_outputs_stream(
            thread_id=self.current_run.thread_id,
            run_id=self.current_run.id,
            tool_outputs=tool_outputs,
            event_handler=EventHandler(self.log_file),
        ) as stream:
            for text in stream.text_deltas:
                print(text, end="", flush=True)
                self.log_file.write(text)
            self.log_file.write('\n')

In [29]:
def run_thread(log_file: TextIO, query: str):
    """Run one travel-planning query through the assistant on a fresh thread.

    Creates a new thread, posts the prompt built by `create_prompt(query)` as
    the user message, then streams a run of `assistant` with an EventHandler
    until the run is done.

    Args:
        log_file: An open, writable text file object. It is handed to
            EventHandler, which writes to it (it is not a path string).
        query: The natural-language travel request.
    """
    thread = client.beta.threads.create()
    # The created message object is not needed afterwards, so it is not bound.
    client.beta.threads.messages.create(
        thread_id=thread.id,
        role="user",
        content=create_prompt(query),
    )

    with client.beta.threads.runs.stream(
        thread_id=thread.id,
        assistant_id=assistant.id,
        event_handler=EventHandler(log_file),
    ) as stream:
        stream.until_done()

In [30]:
import time 

In [31]:
# Half-open range [s, e) of training-query indices to run.
s, e = 0, 5 #len(train_data)

# Each batch gets its own log directory named after the Unix timestamp of the run.
run_timestamp = str(int(time.time()))
print(f"run time stampe: {run_timestamp}\n start idx: {s}\t end idx: {e}\n")
run_log_dir = os.path.join("run_log/train/", run_timestamp)
# exist_ok=False: refuse to write into a directory left over from another run.
os.makedirs(run_log_dir, exist_ok=False)

for i in range(s, e):
    query_id = f"train_{i}"
    try:
        query = train_data['query'][i]
        # One markdown log per query. The encoding is explicit so the log does
        # not depend on the platform's default text encoding.
        with open(os.path.join(run_log_dir, f"{query_id}.md"), "a", encoding="utf-8") as log_file:
            log_file.write(f"### query_id:\n{query_id}\n### query:\n{query}\n\n")
            run_thread(log_file, query)
    # Best-effort batch: report the failure and continue with the next query.
    # The exception is bound to `exc`, not `e`: `except ... as e` would rebind
    # the end index `e` and then unbind it when the handler exits.
    except Exception as exc:
        print(f"{query_id}: {exc}")

run time stampe: 1717008413
 start idx: 0	 end idx: 5

Here is your detailed travel plan:

### Day 1:
**Current City:** from St. Petersburg to Rockford  
**Transportation:** Flight Number: F3573659, from St. Petersburg to Rockford, Departure Time: 15:40, Arrival Time: 17:04

**Breakfast:** -  
**Attraction:** -  
**Lunch:** -  
**Dinner:** Aroma Rest O Bar, Rockford  
**Accommodation:** Private Room in a two bedroom apt., Rockford

### Day 2:
**Current City:** Rockford  
**Transportation:** -

**Breakfast:** Dunkin' Donuts, Rockford  
**Attraction:** Burpee Museum of Natural History, Rockford  
**Lunch:** Flying Mango, Rockford  
**Attraction:** Anderson Japanese Gardens, Rockford  
**Dinner:** Mirchievous, Rockford  
**Accommodation:** Private Room in a two bedroom apt., Rockford

### Day 3:
**Current City:** Rockford  
**Transportation:** -

**Breakfast:** Subway, Rockford  
**Attraction:** Tinker Swiss Cottage Museum and Gardens, Rockford  
**Lunch:** Faaso's, Rockford  
**Attractio

### Upload Files to OpenAI (deprecated)

In [11]:
DATA_ROOT = "../database/" # download from google drive

# CSV files to upload, given relative to DATA_ROOT.
CSV_RELATIVE_PATHS = (
    "accommodations/clean_accommodations_2022.csv",
    "attractions/attractions.csv",
    "flights/clean_Flights_2022.csv",
    "googleDistanceMatrix/distance.csv",
    "restaurants/clean_restaurant_2022.csv",
)

file_paths = [os.path.join(DATA_ROOT, relative_csv) for relative_csv in CSV_RELATIVE_PATHS]
file_paths[0]

'../database/accommodations/clean_accommodations_2022.csv'

In [14]:
# Upload each local CSV to OpenAI and record the returned file object, serialized
# as a JSON string, keyed by the local path.
local_file2openai = {}
for fp in file_paths:
    # Open inside a `with` block so the handle is closed once the upload
    # returns, instead of leaking one open file per iteration.
    with open(fp, "rb") as file_handle:
        openai_file = client.files.create(
            file=file_handle,
            purpose='assistants'
        )
    local_file2openai[fp] = openai_file.model_dump_json()

In [19]:
# Decode each stored JSON string back into a Python object, keeping the same keys.
local_file2openai_json = {
    local_path: json.loads(serialized)
    for local_path, serialized in local_file2openai.items()
}

In [22]:
# Save the local-path -> uploaded-file mapping next to the notebook as indented JSON.
with open("./local_file2openai_json.json", "w") as f:
    f.write(json.dumps(local_file2openai_json, indent=4))