In [1]:
# !pip install python-dotenv

In [2]:
# !pip install openai

In [3]:
import pandas as pd
import openai

In [5]:
client = openai.OpenAI(
    # defaults to os.environ.get("OPENAI_API_KEY")
    api_key= x,
)

In [6]:
import json
import re
import requests

In [7]:
df = pd.read_csv('prompt_data.csv')
df

Unnamed: 0,Location,Start Date,End Date,Budget,Interests
0,Orlando,20-jul,22-jul,30000,"Cafes, Art, Hiking"
1,Washington D.C.,31-ago,2-sept,30000,"Live Sports, Desserts, Beaches"
2,Boston,27-oct,29-oct,30000,"Sightseeing, Bars, Shopping"
3,New Orleans,20-ago,22-ago,30000,"Museums, Restaurants, Parks"
4,Seattle,3-jul,5-jul,30000,"Art, Hiking, Cafes"
5,Honolulu,9-sept,11-sept,30000,"Bars, Beaches, Live Sports"
6,San Diego,21-nov,23-nov,30000,"Bars, Shopping, Sightseeing"
7,Nashville,26-jul,28-jul,30000,"Restaurants, Parks, Museums"
8,Austin,20-jun,22-jun,30000,"Hiking, Cafes, Art"
9,Philadelphia,30-oct,1-nov,30000,"Live Sports, Desserts, Beaches"


In [8]:
# Function to calculate total travel time
def calculate_total_travel_time(travel_plan, api_key):
    total_travel_time = 0  # in seconds

    # Base URL for Google Maps Distance Matrix API
    base_url = "https://maps.googleapis.com/maps/api/distancematrix/json"

    # Iterate through the travel plan to get the addresses
    for i in range(len(travel_plan) - 1):
        origin = travel_plan[i]["address"]
        destination = travel_plan[i + 1]["address"]

        # Parameters for the API request
        params = {
            "origins": origin,
            "destinations": destination,
            "key": api_key
        }

        # Make the API request
        response = requests.get(base_url, params=params)
        data = response.json()

        if response.status_code == 200:
            # Extract the travel time from the response
            travel_time = data["rows"][0]["elements"][0]["duration"]["value"]
            total_travel_time += travel_time
        else:
            print(f"Error fetching data from Google Maps API: {data['error_message']}")
            return None

    # Convert total travel time from seconds to hours and minutes
    total_travel_time_hours = total_travel_time // 3600
    total_travel_time_minutes = total_travel_time // 60

    return total_travel_time_minutes

def clean_json(content):
    # Remove leading/trailing whitespace and backticks
    content = content.strip('`').strip()
    # Remove any numbering or extraneous characters
    content = re.sub(r'^\d+\.\s*', '', content, flags=re.MULTILINE)
    # Remove trailing commas before closing braces or brackets
    content = re.sub(r',\s*(\]|\})', r'\1', content)
    # Remove non-JSON content
    content = re.sub(r'```json|```', '', content)
    return content

def calculate_budget_utilization(budget, travel_plan):
    # Calculate the total cost of activities in the travel plan
    total_cost = sum(activity["price"] for activity in travel_plan)
    
    # If the total cost exceeds the budget, return 0
    if total_cost > budget:
        return 0
    
    # Calculate the percentage of budget utilization
    budget_utilization_percentage = (total_cost / budget) * 100
    
    return budget_utilization_percentage

def calculate_satisfaction_percentage(interests, travel_plan):
    # Convert interests to a set of lowercase strings
    interests_set = set(interest.lower() for interest in interests)
    
    # Get a list of all activity categories in the travel plan
    activity_categories = {activity["category"].lower() for activity in travel_plan}
    
    # Calculate the number of satisfied interests
    satisfied_interests_count = sum(1 for interest in interests_set if interest in activity_categories)
    
    # Calculate the percentage of interests satisfied
    satisfaction_percentage = (satisfied_interests_count / len(interests_set)) * 100
    
    return satisfaction_percentage

def calculate_activity_score(travel_plan):
    repetition_penalty = 5      # Penalty for each repeated category

    # Count the occurrences of each activity category
    category_count = {}
    for activity in travel_plan:
        category = activity["category"].lower()
        if category in category_count:
            category_count[category] += 1
        else:
            category_count[category] = 1

    # Calculate the score
    unique_categories = len(category_count)
    repetitions = sum(count - 1 for count in category_count.values() if count > len(travel_plan)/len(category_count))

    # Initial score based on unique categories
    score = 100 - (repetitions * repetition_penalty)

    # Ensure the score is within the 0-100 range
    score = max(0, min(100, score))

    return score

In [36]:
import pandas as pd

# Step 1: Read the Excel file
df = pd.read_csv('prompt_data.csv')

responses = []
columns = ['interests_score', 'budget_score', 'travel_time_score']
df_eval_metrics = pd.DataFrame(columns=columns)

# Step 2: Generate sentences if the column names are correct
sentences = []

In [53]:
for index, row in df.iloc[40:].iterrows():
    location = row['Location']
    start_date = row['Start Date']
    end_date = row['End Date']
    budget = row['Budget']
    interests = row['Interests'].split(', ')  # Split the interests by comma and space

    sentence = f"Create a very busy travel plan for the following location, dates and interests. For each event or activity planned provide a category for the activity, timeframe with hours of the day from when to when the activity will take, price for the activity and address for location of the activity. Location: {location} Start date: {start_date} End Date: {end_date} Budget: {budget} Interests: {', '.join(interests[:3])}"
    system_message = "You are a travel provider and will respond with a list of activities. Please ensure output is only JSON response and each activity is described in a clean JSON format ready for parsing, with keys for address, date, time start, time end, price, and activity category. Example format: {'address': '5419 W Adams Blvd, Los Angeles, CA 90016', 'date': '2024-09-01', 'time_start': '09:00', 'time_end': '11:00', 'price': 10, 'category': 'Coffee'}."
    sentences.append(sentence)
    # Make API call with each sentence
    response = client.chat.completions.create(
        model="gpt-4o",
        messages=[
        {"role": "system", "content": system_message},
        {"role": "user", "content": sentence}
        ],
        temperature=0.5
    )
    
    # Print the response
    response_content = (response.choices[0].message.content)
    #print(f"Raw response content:\n{response_content}\n")  # Debugging print

    # Clean the JSON content
    cleaned_content = clean_json(response_content)
    print(f"Cleaned response content:\n{cleaned_content}\n")  # Debugging print

    try:
        response_json = json.loads(cleaned_content)
    except json.JSONDecodeError as e:
        print(f"Error decoding JSON: {e}")
        response_json = {}

    # Add the response JSON to the list
    responses.append(response_json)
    
    # Calculate and print the percentage of budget utilization
    budget_utilization_percentage = calculate_budget_utilization(int(budget[:-3]), response_json)
    print(budget_utilization_percentage)
    
    satisfaction_percentage = calculate_satisfaction_percentage(interests, response_json)
    print(satisfaction_percentage)
    
    total_travel_time_minutes = calculate_total_travel_time(response_json, api_key)
    print(f"Total Travel Time: {total_travel_time_minutes} minutes")
    
    
    activity_score = calculate_activity_score(response_json)
    print(f"Activity Score: {activity_score}")

    new_row = {
        'location' : location,
        'interests_score': satisfaction_percentage,
        'budget_score': budget_utilization_percentage,
        'travel_time_score': total_travel_time_minutes,
        'redundency_score': activity_score
    }
    df_eval_metrics = df_eval_metrics.append(new_row, ignore_index=True)
    


Cleaned response content:
[
  {
    "address": "Sheikh Mohammed bin Rashid Blvd, Dubai, UAE",
    "date": "2023-07-28",
    "time_start": "09:00",
    "time_end": "11:00",
    "price": 15,
    "category": "Café"
  },
  {
    "address": "Dubai Hills, Dubai, UAE",
    "date": "2023-07-28",
    "time_start": "12:00",
    "time_end": "15:00",
    "price": 0,
    "category": "Hiking"
  },
  {
    "address": "Jumeirah Beach Rd, Dubai, UAE",
    "date": "2023-07-28",
    "time_start": "16:00",
    "time_end": "18:00",
    "price": 20,
    "category": "Desserts"
  },
  {
    "address": "Al Wasl Rd, Dubai, UAE",
    "date": "2023-07-29",
    "time_start": "09:00",
    "time_end": "11:00",
    "price": 10,
    "category": "Café"
  },
  {
    "address": "Hatta, Dubai, UAE",
    "date": "2023-07-29",
    "time_start": "12:00",
    "time_end": "15:00",
    "price": 0,
    "category": "Hiking"
  },
  {
    "address": "Downtown Dubai, Dubai, UAE",
    "date": "2023-07-29",
    "time_start": "16:00",


  df_eval_metrics = df_eval_metrics.append(new_row, ignore_index=True)


Cleaned response content:
[
    {
        "address": "1 Fullerton Rd, Singapore 049213",
        "date": "2023-11-22",
        "time_start": "18:00",
        "time_end": "22:00",
        "price": 50,
        "category": "Bars"
    },
    {
        "address": "8 Sentosa Gateway, Singapore 098269",
        "date": "2023-11-23",
        "time_start": "10:00",
        "time_end": "14:00",
        "price": 80,
        "category": "Sports"
    },
    {
        "address": "2 Orchard Turn, ION Orchard, Singapore 238801",
        "date": "2023-11-23",
        "time_start": "15:00",
        "time_end": "19:00",
        "price": 100,
        "category": "Shopping"
    },
    {
        "address": "30 Victoria St, CHIJMES, Singapore 187996",
        "date": "2023-11-24",
        "time_start": "20:00",
        "time_end": "23:00",
        "price": 60,
        "category": "Bars"
    },
    {
        "address": "1 Stadium Dr, Singapore 397629",
        "date": "2023-11-25",
        "time_start": "09:0

  df_eval_metrics = df_eval_metrics.append(new_row, ignore_index=True)


Cleaned response content:
[
  {
    "address": "Carrer de Mallorca, 401, 08013 Barcelona, Spain",
    "date": "2024-08-03",
    "time_start": "09:00",
    "time_end": "11:00",
    "price": 26,
    "category": "Sightseeing"
  },
  {
    "address": "Passeig de Gràcia, 43, 08007 Barcelona, Spain",
    "date": "2024-08-03",
    "time_start": "12:00",
    "time_end": "14:00",
    "price": 50,
    "category": "Shopping"
  },
  {
    "address": "Rambla de Catalunya, 37, 08007 Barcelona, Spain",
    "date": "2024-08-03",
    "time_start": "15:00",
    "time_end": "17:00",
    "price": 60,
    "category": "Restaurants"
  },
  {
    "address": "Carrer de Montcada, 15-23, 08003 Barcelona, Spain",
    "date": "2024-08-04",
    "time_start": "10:00",
    "time_end": "12:00",
    "price": 12,
    "category": "Sightseeing"
  },
  {
    "address": "Passeig de Gràcia, 49, 08007 Barcelona, Spain",
    "date": "2024-08-04",
    "time_start": "13:00",
    "time_end": "15:00",
    "price": 45,
    "categor

  df_eval_metrics = df_eval_metrics.append(new_row, ignore_index=True)


Cleaned response content:
[
  {
    "address": "Alemdar, Yerebatan Cd. 1/3, 34110 Fatih/Istanbul, Turkey",
    "date": "2024-06-30",
    "time_start": "09:00",
    "time_end": "11:00",
    "price": 15,
    "category": "Museum"
  },
  {
    "address": "Cankurtaran, Ayasofya Meydanı, 34122 Fatih/Istanbul, Turkey",
    "date": "2024-06-30",
    "time_start": "12:00",
    "time_end": "14:00",
    "price": 20,
    "category": "Museum"
  },
  {
    "address": "Şahkulu, Galip Dede Cd. No:56, 34421 Beyoğlu/Istanbul, Turkey",
    "date": "2024-06-30",
    "time_start": "15:00",
    "time_end": "17:00",
    "price": 10,
    "category": "Bar"
  },
  {
    "address": "Cihangir, Akarsu Ykş. No:46, 34433 Beyoğlu/Istanbul, Turkey",
    "date": "2024-07-01",
    "time_start": "09:00",
    "time_end": "12:00",
    "price": 0,
    "category": "Hiking"
  },
  {
    "address": "Hoca Paşa, Ebussuud Cd. No:12, 34110 Fatih/Istanbul, Turkey",
    "date": "2024-07-01",
    "time_start": "13:00",
    "time_end"

  df_eval_metrics = df_eval_metrics.append(new_row, ignore_index=True)


Cleaned response content:
[
    {
        "address": "Museumplein 6, 1071 DJ Amsterdam, Netherlands",
        "date": "2023-09-21",
        "time_start": "10:00",
        "time_end": "13:00",
        "price": 20,
        "category": "Art"
    },
    {
        "address": "Strandzuid, Europaplein 22, 1078 GZ Amsterdam, Netherlands",
        "date": "2023-09-21",
        "time_start": "14:00",
        "time_end": "17:00",
        "price": 0,
        "category": "Beaches"
    },
    {
        "address": "Dam 9, 1012 JS Amsterdam, Netherlands",
        "date": "2023-09-21",
        "time_start": "18:00",
        "time_end": "20:00",
        "price": 50,
        "category": "Restaurants"
    },
    {
        "address": "Prinsengracht 263-267, 1016 GV Amsterdam, Netherlands",
        "date": "2023-09-22",
        "time_start": "10:00",
        "time_end": "12:00",
        "price": 12,
        "category": "Art"
    },
    {
        "address": "IJpromenade 1, 1031 KT Amsterdam, Netherlands",
  

  df_eval_metrics = df_eval_metrics.append(new_row, ignore_index=True)


Cleaned response content:
[
  {
    "address": "Kurfürstendamm, 10719 Berlin, Germany",
    "date": "2023-12-09",
    "time_start": "10:00",
    "time_end": "13:00",
    "price": 0,
    "category": "Shopping"
  },
  {
    "address": "Friedrichstraße, 10117 Berlin, Germany",
    "date": "2023-12-09",
    "time_start": "14:00",
    "time_end": "17:00",
    "price": 0,
    "category": "Shopping"
  },
  {
    "address": "Rosenthaler Str. 40/41, 10178 Berlin, Germany",
    "date": "2023-12-09",
    "time_start": "18:00",
    "time_end": "19:30",
    "price": 15,
    "category": "Desserts"
  },
  {
    "address": "Grunewald, Berlin, Germany",
    "date": "2023-12-10",
    "time_start": "09:00",
    "time_end": "13:00",
    "price": 0,
    "category": "Hiking"
  },
  {
    "address": "Alexanderplatz, 10178 Berlin, Germany",
    "date": "2023-12-10",
    "time_start": "14:00",
    "time_end": "17:00",
    "price": 0,
    "category": "Shopping"
  },
  {
    "address": "Unter den Linden, 10117 B

  df_eval_metrics = df_eval_metrics.append(new_row, ignore_index=True)


Cleaned response content:
[
    {
        "address": "Plaza de la Independencia, 7, 28001 Madrid, Spain",
        "date": "2023-10-21",
        "time_start": "09:00",
        "time_end": "12:00",
        "price": 0,
        "category": "Parks"
    },
    {
        "address": "Calle de la Cava Baja, 30, 28005 Madrid, Spain",
        "date": "2023-10-21",
        "time_start": "20:00",
        "time_end": "23:00",
        "price": 30,
        "category": "Bars"
    },
    {
        "address": "Paseo del Prado, s/n, 28014 Madrid, Spain",
        "date": "2023-10-22",
        "time_start": "10:00",
        "time_end": "13:00",
        "price": 0,
        "category": "Parks"
    },
    {
        "address": "Calle de Echegaray, 20, 28014 Madrid, Spain",
        "date": "2023-10-22",
        "time_start": "21:00",
        "time_end": "00:00",
        "price": 40,
        "category": "Bars"
    },
    {
        "address": "Calle de Serrano, 61, 28006 Madrid, Spain",
        "date": "2023-10-23

  df_eval_metrics = df_eval_metrics.append(new_row, ignore_index=True)


Cleaned response content:
[
    {
        "address": "Rua Augusta, 1100-048 Lisboa, Portugal",
        "date": "2023-07-27",
        "time_start": "09:00",
        "time_end": "10:00",
        "price": 5,
        "category": "Cafes"
    },
    {
        "address": "Praça do Comércio, 1100-148 Lisboa, Portugal",
        "date": "2023-07-27",
        "time_start": "10:30",
        "time_end": "12:00",
        "price": 0,
        "category": "Shopping"
    },
    {
        "address": "Rua Garrett, 1200-203 Lisboa, Portugal",
        "date": "2023-07-27",
        "time_start": "12:30",
        "time_end": "14:00",
        "price": 20,
        "category": "Restaurants"
    },
    {
        "address": "Avenida da Liberdade, 1250-096 Lisboa, Portugal",
        "date": "2023-07-27",
        "time_start": "14:30",
        "time_end": "16:00",
        "price": 0,
        "category": "Shopping"
    },
    {
        "address": "Rua Nova do Carvalho, 1200-014 Lisboa, Portugal",
        "date": "202

Total Travel Time: 270 minutes
Activity Score: 100


  df_eval_metrics = df_eval_metrics.append(new_row, ignore_index=True)


Cleaned response content:
[
    {
        "address": "Stadio Pier Luigi Penzo, Campo della Celestia, 30122 Venezia VE, Italy",
        "date": "2023-08-21",
        "time_start": "18:00",
        "time_end": "20:00",
        "price": 50,
        "category": "Live Sports"
    },
    {
        "address": "Parco delle Rimembranze, 30132 Venezia VE, Italy",
        "date": "2023-08-22",
        "time_start": "09:00",
        "time_end": "11:00",
        "price": 0,
        "category": "Parks"
    },
    {
        "address": "Parco San Giuliano, Via Orlanda, 30173 Venezia VE, Italy",
        "date": "2023-08-22",
        "time_start": "14:00",
        "time_end": "16:00",
        "price": 0,
        "category": "Parks"
    },
    {
        "address": "Lido di Venezia, 30126 Venezia VE, Italy",
        "date": "2023-08-23",
        "time_start": "10:00",
        "time_end": "12:00",
        "price": 0,
        "category": "Parks"
    },
    {
        "address": "Stadio Pier Luigi Penzo, Camp

  df_eval_metrics = df_eval_metrics.append(new_row, ignore_index=True)


Cleaned response content:
[
    {
        "address": "Golden Gate Bridge, San Francisco, CA 94129",
        "date": "2023-07-02",
        "time_start": "09:00",
        "time_end": "11:00",
        "price": 0,
        "category": "Sightseeing"
    },
    {
        "address": "Fisherman's Wharf, San Francisco, CA 94133",
        "date": "2023-07-02",
        "time_start": "12:00",
        "time_end": "14:00",
        "price": 20,
        "category": "Sightseeing"
    },
    {
        "address": "Ocean Beach, San Francisco, CA 94121",
        "date": "2023-07-02",
        "time_start": "15:00",
        "time_end": "17:00",
        "price": 0,
        "category": "Beaches"
    },
    {
        "address": "Blue Bottle Coffee, 66 Mint St, San Francisco, CA 94103",
        "date": "2023-07-02",
        "time_start": "18:00",
        "time_end": "19:00",
        "price": 10,
        "category": "Cafes"
    },
    {
        "address": "Alcatraz Island, San Francisco, CA 94133",
        "date":

  df_eval_metrics = df_eval_metrics.append(new_row, ignore_index=True)


In [62]:
df_eval_metrics
# df_eval_metrics.to_csv('scores_gpt4o_1-50', index=False)

Unnamed: 0,interests_score,budget_score,travel_time_score,location,redundency_score
0,66.666667,47.333333,229,Orlando,75.0
1,66.666667,85.0,158,Washington D.C.,75.0
2,100.0,86.666667,139,Boston,70.0
3,100.0,42.333333,107,New Orleans,100.0
4,100.0,50.0,159,Seattle,75.0
5,33.333333,95.0,101,Honolulu,75.0
6,100.0,66.666667,58,San Diego,75.0
7,100.0,100.0,129,Nashville,75.0
8,100.0,25.0,76,Austin,100.0
9,100.0,80.666667,87,Philadelphia,85.0


In [63]:
sentences

['Create a very busy travel plan for the following location, dates and interests. For each event or activity planned provide a category for the activity, timeframe with hours of the day from when to when the activity will take, price for the activity and address for location of the activity. Location: Orlando Start date: 20-jul End Date: 22-jul Budget: 300,00 Interests: Cafes, Art, Hiking',
 'Create a very busy travel plan for the following location, dates and interests. For each event or activity planned provide a category for the activity, timeframe with hours of the day from when to when the activity will take, price for the activity and address for location of the activity. Location: Washington D.C. Start date: 31-ago End Date: 2-sept Budget: 300,00 Interests: Live Sports, Desserts, Beaches',
 'Create a very busy travel plan for the following location, dates and interests. For each event or activity planned provide a category for the activity, timeframe with hours of the day from w

In [58]:
sentences
# Save the responses to a file
with open('prompts_1-50.json', 'w') as f:
    json.dump(sentences, f, indent=4)

print("prompts saved to prompts_1-50.json")

prompts saved to prompts_1-50.json


In [43]:
responses

[[{'address': '123 E Central Blvd, Orlando, FL 32801',
   'date': '2023-07-20',
   'time_start': '08:00',
   'time_end': '09:30',
   'price': 15,
   'category': 'Cafe'},
  {'address': '2416 N Mills Ave, Orlando, FL 32803',
   'date': '2023-07-20',
   'time_start': '10:00',
   'time_end': '12:00',
   'price': 20,
   'category': 'Art'},
  {'address': '528 E Washington St, Orlando, FL 32801',
   'date': '2023-07-20',
   'time_start': '13:00',
   'time_end': '14:30',
   'price': 10,
   'category': 'Cafe'},
  {'address': '315 Conroy Windermere Rd, Orlando, FL 32835',
   'date': '2023-07-20',
   'time_start': '15:00',
   'time_end': '18:00',
   'price': 0,
   'category': 'Hiking'},
  {'address': '12078 Collegiate Way, Orlando, FL 32817',
   'date': '2023-07-21',
   'time_start': '08:00',
   'time_end': '09:30',
   'price': 12,
   'category': 'Cafe'},
  {'address': '1 Jeff Fuqua Blvd, Orlando, FL 32827',
   'date': '2023-07-21',
   'time_start': '10:00',
   'time_end': '12:00',
   'price': 25

In [60]:
# Save the responses to a file
with open('responses_gpt4_1-50.json', 'w') as f:
    json.dump(responses, f, indent=4)

print("Responses saved to responses.json")

Responses saved to responses.json


In [59]:
responses

[[{'address': '123 E Central Blvd, Orlando, FL 32801',
   'date': '2023-07-20',
   'time_start': '08:00',
   'time_end': '09:30',
   'price': 15,
   'category': 'Cafe'},
  {'address': '2416 N Mills Ave, Orlando, FL 32803',
   'date': '2023-07-20',
   'time_start': '10:00',
   'time_end': '12:00',
   'price': 20,
   'category': 'Art'},
  {'address': '528 E Washington St, Orlando, FL 32801',
   'date': '2023-07-20',
   'time_start': '13:00',
   'time_end': '14:30',
   'price': 10,
   'category': 'Cafe'},
  {'address': '315 Conroy Windermere Rd, Orlando, FL 32835',
   'date': '2023-07-20',
   'time_start': '15:00',
   'time_end': '18:00',
   'price': 0,
   'category': 'Hiking'},
  {'address': '12078 Collegiate Way, Orlando, FL 32817',
   'date': '2023-07-21',
   'time_start': '08:00',
   'time_end': '09:30',
   'price': 12,
   'category': 'Cafe'},
  {'address': '1 Jeff Fuqua Blvd, Orlando, FL 32827',
   'date': '2023-07-21',
   'time_start': '10:00',
   'time_end': '12:00',
   'price': 25