# Kayak Holiday Destination App

In [1]:
# Import relevant libraries
import pandas as pd
import requests
import json
import time

### Step 1: GPS coordinates information retrieval 

In [2]:
# List of cities

cities = [
    "Mont Saint Michel", "St Malo", "Bayeux", "Le Havre", "Rouen", "Paris", "Amiens", "Lille",
    "Strasbourg", "Chateau du Haut Koenigsbourg", "Colmar", "Eguisheim", "Besancon", "Dijon",
    "Annecy", "Grenoble", "Lyon", "Gorges du Verdon", "Bormes les Mimosas", "Cassis", "Marseille",
    "Aix en Provence", "Avignon", "Uzes", "Nimes", "Aigues Mortes", "Saintes Maries de la mer",
    "Collioure", "Carcassonne", "Ariege", "Toulouse", "Montauban", "Biarritz", "Bayonne", "La Rochelle"
]


# API URL

url = "https://nominatim.openstreetmap.org/search"


# Headers as required by Nominatim

headers = {"User-Agent": "holiday-destination-app"}


# Store results

data = []

In [3]:
# Retrieve coordinates information

for city in cities:
    params = {"q": city, "format": "json"}
    response = requests.get(url, params = params, headers = headers)
    
    if response.status_code == 200 and response.json():
        result = response.json()[0]  # Take first result
        data.append({"City": city, "Latitude": result["lat"], "Longitude": result["lon"]})
    else:
        data.append({"City": city, "Latitude": None, "Longitude": None})
    
    time.sleep(1)  

# Save output as DataFrame
df = pd.DataFrame(data)

# Save DF as csv
df.to_csv('Cities_GPS_coordinates_full.csv')

In [4]:
# Round the coordinates to 2 decimal points to comply with OpenWeather API requirements
df_copy = df.copy()

df_copy[['Latitude', 'Longitude']] = df_copy[['Latitude', 'Longitude']].astype(float).round(2)

display(df_copy.head())

# Save file as csv 
df_copy.to_csv('Cities_GPS_coordinates_rounded.csv')

Unnamed: 0,City,Latitude,Longitude
0,Mont Saint Michel,48.64,-1.51
1,St Malo,48.65,-2.03
2,Bayeux,49.28,-0.7
3,Le Havre,49.49,0.11
4,Rouen,49.44,1.09


### Step 2: Weather data retrieval

In [5]:
from dotenv import load_dotenv
import os 

load_dotenv()

API_key = os.environ["API_key"]

In [None]:
# List to store weather DataFrames for each city
weather_data_list = []

# Loop through DataFrame 
for i in range(len(df_copy)):  
    lat = df_copy.loc[i, "Latitude"]
    lon = df_copy.loc[i, "Longitude"]
    city = df_copy.loc[i, "City"]

    # API request
    weather_url = f'https://api.openweathermap.org/data/3.0/onecall?lat={lat}&lon={lon}&exclude=minutely,hourly&appid={API_key}'
    params_weather = {'units': 'metric'}

    response_weather = requests.get(weather_url, params=params_weather)
    result = response_weather.json()

    # Convert forecast daily weather data to a DataFrame
    daily_data = result.get('daily', [])
    df_daily_weather = pd.DataFrame(daily_data)

    # Add city name and lat/lon data to the DataFrame
    df_daily_weather["City"] = city
    df_daily_weather["Latitude"] = lat
    df_daily_weather["Longitude"] = lon

    # Store the DataFrame in the list
    weather_data_list.append(df_daily_weather)

# Concatenate all city weather DataFrames
final_weather_df = pd.concat(weather_data_list, ignore_index=True)

# Print final weather DataFrame
final_weather_df.head()

Unnamed: 0,dt,sunrise,sunset,moonrise,moonset,moon_phase,summary,temp,feels_like,pressure,...,wind_gust,weather,clouds,pop,uvi,rain,City,Latitude,Longitude,snow
0,1738584000,1738567966,1738602416,1738575420,0,0.18,There will be clear sky today,"{'day': 7.86, 'min': 1.21, 'max': 8.62, 'night...","{'day': 6.09, 'night': -0.26, 'eve': 5.08, 'mo...",1028,...,4.14,"[{'id': 800, 'main': 'Clear', 'description': '...",4,0.0,1.37,,Mont Saint Michel,48.64,-1.51,
1,1738670400,1738654281,1738688914,1738662900,1738626480,0.22,Expect a day of partly cloudy with rain,"{'day': 9.43, 'min': 1.32, 'max': 11.07, 'nigh...","{'day': 6.36, 'night': 6.91, 'eve': 5.43, 'mor...",1027,...,10.61,"[{'id': 500, 'main': 'Rain', 'description': 'l...",1,1.0,1.3,0.59,Mont Saint Michel,48.64,-1.51,
2,1738756800,1738740593,1738775413,1738750680,1738717980,0.25,Expect a day of partly cloudy with clear spells,"{'day': 9.74, 'min': 4.7, 'max': 10.13, 'night...","{'day': 8.66, 'night': 2.12, 'eve': 4.17, 'mor...",1040,...,5.74,"[{'id': 803, 'main': 'Clouds', 'description': ...",70,0.76,1.24,,Mont Saint Michel,48.64,-1.51,
3,1738843200,1738826905,1738861912,1738838880,1738809480,0.29,"You can expect partly cloudy in the morning, w...","{'day': 7.97, 'min': 3.76, 'max': 8.47, 'night...","{'day': 4.51, 'night': -0.41, 'eve': 0.61, 'mo...",1039,...,13.51,"[{'id': 803, 'main': 'Clouds', 'description': ...",55,0.0,1.09,,Mont Saint Michel,48.64,-1.51,
4,1738929600,1738913214,1738948412,1738927680,1738900680,0.33,"There will be clear sky until morning, then pa...","{'day': 4.6, 'min': 2.04, 'max': 4.6, 'night':...","{'day': 0.21, 'night': -1.53, 'eve': -0.93, 'm...",1026,...,12.85,"[{'id': 803, 'main': 'Clouds', 'description': ...",83,0.0,1.37,,Mont Saint Michel,48.64,-1.51,


In [7]:
final_weather_df.head()
final_weather_df.shape

(280, 24)

In [50]:
# Convert date to dt format
final_weather_df['dt'] = pd.to_datetime(final_weather_df['dt'], unit='s', utc=True)

# Create new columns for the temperature data (which is a dictionnary)
df_expanded = final_weather_df.join(pd.json_normalize(final_weather_df['temp']))
df_expanded.head()

Unnamed: 0,dt,sunrise,sunset,moonrise,moonset,moon_phase,summary,temp,feels_like,pressure,...,City,Latitude,Longitude,snow,day,min,max,night,eve,morn
0,2025-02-03 12:00:00+00:00,1738567966,1738602416,1738575420,0,0.18,There will be clear sky today,"{'day': 7.86, 'min': 1.21, 'max': 8.62, 'night...","{'day': 6.09, 'night': -0.26, 'eve': 5.08, 'mo...",1028,...,Mont Saint Michel,48.64,-1.51,,7.86,1.21,8.62,2.94,7.09,1.26
1,2025-02-04 12:00:00+00:00,1738654281,1738688914,1738662900,1738626480,0.22,Expect a day of partly cloudy with rain,"{'day': 9.43, 'min': 1.32, 'max': 11.07, 'nigh...","{'day': 6.36, 'night': 6.91, 'eve': 5.43, 'mor...",1027,...,Mont Saint Michel,48.64,-1.51,,9.43,1.32,11.07,8.9,7.98,1.32
2,2025-02-05 12:00:00+00:00,1738740593,1738775413,1738750680,1738717980,0.25,Expect a day of partly cloudy with clear spells,"{'day': 9.74, 'min': 4.7, 'max': 10.13, 'night...","{'day': 8.66, 'night': 2.12, 'eve': 4.17, 'mor...",1040,...,Mont Saint Michel,48.64,-1.51,,9.74,4.7,10.13,4.7,6.47,5.05
3,2025-02-06 12:00:00+00:00,1738826905,1738861912,1738838880,1738809480,0.29,"You can expect partly cloudy in the morning, w...","{'day': 7.97, 'min': 3.76, 'max': 8.47, 'night...","{'day': 4.51, 'night': -0.41, 'eve': 0.61, 'mo...",1039,...,Mont Saint Michel,48.64,-1.51,,7.97,3.76,8.47,4.29,5.01,4.13
4,2025-02-07 12:00:00+00:00,1738913214,1738948412,1738927680,1738900680,0.33,"There will be clear sky until morning, then pa...","{'day': 4.6, 'min': 2.04, 'max': 4.6, 'night':...","{'day': 0.21, 'night': -1.53, 'eve': -0.93, 'm...",1026,...,Mont Saint Michel,48.64,-1.51,,4.6,2.04,4.6,2.56,3.35,2.04


#### DataFrame manipulations

In [27]:
df_expanded = pd.read_csv("Weather-data-3feb25.csv")
df_expanded = df_expanded.iloc[: , 1:]
pd.set_option('display.max_columns', None)
df_expanded.head()

Unnamed: 0,dt,sunrise,sunset,moonrise,moonset,moon_phase,summary,temp,feels_like,pressure,humidity,dew_point,wind_speed,wind_deg,wind_gust,weather,clouds,pop,uvi,rain,City,Latitude,Longitude,snow,day,min,max,night,eve,morn,average_min_temp,average_max_temp,rain_fall,avg_clouds
0,2025-02-03 12:00:00+00:00,1738567966,1738602416,1738575420,0,0.18,There will be clear sky today,"{'day': 7.86, 'min': 1.21, 'max': 8.62, 'night...","{'day': 6.09, 'night': -0.26, 'eve': 5.08, 'mo...",1028,75,3.71,3.51,167,4.14,"[{'id': 800, 'main': 'Clear', 'description': '...",4,0.0,1.37,,Mont Saint Michel,48.64,-1.51,,7.86,1.21,8.62,2.94,7.09,1.26,2.8625,8.745,0.976667,55.875
1,2025-02-04 12:00:00+00:00,1738654281,1738688914,1738662900,1738626480,0.22,Expect a day of partly cloudy with rain,"{'day': 9.43, 'min': 1.32, 'max': 11.07, 'nigh...","{'day': 6.36, 'night': 6.91, 'eve': 5.43, 'mor...",1027,81,6.13,6.95,209,10.61,"[{'id': 500, 'main': 'Rain', 'description': 'l...",1,1.0,1.3,0.59,Mont Saint Michel,48.64,-1.51,,9.43,1.32,11.07,8.9,7.98,1.32,2.8625,8.745,0.976667,55.875
2,2025-02-05 12:00:00+00:00,1738740593,1738775413,1738750680,1738717980,0.25,Expect a day of partly cloudy with clear spells,"{'day': 9.74, 'min': 4.7, 'max': 10.13, 'night...","{'day': 8.66, 'night': 2.12, 'eve': 4.17, 'mor...",1040,70,4.26,3.56,351,5.74,"[{'id': 803, 'main': 'Clouds', 'description': ...",70,0.76,1.24,,Mont Saint Michel,48.64,-1.51,,9.74,4.7,10.13,4.7,6.47,5.05,2.8625,8.745,0.976667,55.875
3,2025-02-06 12:00:00+00:00,1738826905,1738861912,1738838880,1738809480,0.29,"You can expect partly cloudy in the morning, w...","{'day': 7.97, 'min': 3.76, 'max': 8.47, 'night...","{'day': 4.51, 'night': -0.41, 'eve': 0.61, 'mo...",1039,67,2.13,7.03,47,13.51,"[{'id': 803, 'main': 'Clouds', 'description': ...",55,0.0,1.09,,Mont Saint Michel,48.64,-1.51,,7.97,3.76,8.47,4.29,5.01,4.13,2.8625,8.745,0.976667,55.875
4,2025-02-07 12:00:00+00:00,1738913214,1738948412,1738927680,1738900680,0.33,"There will be clear sky until morning, then pa...","{'day': 4.6, 'min': 2.04, 'max': 4.6, 'night':...","{'day': 0.21, 'night': -1.53, 'eve': -0.93, 'm...",1026,72,-0.21,6.84,48,12.85,"[{'id': 803, 'main': 'Clouds', 'description': ...",83,0.0,1.37,,Mont Saint Michel,48.64,-1.51,,4.6,2.04,4.6,2.56,3.35,2.04,2.8625,8.745,0.976667,55.875


#### Create unique City IDs

In [31]:
# Step 1: get all unique cities across both datasets
df_hotel = pd.read_csv("booking_hotels_allcities_full.csv")

all_cities = pd.Index(df_expanded["City"]).union(df_hotel["City"])

# Step 2: build a mapping (factorize once)
city_to_id = {city: idx+1 for idx, city in enumerate(all_cities)}

# Step 3: map in both DataFrames
df_expanded["City_ID"] = df_expanded["City"].map(city_to_id)
df_hotel["City_ID"]    = df_hotel["City"].map(city_to_id)


In [32]:
df_expanded.to_csv("Weather_dataset_final_cityID.csv", index=False)
df_hotel.to_csv("Hotel_dataset_final_cityID.csv", index=False)

In [19]:
# Calculate average min temp, average max temp and average rain probability during the next 7 days
city_averages = df_expanded.groupby("City").agg(
    #average_min_temp=("min", "mean"),
    #average_max_temp=("max", "mean"),
    rain_prob=("pop", "mean"),
    #avg_clouds=("clouds", "mean"),
    wind=("wind_speed", "mean")
)

# Merge back to the original dataframe
df_expanded = df_expanded.merge(city_averages, on="City", how="left")

df_expanded.head()

Unnamed: 0,dt,sunrise,sunset,moonrise,moonset,moon_phase,summary,temp,feels_like,pressure,humidity,dew_point,wind_speed,wind_deg,wind_gust,weather,clouds,pop,uvi,rain,City,Latitude,Longitude,snow,day,min,max,night,eve,morn,average_min_temp,average_max_temp,rain_fall,avg_clouds,rain_prob,wind
0,2025-02-03 12:00:00+00:00,1738567966,1738602416,1738575420,0,0.18,There will be clear sky today,"{'day': 7.86, 'min': 1.21, 'max': 8.62, 'night...","{'day': 6.09, 'night': -0.26, 'eve': 5.08, 'mo...",1028,75,3.71,3.51,167,4.14,"[{'id': 800, 'main': 'Clear', 'description': '...",4,0.0,1.37,,Mont Saint Michel,48.64,-1.51,,7.86,1.21,8.62,2.94,7.09,1.26,2.8625,8.745,0.976667,55.875,0.39625,5.3175
1,2025-02-04 12:00:00+00:00,1738654281,1738688914,1738662900,1738626480,0.22,Expect a day of partly cloudy with rain,"{'day': 9.43, 'min': 1.32, 'max': 11.07, 'nigh...","{'day': 6.36, 'night': 6.91, 'eve': 5.43, 'mor...",1027,81,6.13,6.95,209,10.61,"[{'id': 500, 'main': 'Rain', 'description': 'l...",1,1.0,1.3,0.59,Mont Saint Michel,48.64,-1.51,,9.43,1.32,11.07,8.9,7.98,1.32,2.8625,8.745,0.976667,55.875,0.39625,5.3175
2,2025-02-05 12:00:00+00:00,1738740593,1738775413,1738750680,1738717980,0.25,Expect a day of partly cloudy with clear spells,"{'day': 9.74, 'min': 4.7, 'max': 10.13, 'night...","{'day': 8.66, 'night': 2.12, 'eve': 4.17, 'mor...",1040,70,4.26,3.56,351,5.74,"[{'id': 803, 'main': 'Clouds', 'description': ...",70,0.76,1.24,,Mont Saint Michel,48.64,-1.51,,9.74,4.7,10.13,4.7,6.47,5.05,2.8625,8.745,0.976667,55.875,0.39625,5.3175
3,2025-02-06 12:00:00+00:00,1738826905,1738861912,1738838880,1738809480,0.29,"You can expect partly cloudy in the morning, w...","{'day': 7.97, 'min': 3.76, 'max': 8.47, 'night...","{'day': 4.51, 'night': -0.41, 'eve': 0.61, 'mo...",1039,67,2.13,7.03,47,13.51,"[{'id': 803, 'main': 'Clouds', 'description': ...",55,0.0,1.09,,Mont Saint Michel,48.64,-1.51,,7.97,3.76,8.47,4.29,5.01,4.13,2.8625,8.745,0.976667,55.875,0.39625,5.3175
4,2025-02-07 12:00:00+00:00,1738913214,1738948412,1738927680,1738900680,0.33,"There will be clear sky until morning, then pa...","{'day': 4.6, 'min': 2.04, 'max': 4.6, 'night':...","{'day': 0.21, 'night': -1.53, 'eve': -0.93, 'm...",1026,72,-0.21,6.84,48,12.85,"[{'id': 803, 'main': 'Clouds', 'description': ...",83,0.0,1.37,,Mont Saint Michel,48.64,-1.51,,4.6,2.04,4.6,2.56,3.35,2.04,2.8625,8.745,0.976667,55.875,0.39625,5.3175


In [20]:
# Filter dataframe to only columns of interest
df_filtered = df_expanded[["City", "Latitude", "Longitude", "average_min_temp", "average_max_temp", "rain_prob", "avg_clouds", "wind"]].drop_duplicates()

In [22]:
df_sorted = df_filtered.sort_values(
    by=["average_max_temp", "avg_clouds"],
    ascending=[False, True]  # Descending for max temp, ascending clouds 
)

pd.set_option('display.max_columns', None)
display(df_sorted.head())


Unnamed: 0,City,Latitude,Longitude,average_min_temp,average_max_temp,rain_prob,avg_clouds,wind
160,Marseille,43.3,5.37,10.07625,13.205,0.3,54.875,5.4625
216,Collioure,42.53,3.08,6.72625,13.02875,0.335,52.125,4.50625
168,Aix en Provence,43.53,5.45,6.76,12.96125,0.115,54.25,3.62
152,Cassis,43.21,5.54,9.265,12.51,0.3175,54.625,5.4675
200,Aigues Mortes,43.57,4.19,6.075,12.12375,0.32,52.875,6.73875


In [23]:
# Create an id for each city
df_sorted["City_ID"] = pd.factorize(df_sorted["City"])[0] + 1

df_sorted.shape


(35, 9)

In [25]:
df_sorted

Unnamed: 0,City,Latitude,Longitude,average_min_temp,average_max_temp,rain_prob,avg_clouds,wind,City_ID
160,Marseille,43.3,5.37,10.07625,13.205,0.3,54.875,5.4625,1
216,Collioure,42.53,3.08,6.72625,13.02875,0.335,52.125,4.50625,2
168,Aix en Provence,43.53,5.45,6.76,12.96125,0.115,54.25,3.62,3
152,Cassis,43.21,5.54,9.265,12.51,0.3175,54.625,5.4675,4
200,Aigues Mortes,43.57,4.19,6.075,12.12375,0.32,52.875,6.73875,5
144,Bormes les Mimosas,43.15,6.34,7.4175,12.03375,0.30375,52.5,5.7975,6
208,Saintes Maries de la mer,43.45,4.43,7.16,11.805,0.2975,55.75,10.01875,7
192,Nimes,43.84,4.36,4.05125,11.67125,0.3175,47.375,6.1625,8
176,Avignon,43.95,4.81,4.2225,11.6275,0.15625,52.5,6.75375,9
224,Carcassonne,43.21,2.35,3.72625,11.2075,0.25,44.0,4.96625,10


In [26]:
# Export file
df_sorted.to_csv('Weather-data-sorted-3feb25-cityID.csv')

In [24]:
# Determine the cities with the best weather 
# Refine rule to only keep cities with:
df_sorted_2 = df_sorted[(df_sorted["average_max_temp"] > 10) & 
                        (df_sorted["rain_prob"] <= 0.25) & 
                        (df_sorted["avg_clouds"] < 55) & 
                        (df_sorted["wind"] < 7)]
df_sorted_2

best_cities = df_sorted_2.head(5)
print(best_cities)

                City  Latitude  Longitude  average_min_temp  average_max_temp  \
168  Aix en Provence     43.53       5.45           6.76000          12.96125   
176          Avignon     43.95       4.81           4.22250          11.62750   
224      Carcassonne     43.21       2.35           3.72625          11.20750   
120         Grenoble     45.19       5.74           3.31250          11.10125   
240         Toulouse     43.60       1.44           3.87000          10.86750   

     rain_prob  avg_clouds     wind  City_ID  
168    0.11500      54.250  3.62000        3  
176    0.15625      52.500  6.75375        9  
224    0.25000      44.000  4.96625       10  
120    0.00000      40.875  1.85375       12  
240    0.12875      40.375  3.22500       15  
