In [17]:
import requests
import pandas as pd
import time
import os
import json
import datetime

# Booking window used for the hotel search: check in today, check out seven
# days later, both formatted as YYYY-MM-DD strings.
today = datetime.date.today()
one_week_later = today + datetime.timedelta(days=7)

checkin_date = today.strftime("%Y-%m-%d")
checkout_date = one_week_later.strftime("%Y-%m-%d")

# French cities to geocode and search hotels for.
cities = ["Mont Saint Michel", "St Malo", "Bayeux", "Le Havre", "Rouen", "Paris", "Amiens", "Lille", "Strasbourg",
          "Chateau du Haut Koenigsbourg", "Colmar", "Eguisheim", "Besancon", "Dijon", "Annecy", "Grenoble", "Lyon",
          "Gorges du Verdon", "Bormes les Mimosas", "Cassis", "Marseille", "Aix en Provence","Avignon", "Uzes",
          "Nimes", "Aigues Mortes", "Saintes Maries de la mer", "Collioure", "Carcassonne", "Ariege", "Toulouse",
          "Montauban", "Biarritz", "Bayonne", "La Rochelle"]

# Get the API keys from environment variables so no secret is stored in the
# notebook. When a variable is unset the previous placeholder value is used,
# so the cell still runs (the remote API is then expected to reject the call).
OWM_API_KEY = os.environ.get("WEATHER_API_KEY", "WEATHER_API_KEY")
RAPIDAPI_KEY = os.environ.get("RAPIDAPI_KEY", "API_KEY")

# Base URL and headers shared by every hotels-com-provider request.
base_url = "https://hotels-com-provider.p.rapidapi.com"
headers = {
    "X-RapidAPI-Key": RAPIDAPI_KEY,
    "X-RapidAPI-Host": "hotels-com-provider.p.rapidapi.com"
}


def get_gps_coordinates(city):
    """Look up a city's coordinates through the Nominatim search endpoint.

    Args:
        city: City name, sent as the ``city`` query parameter.

    Returns:
        A ``(lat, lon)`` tuple taken from the first search hit, with the
        values exactly as the API returned them, or ``(None, None)`` when the
        search has no hit or the request/decoding fails.
    """
    # Named differently from the module-level ``headers`` (RapidAPI) to avoid
    # shadowing it.
    request_headers = {"User-Agent": "touristicsapp"}
    try:
        response = requests.get(
            "https://nominatim.openstreetmap.org/search",
            # ``params`` lets requests URL-encode the city name.
            params={"city": city, "format": "json"},
            headers=request_headers,
            timeout=10,  # never hang the whole notebook on one lookup
        )
        response.raise_for_status()
        data = response.json()
    except (requests.RequestException, ValueError) as exc:
        print(f"Geocoding request failed for {city}: {exc}")
        return None, None

    if data:
        return data[0]["lat"], data[0]["lon"]
    return None, None

def get_weather(lat, lon):
    """Fetch the current weather for a coordinate pair from OpenWeatherMap.

    Args:
        lat: Latitude, sent as the ``lat`` query parameter.
        lon: Longitude, sent as the ``lon`` query parameter.

    Returns:
        A ``(weather_description, temperature)`` tuple read from the first
        ``weather`` entry and from ``main.temp`` (requested with
        ``units=metric``), or ``(None, None)`` when the request fails or the
        payload does not have that shape.
    """
    try:
        response = requests.get(
            # HTTPS so the API key in the query string is not sent in clear text.
            "https://api.openweathermap.org/data/2.5/weather",
            params={"lat": lat, "lon": lon, "appid": OWM_API_KEY, "units": "metric"},
            timeout=10,
        )
        # No raise_for_status(): error payloads are decoded and printed below,
        # which shows the API's own message (e.g. for a rejected key).
        data = response.json()
    except (requests.RequestException, ValueError) as exc:
        print(f"Weather request failed for ({lat}, {lon}): {exc}")
        return None, None

    try:
        weather_description = data["weather"][0]["description"]
        temperature = data["main"]["temp"]
    except (KeyError, IndexError, TypeError):
        print(f"Unexpected response from weather API: {data}")
        return None, None
    return weather_description, temperature

# Geocode every city and attach its current weather. Cities that cannot be
# geocoded are reported and left out of the result.
city_data = []
for city in cities:
    lat, lon = get_gps_coordinates(city)
    time.sleep(1)  # Delay to prevent heavy usage
    if not (lat and lon):
        print(f"No coordinates found for city: {city}; skipping")
        continue
    weather, temperature = get_weather(lat, lon)
    city_data.append({
        'city': city,
        'lat': lat,
        'lon': lon,
        'weather': weather,
        'temperature': temperature
    })

# Convert the city data into a pandas DataFrame.
# Built once, after the loop; the explicit column list keeps the frame's
# schema stable even when no city could be geocoded.
df_cities = pd.DataFrame(city_data, columns=['city', 'lat', 'lon', 'weather', 'temperature'])

# Fetch hotel data for every geocoded city
# Settings for the hotel scrape.
HOTELS_CSV = 'frenchhotels.csv'
MAX_HOTELS_PER_CITY = 15
REQUEST_TIMEOUT = 30  # seconds, applied to every hotels API call
# Fixed column order so every append to the CSV lines up with its header.
HOTEL_COLUMNS = ['city', 'lat', 'lon', 'weather', 'temperature',
                 'id', 'overall_rank', 'availability', 'name', 'tag']


def _as_dict(value):
    """Return `value` when it is a dict, otherwise an empty dict."""
    return value if isinstance(value, dict) else {}


def _get_json(path, params):
    """GET ``base_url + path`` with the shared headers; return decoded JSON or None on failure."""
    try:
        response = requests.get(f"{base_url}{path}", headers=headers,
                                params=params, timeout=REQUEST_TIMEOUT)
        response.raise_for_status()
        return response.json()
    except (requests.RequestException, ValueError) as exc:
        print(f"Request to {path} failed: {exc}")
        return None


def _find_city_region_id(city):
    """Return the ``gaiaId`` of the first region of type CITY found for `city`, or None."""
    region_data = _get_json("/v2/regions", {"locale": "fr_FR", "query": city, "domain": "FR"})
    regions = _as_dict(region_data).get('data')
    if not isinstance(regions, list):
        return None
    for region in regions:
        if _as_dict(region).get('type') == 'CITY':
            return region.get('gaiaId')
    return None


# For each geocoded city: resolve its region id, search available 3-5 star
# hotels for the booking window, enrich up to MAX_HOTELS_PER_CITY of them with
# name/tagline and review rating, then append the rows to HOTELS_CSV.
# NOTE: rows are always appended, so re-running this cell duplicates rows in
# an existing CSV; delete the file first for a clean run.
for index, row in df_cities.iterrows():
    hotel_data = []
    city = row['city']
    lat = row['lat']
    lon = row['lon']
    weather = row['weather']
    temperature = row['temperature']

    print(f"Fetching region data for city: {city}")
    region_id = _find_city_region_id(city)
    time.sleep(1)  # add delay here
    if region_id is None:
        # Without this guard the previous city's region_id would be reused and
        # its hotels saved under this city's name (or a NameError raised on
        # the first city).
        print(f"No CITY region found for {city}; skipping hotel search")
        continue

    print(f"Fetching hotel data for city: {city}")
    hotel_count = 0
    search_results = _get_json("/v2/hotels/search", {
        "domain": "FR",
        "sort_order": "RECOMMENDED",
        "locale": "fr_FR",
        "region_id": region_id,
        "checkin_date": checkin_date,
        "checkout_date": checkout_date,
        "adults_number": "1",
        "available_filter": "SHOW_AVAILABLE_ONLY",
        "star_rating_ids": "3,4,5"})
    properties = _as_dict(search_results).get('properties')
    if not isinstance(properties, list):
        properties = []

    for hotel in properties:
        available = _as_dict(hotel.get('availability')).get('available')
        if available:
            hotel_id = hotel['id']
            search_rating = _as_dict(hotel.get('averageOverallRating')).get('raw')
            record = {
                'city': city,
                'lat': lat,
                'lon': lon,
                'weather': weather,
                'temperature': temperature,
                'id': hotel_id,
                # Rounded rating from the search result; replaced below by the
                # raw review-summary rating when one is available.
                'overall_rank': round(search_rating) if search_rating is not None else None,
                'availability': available,
                'name': None,
                'tag': None,
            }
            hotel_data.append(record)

            summary_data = _get_json("/v2/hotels/summary", {
                "domain": "FR",
                "locale": "fr_FR",
                "hotel_id": hotel_id
            })
            summary = _as_dict(_as_dict(summary_data).get('summary'))
            hotel_name = summary.get('name')
            hotel_tag = summary.get('tagline')

            # Name and tagline are stored only when both are present.
            if hotel_name and hotel_tag:
                record.update({
                    'name': hotel_name,
                    'tag': hotel_tag
                })

            print(f"Fetching review summary for hotel ID {hotel_id}...")
            review_data = _get_json("/v2/hotels/reviews/summary", {
                "domain": "FR",
                "locale": "fr_FR",
                "hotel_id": hotel_id
            })

            # Only a non-empty list is indexed; any other payload is ignored.
            if isinstance(review_data, list) and review_data:
                first_review = _as_dict(review_data[0])
                overall_rating = _as_dict(first_review.get('averageOverallRating')).get('raw')
                if overall_rating is not None:
                    record['overall_rank'] = overall_rating

            hotel_count += 1
            if hotel_count >= MAX_HOTELS_PER_CITY:
                break

        time.sleep(1)

    # Convert the hotel data into a pandas DataFrame and append it to the CSV.
    df_hotels = pd.DataFrame(hotel_data, columns=HOTEL_COLUMNS)
    # Write the header when the file is missing or empty, so a city without
    # hotels can no longer leave behind a header-less file.
    write_header = not os.path.isfile(HOTELS_CSV) or os.path.getsize(HOTELS_CSV) == 0
    df_hotels.to_csv(HOTELS_CSV, mode='a', header=write_header, index=False)


Fetching region data for city: Avignon
Fetching hotel data for city: Avignon
Fetching review summary for hotel ID 174874...
Fetching review summary for hotel ID 21947180...
Fetching review summary for hotel ID 1298419...
Fetching review summary for hotel ID 11836793...
Fetching review summary for hotel ID 1168939...
Fetching review summary for hotel ID 1104788...
Fetching review summary for hotel ID 23631245...
Fetching review summary for hotel ID 8487337...
Fetching review summary for hotel ID 18123940...
Fetching review summary for hotel ID 2532551...
Fetching review summary for hotel ID 4184690...
Fetching review summary for hotel ID 2684990...
Fetching review summary for hotel ID 1346802...
Fetching review summary for hotel ID 91144108...
Fetching review summary for hotel ID 1967431...
Fetching region data for city: Uzes
Fetching hotel data for city: Uzes
Fetching review summary for hotel ID 1967431...
Fetching review summary for hotel ID 174874...
Fetching review summary for hotel

In [None]:
import plotly.graph_objs as go

# Inputs expected from earlier work (this cell does not create them):
#   df_top_cities - the top 5 cities, read here via 'lat', 'long' and 'city'
#   df_top_hotels - the top 20 hotels, read here via 'lat', 'long' and 'name'
# NOTE(review): the scraping cell stores longitude under 'lon', not 'long' -
# confirm the column name of whatever frames are passed in here.

# City markers (sky blue), labelled with the city name.
trace_cities = go.Scattergeo(
    lat=df_top_cities['lat'],
    lon=df_top_cities['long'],
    text=df_top_cities['city'],
    mode='markers',
    marker=dict(color='rgba(135, 206, 235, 0.8)'),
    name='Top 5 Cities',
)

# Hotel markers (red), labelled with the hotel name.
trace_hotels = go.Scattergeo(
    lat=df_top_hotels['lat'],
    lon=df_top_hotels['long'],
    text=df_top_hotels['name'],
    mode='markers',
    marker=dict(color='rgba(255, 0, 0, 0.8)'),
    name='Top 20 Hotels',
)

data = [trace_cities, trace_hotels]

# Title plus a geo axis restricted to Europe.
layout = go.Layout(
    title=dict(text='Top 5 Cities and Top 20 Hotels'),
    geo=dict(scope='europe'),
)

fig = go.Figure(data=data, layout=layout)
fig.show()


In [None]:
import plotly.graph_objs as go
from plotly.subplots import make_subplots

# Inputs expected from earlier work (this cell does not create them):
#   df_top_cities         - the top 5 cities, read via 'lat', 'long', 'city'
#   df_top_hotels         - the top 20 hotels, read via 'lat', 'long', 'name'
#   df_cities_temperature - per-city temperature, read via 'city', 'temperature'
# NOTE(review): the scraping cell stores longitude under 'lon', not 'long' -
# confirm the column name of whatever frames are passed in here.

# Create a trace for the cities
trace_cities = go.Scattergeo(
    lat = df_top_cities['lat'],
    lon = df_top_cities['long'],
    text = df_top_cities['city'],
    mode = 'markers',
    marker_color = 'rgba(135, 206, 235, 0.8)',
    name = 'Top 5 Cities'
)

# Create a trace for the hotels
trace_hotels = go.Scattergeo(
    lat = df_top_hotels['lat'],
    lon = df_top_hotels['long'],
    text = df_top_hotels['name'],
    mode = 'markers',
    marker_color = 'rgba(255, 0, 0, 0.8)',
    name = 'Top 20 Hotels'
)

# Bar chart of the temperature per city.
trace_temperature = go.Bar(
    x = df_cities_temperature['city'],
    y = df_cities_temperature['temperature'],
    name = 'Temperature'
)

# Create subplots: 1 row, 2 columns.
# `specs` declares the left cell as a geo subplot; without it both cells are
# cartesian ('xy') and adding a Scattergeo trace to (1, 1) raises ValueError.
fig = make_subplots(
    rows=1,
    cols=2,
    subplot_titles=('Map', 'Temperature'),
    column_widths=[0.7, 0.3],
    specs=[[{"type": "geo"}, {"type": "xy"}]],
)

# Adding traces to subplots
fig.add_trace(trace_cities, row=1, col=1)
fig.add_trace(trace_hotels, row=1, col=1)
fig.add_trace(trace_temperature, row=1, col=2)

# Updating geo subplot properties
fig.update_geos(scope='europe', row=1, col=1)

# Updating layout properties
fig.update_layout(height=600, title_text="Top 5 Cities and Top 20 Hotels with Temperature")

fig.show()


In [None]:
import pandas as pd
import numpy as np
import plotly.graph_objects as go

# Inputs expected from earlier work (this cell does not create them):
#   df_all_cities - read here via 'long', 'lat', 'city' and 'temperature'
#   df_top_hotels - read here via 'long', 'lat' and 'name'
# NOTE(review): the scraping cell stores longitude under 'lon', not 'long' -
# confirm the column name of whatever frames are passed in here.

# Cities: marker colour follows the temperature column on a yellow-to-red
# scale, with a colour bar shown next to the map.
city_marker = {
    'size': 8,
    'color': df_all_cities['temperature'],
    'colorscale': 'YlOrRd',
    'colorbar_title': "Temperature",
    'showscale': True,
}
trace_cities = go.Scattergeo(
    lon=df_all_cities['long'],
    lat=df_all_cities['lat'],
    text=df_all_cities['city'],
    mode='markers',
    marker=city_marker,
    name='Cities',
)

# Hotels: uniform blue markers labelled with the hotel name.
hotel_marker = {'size': 8, 'color': 'rgb(0, 0, 255)'}
trace_hotels = go.Scattergeo(
    lon=df_top_hotels['long'],
    lat=df_top_hotels['lat'],
    text=df_top_hotels['name'],
    mode='markers',
    marker=hotel_marker,
    name='Top Hotels',
)

# Map styling: Europe scope, natural-earth projection, light grey land.
geo_settings = {
    'scope': 'europe',
    'projection_type': 'natural earth',
    'showland': True,
    'landcolor': 'rgb(243, 243, 243)',
    'countrycolor': 'rgb(204, 204, 204)',
}
layout = go.Layout(
    title_text='French Cities and Top Hotels',
    showlegend=True,
    geo=geo_settings,
)

# Assemble the figure with the cities first, then the hotels, and render it.
fig = go.Figure(data=[trace_cities, trace_hotels], layout=layout)
fig.show()


In [18]:

import pandas as pd
# Reload the scraped hotels from disk to check what was actually written.
hotels_set = pd.read_csv('frenchhotels.csv')

# info() prints each column's dtype and non-null count, which shows whether
# any values are missing.
hotels_set.info()

# TODO: register (upload) the file on S3.

# The preview comes last because a notebook renders only the final expression
# of a cell; placed before info(), its output was silently discarded.
hotels_set.head()