In [2]:
import pandas as pd
import requests
import json
import plotly.express as px

import warnings
warnings.simplefilter(action='ignore', category=FutureWarning)

In [3]:
# !python3 ./Scrap_best-cities.py

In [4]:
f = open("Best_cities.json")
cities = json.load(f)
f.close()

destinations = []

for i in cities:
    destinations.append(i['city'])

In [5]:
coordenates = {}
for i, val in enumerate(destinations):
    coordenates[val] = {}

In [6]:
for i in range(len(destinations)):
    payload = {'city': f'{destinations[i]}','format': 'json','limit' : 1}
    r = requests.get('https://nominatim.openstreetmap.org/search', params=payload)
    lat = r.json()[0]['lat']
    lon = r.json()[0]['lon']
    coordenates[f'{destinations[i]}'] = [float(lat),float(lon)]

In [7]:
coord = pd.DataFrame(coordenates).transpose()
coord.columns = ['latitude','longitude']
coord = coord.rename_axis('destination').reset_index()

In [8]:
coord.head()

Unnamed: 0,destination,latitude,longitude
0,Mont Saint Michel,48.635954,-1.51146
1,St Malo,49.314695,-96.953823
2,Bayeux,49.276462,-0.702474
3,Le Havre,49.493898,0.107973
4,Rouen,49.440459,1.093966


In [9]:
destination_weather = {}
for dest, lat, lon in zip(coord['destination'],coord['latitude'],coord['longitude']):
    data = requests.get(f'https://api.openweathermap.org/data/2.5/forecast?lat={lat}&lon={lon}&appid=ddba69ada430d874329049dbdca185fc&units=metric').json()

    weather = {}
    for i in range(len(data['list'])):
        
        date = data['list'][i]['dt_txt'] # date and time ISO, UTC
        temp = data['list'][i]['main']['temp'] # celcius
        cloud = data['list'][i]['clouds']['all'] #cloudiness %
        rain = data['list'][i]['pop']  # prob precipitation
        wind = data['list'][i]['wind']['speed'] # m/s

        weather[date] = {'temp' : temp,
                    'cloud' : cloud,
                    'rain' : rain*100,
                    'wind' : wind }

    weather_df = pd.DataFrame(weather).T.rename_axis('date').reset_index()
    weather_df['date'] = weather_df['date'].str[:-9]
    weather_df = weather_df.groupby('date').mean().reset_index()

    destination_weather[dest] =  weather_df

In [10]:
destination_weather['Marseille']

Unnamed: 0,date,temp,cloud,rain,wind
0,2023-10-04,22.932,44.0,0.0,5.126
1,2023-10-05,22.1725,52.625,0.0,3.03875
2,2023-10-06,21.5575,59.625,0.0,1.7
3,2023-10-07,22.08,45.125,0.0,1.15125
4,2023-10-08,22.29875,21.125,0.0,2.41125
5,2023-10-09,20.83,28.0,0.0,2.273333


In [11]:
best_weather = {}
for key, value in destination_weather.items():
    best_weather[key] = value.mean()

best_weather_df = pd.DataFrame(best_weather).T
best_weather_df['weather_score'] = best_weather_df.apply(lambda x: -100/(x.temp - (x.cloud + x.rain + x.wind)), axis=1) ## temp and rain are multiplied by 2 and 3 respectively to have them weigh more in score calculation
best_weather_df = best_weather_df.rename_axis("destination").reset_index()

In [12]:
best_weather_df = best_weather_df.merge(coord,on='destination')
best_weather_df.to_csv("destination_weather.csv")
best_weather_df = best_weather_df.sort_values(by='weather_score',ascending=False)
best_weather_df[['temp', 'rain', 'wind', 'cloud']] = best_weather_df[['temp', 'rain', 'wind', 'cloud']].apply(lambda x : round(x,2))

### Top 5 cities in terms of weather are :

In [13]:
best_weather_df

Unnamed: 0,destination,temp,cloud,rain,wind,weather_score,latitude,longitude
27,Collioure,20.94,39.49,0.0,1.93,4.882005,42.52505,3.083155
19,Cassis,21.83,41.3,0.0,2.4,4.571257,43.214036,5.539632
25,Aigues Mortes,20.29,39.9,0.0,2.62,4.497175,43.566152,4.19154
20,Marseille,21.98,41.75,0.0,2.62,4.466618,43.296174,5.369953
34,La Rochelle,18.56,37.7,0.0,3.44,4.427505,46.159113,-1.152043
18,Bormes les Mimosas,20.5,41.32,0.0,2.08,4.365843,43.150697,6.341928
24,Nímes,19.49,39.98,0.0,2.83,4.289095,43.837425,4.360069
21,Aix en Provence,21.15,44.35,0.0,1.65,4.022994,43.529842,5.447474
28,Carcassonne,20.28,42.84,0.0,2.5,3.991241,43.213036,2.349107
26,Saintes Maries de la mer,20.31,41.85,0.0,3.79,3.947587,43.451592,4.42772


### Booking scraping

In [14]:
url_list = []
for i in destinations:
    i = i.replace(" ","+")
    url_list.append(f"https://www.booking.com/searchresults.fr.html?ss={i}")

In [15]:
# !python3 ./Scrap_booking.py

In [106]:
f = open("Hotels.json")
data = json.load(f)
f.close()

hotels = pd.DataFrame(data)
hotels[['latitude', 'longitude']] = hotels['coord'].str.split(',', expand = True).astype(float)
hotels = hotels.drop('coord', axis=1)
hotels = hotels[['city', 'name', 'score', 'latitude', 'longitude', 'desc', 'url']]
hotels.columns = ['destination', 'hotel_name', 'hotel_score', 'latitude', 'longitude', 'description', 'url']
hotels['hotel_score'] = hotels['hotel_score'].str.replace(",",".").astype(float)
hotels = hotels.dropna(subset=['hotel_name'])
hotels['destination'] = hotels['destination'].apply(lambda x: x[:-1]).str.replace("-"," ")



In [107]:
hotels.head()

Unnamed: 0,destination,hotel_name,hotel_score,latitude,longitude,description,url
0,Le Mont Saint Michel,Gîte proche du Mont Saint-Michel accès à pieds,9.0,48.59839,-1.503451,Gîte proche du Mont Saint-Michel accès à pieds...,https://www.booking.com/hotel/fr/gite-proche-d...
1,Saint Malo,Brit Hotel Le Surcouf,7.7,48.657735,-1.994373,Situé dans le paisible quartier de Courtoisvil...,https://www.booking.com/hotel/fr/le-surcouf.fr...
2,Bayeux,Hôtel Le Bayeux,7.8,49.274159,-0.704506,"Situé dans le centre historique de Bayeux, en ...",https://www.booking.com/hotel/fr/le-bayeux.fr....
3,Le Havre,Student Factory Le Havre Les Docks,8.8,49.488044,0.133123,"Doté d'un salon commun, le Student Factory Le ...",https://www.booking.com/hotel/fr/student-facto...
4,Rouen,studi'home,8.1,49.443658,1.090557,Situé à moins de 600 mètres du centre de Rouen...,https://www.booking.com/hotel/fr/studi-39-home...


In [18]:
hotels.to_csv("hotels_data.csv")

### For S3 deployment take screenshots 

##### Always delete IAM user to prevent leaks and hacking, never push access keys

In [19]:
import boto3
session = boto3.Session(aws_access_key_id="",
                        aws_secret_access_key="")

In [20]:
s3 = session.resource("s3")

In [21]:
bucket = s3.Bucket('bucket-for-project-1234')

In [22]:
best_weather_csv = best_weather_df.to_csv()
hotels_csv = hotels.to_csv()

put_object = bucket.put_object(Key="dest_weather.csv", Body=best_weather_csv)
put_object = bucket.put_object(Key="dest_hotels.csv", Body=hotels_csv)

NoCredentialsError: Unable to locate credentials

In [None]:
## RDS stuff

In [108]:
fig = px.scatter_mapbox(best_weather_df, lat="latitude", lon="longitude",mapbox_style="open-street-map", \
                        hover_name="destination",color='weather_score',size='weather_score', \
                        hover_data={'temp': True,'rain': True,'cloud': True, 'wind': True, \
                                    'weather_score': False, 'latitude':False, 'longitude': False})
fig.update_layout(mapbox_style='open-street-map',
                  mapbox_zoom = 4.8,
                  mapbox_center_lat= 47,
                  mapbox_center_lon= 2,
                  margin={"r": 5, "t": 5, "l": 5, "b": 5},
                  autosize=False,
                  width=800,
                  height=500)
fig.show()
top_five = list(best_weather_df['destination'][:5])
print(f"The top five best destiantions are {top_five}")


The top five best destiantions are ['Collioure', 'Cassis', 'Aigues Mortes', 'Marseille', 'La Rochelle']


In [109]:
top_five = list(best_weather_df['destination'][:5])
mask = hotels['destination'].isin(top_five)
top_five_hotels = hotels[mask]

fig = px.scatter_mapbox(top_five_hotels, lat="latitude", lon="longitude",mapbox_style="open-street-map",hover_name="hotel_name",\
                        hover_data={'hotel_name': False,	'hotel_score': True,	'latitude': False,	'longitude': False,	'description': False,	'url': False})

fig.update_layout(mapbox_style='open-street-map',
                  mapbox_zoom = 4.8,
                  mapbox_center_lat= 47,
                  mapbox_center_lon= 2,
                  margin={"r": 5, "t": 5, "l": 5, "b": 5},
                  autosize=False,
                  width=800,
                  height=500)