In [5]:
# import of libraries 
from sqlalchemy import create_engine, text
import pandas as pd
import plotly.express as px
import psycopg2
import os
from datetime import datetime
import json

In [6]:
# Call of our date in S3 
weather = pd.read_csv('s3://booking-scapping/city_weather_by_day.csv')
hotel = pd.read_csv('s3://booking-scapping/hotels_info.csv')

In [7]:
# Keep usefull data before sending it to our DB : take the weather by day of the top 5 city 

top_5_city_name = pd.read_csv('top_5_city_name.csv', names= ['city'])
top_5_city_list = [x.strip() for x in top_5_city_name['city']]

city_weather_top_5 = weather[weather['city'].isin(top_5_city_list)]
city_weather_top_5.head()

Unnamed: 0.1,Unnamed: 0,city,dt_object,main_weather,prepcipitation,temperature,latitude,longitude
0,0,Aigues Mortes,2023-02-15,Clouds,0.0,10.71,43.565823,4.191284
1,1,Aigues Mortes,2023-02-16,Clouds,0.0,9.81125,43.565823,4.191284
2,2,Aigues Mortes,2023-02-17,Clear,0.0,10.83,43.565823,4.191284
3,3,Aigues Mortes,2023-02-18,Clear,0.0,12.93875,43.565823,4.191284
4,4,Aigues Mortes,2023-02-19,Clear,0.0,12.17375,43.565823,4.191284


In [8]:
# Clean the scrapping data before sending it to our DB

hotel[['lat', 'lon']] = hotel['latlng'].str.split(',', 1, expand=True)
hotel = hotel.drop(['depth','download_timeout','download_slot','latlng','download_latency'], axis=1)

hotel['Score'] = hotel['Score'].replace({ ',' : '.'}, regex=True)

hotel['Score'] = hotel['Score'].astype(float)
hotel['lat'] = hotel['lat'].astype(float)
hotel['lon'] = hotel['lon'].astype(float)

  hotel[['lat', 'lon']] = hotel['latlng'].str.split(',', 1, expand=True)


In [10]:
# gets our secrets for connection at RDS

import json
with open("./secrets.json") as f:
    secrets = json.load(f)


In [11]:
# send to our DB

#engine = create_engine("sqlite:///:memory:", echo=True)
engine = create_engine(f'postgresql+psycopg2://{secrets["DBUSER"]}:{secrets["DBPASS"]}@{secrets["DBHOST"]}', echo=True)

city_weather_top_5.to_sql(
    f"city_weather_top_5",
    engine,
    if_exists='append'
)

hotel.to_sql(
    "hotel",
    engine,
    if_exists='append'
)

2023-02-16 16:12:05,165 INFO sqlalchemy.engine.Engine select pg_catalog.version()
2023-02-16 16:12:05,166 INFO sqlalchemy.engine.Engine [raw sql] {}
2023-02-16 16:12:05,461 INFO sqlalchemy.engine.Engine select current_schema()
2023-02-16 16:12:05,461 INFO sqlalchemy.engine.Engine [raw sql] {}
2023-02-16 16:12:05,760 INFO sqlalchemy.engine.Engine show standard_conforming_strings
2023-02-16 16:12:05,761 INFO sqlalchemy.engine.Engine [raw sql] {}
2023-02-16 16:12:06,066 INFO sqlalchemy.engine.Engine select relname from pg_class c join pg_namespace n on n.oid=c.relnamespace where pg_catalog.pg_table_is_visible(c.oid) and relname=%(name)s
2023-02-16 16:12:06,067 INFO sqlalchemy.engine.Engine [generated in 0.00149s] {'name': 'city_weather_top_5'}
2023-02-16 16:12:06,521 INFO sqlalchemy.engine.Engine BEGIN (implicit)
2023-02-16 16:12:06,523 INFO sqlalchemy.engine.Engine INSERT INTO city_weather_top_5 (index, "Unnamed: 0", city, dt_object, main_weather, prepcipitation, temperature, latitude, l

100

## Call tables from our BD and do visualization 

In [12]:
# Weather table 

stmt = text("SELECT * FROM city_weather_top_5 "
            )

weather = pd.read_sql(
        stmt,
        engine
    )

weather.head()

2023-02-16 16:12:08,368 INFO sqlalchemy.engine.Engine select relname from pg_class c join pg_namespace n on n.oid=c.relnamespace where pg_catalog.pg_table_is_visible(c.oid) and relname=%(name)s
2023-02-16 16:12:08,370 INFO sqlalchemy.engine.Engine [cached since 2.304s ago] {'name': 'SELECT * FROM city_weather_top_5 '}
2023-02-16 16:12:08,832 INFO sqlalchemy.engine.Engine SELECT * FROM city_weather_top_5 
2023-02-16 16:12:08,833 INFO sqlalchemy.engine.Engine [generated in 0.00082s] {}


Unnamed: 0.1,index,Unnamed: 0,city,dt_object,main_weather,prepcipitation,temperature,latitude,longitude
0,0,0,Aigues Mortes,2023-02-15,Clouds,0.0,10.71,43.565823,4.191284
1,1,1,Aigues Mortes,2023-02-16,Clouds,0.0,9.81125,43.565823,4.191284
2,2,2,Aigues Mortes,2023-02-17,Clear,0.0,10.83,43.565823,4.191284
3,3,3,Aigues Mortes,2023-02-18,Clear,0.0,12.93875,43.565823,4.191284
4,4,4,Aigues Mortes,2023-02-19,Clear,0.0,12.17375,43.565823,4.191284


In [13]:
# Add a proportinal colum on temperature for better see the variation 
city_weather_top_5['temp_cube'] = city_weather_top_5['temperature']**3
city_weather_top_5.head()

city_weather_top_5['dt_object'] = city_weather_top_5['dt_object'].astype(str)

In [14]:
fig = px.scatter_mapbox(city_weather_top_5, lat="latitude", lon="longitude",
                        color="temperature", size ='temp_cube', zoom=7, mapbox_style="carto-positron",
                        animation_frame = "dt_object", animation_group = 'temperature',
                        title="Top 5 best weather cities")
fig.show()

In [15]:
# Hotel info
stmt = text("SELECT * FROM hotel "
            )

df = pd.read_sql(
        stmt,
        engine
    )

df.head()

2023-02-16 16:12:09,991 INFO sqlalchemy.engine.Engine select relname from pg_class c join pg_namespace n on n.oid=c.relnamespace where pg_catalog.pg_table_is_visible(c.oid) and relname=%(name)s
2023-02-16 16:12:09,993 INFO sqlalchemy.engine.Engine [cached since 3.928s ago] {'name': 'SELECT * FROM hotel '}
2023-02-16 16:12:10,471 INFO sqlalchemy.engine.Engine SELECT * FROM hotel 
2023-02-16 16:12:10,471 INFO sqlalchemy.engine.Engine [generated in 0.00049s] {}


Unnamed: 0.1,index,Unnamed: 0,hotel name,Url to its booking.com page,Score,Text description,lat,lon
0,0,0,"Typique, authentique, super pratique",https://www.booking.com/hotel/fr/typique-authe...,7.0,Doté d'une connexion Wi-Fi gratuite et offrant...,43.295389,5.379165
1,1,1,Maison 3 chambres gde terrasse proche vieux po...,https://www.booking.com/hotel/fr/maison-de-vil...,7.3,"Située à Marseille, à seulement 3 km de la sta...",43.314335,5.388421
2,2,2,Ibis Marseille Centre Prefecture,https://www.booking.com/hotel/fr/premiere-clas...,7.0,L’Ibis Marseille Centre Préfecture met à votre...,43.291553,5.381971
3,3,3,Hôtel Maison Montgrand - Vieux Port,https://www.booking.com/hotel/fr/montgrand.fr....,7.9,"Doté d'une terrasse spacieuse, d'un terrain de...",43.290963,5.376429
4,4,4,Residhotel Vieux Port,https://www.booking.com/hotel/fr/residhotel-vi...,7.3,Le Residhotel Vieux Port se situe dans le cent...,43.296818,5.373057


In [16]:
fig = px.scatter_mapbox(hotel[hotel['Score'].notna()], lat="lat", lon="lon", 
                        zoom=7, size="Score", color="Score", mapbox_style="carto-positron", 
                        title = 'Top 20 hotels in our top 5 cities')
fig.show()