In [1]:
# import of libraries 
from sqlalchemy import create_engine, text
import pandas as pd
import plotly.express as px
import psycopg2
import os
from datetime import datetime
import json

In [2]:
# Call of our date in S3 
weather = pd.read_csv('s3://booking-scapping/city_weather_by_day.csv')
hotel = pd.read_csv('s3://booking-scapping/hotels_info.csv')

In [3]:
# gets our secrets for connection at RDS

import json
with open("./secrets.json") as f:
    secrets = json.load(f)


In [4]:
# send to our DB

#engine = create_engine("sqlite:///:memory:", echo=True)
engine = create_engine(f'postgresql+psycopg2://{secrets["DBUSER"]}:{secrets["DBPASS"]}@{secrets["DBHOST"]}')

weather.to_sql(
    f"city_weather_top_5",
    engine,
    if_exists='replace'
)

hotel.to_sql(
    "hotel",
    engine,
    if_exists='replace'
)

100

## Call tables from our BD and do visualization 

In [5]:
# Weather table 

stmt = text("SELECT * FROM city_weather_top_5 "
            )

weather = pd.read_sql(
        stmt,
        engine
    )

weather.head()

Unnamed: 0.1,index,Unnamed: 0,city,dt_object,main_weather,precipitation,temperature,latitude,longitude
0,0,12,Amiens,2023-02-20,Clouds,0.0,9.166667,49.894171,2.295695
1,1,13,Amiens,2023-02-21,Clouds,0.0,8.875,49.894171,2.295695
2,2,14,Amiens,2023-02-22,Clouds,2.38,8.33,49.894171,2.295695
3,3,15,Amiens,2023-02-23,Clouds,0.01,5.5875,49.894171,2.295695
4,4,16,Amiens,2023-02-24,Clouds,0.55,3.69375,49.894171,2.295695


In [6]:
# Add a proportinal colum on temperature for better see the variation 
weather['temp_cube'] = weather['temperature']**3
weather.head()

weather['dt_object'] = weather['dt_object'].astype(str)

In [7]:
fig = px.scatter_mapbox(weather, lat="latitude", lon="longitude",
                        color="temperature", size ='temp_cube', zoom=3, mapbox_style="carto-positron",
                        animation_frame = "dt_object", animation_group = 'temperature',
                        title="Top 5 best weather cities")
fig.show()

In [8]:
# Hotel info
stmt = text("SELECT * FROM hotel "
            )

df = pd.read_sql(
        stmt,
        engine
    )

df.head()

Unnamed: 0.1,index,Unnamed: 0,hotel name,Url to its booking.com page,Score,Text description,lat,lon
0,0,0,Odalys City Amiens Blamont,https://www.booking.com/hotel/fr/appart-39-oda...,8.0,"Situé à 2,8 km du Zénith d'Amiens et à 900 mèt...",49.887148,2.311611
1,1,1,L'AMIE'NOIS - 6 COUCHAGES - JARDIN - WiFi,https://www.booking.com/hotel/fr/appt-en-amien...,8.0,"Situé à 1,9 km du Zénith d'Amiens et à 3,9 km ...",49.888011,2.264935
2,2,2,Au Coeur d'Amiens,https://www.booking.com/hotel/fr/maison-au-coe...,9.2,"Situé à Amiens, à seulement 1 km de la gare, l...",49.896155,2.306368
3,3,3,Gite Amiens,https://www.booking.com/hotel/fr/gite-amiens.f...,8.5,"Situé à Amiens, à seulement 5 minutes à pied d...",49.902358,2.314285
4,4,4,Le Cottage des Hortillonnages,https://www.booking.com/hotel/fr/le-cottage-de...,9.3,Le Cottage des Hortillonnages est situé à Amie...,49.897552,2.316359


In [9]:
fig = px.scatter_mapbox(hotel[hotel['Score'].notna()], lat="lat", lon="lon", 
                        zoom=3, size="Score", color="Score", mapbox_style="carto-positron", 
                        title = 'Top 20 hotels in our top 5 cities')
fig.show()