In [1]:
# import of libraries 
from sqlalchemy import create_engine, text
import pandas as pd
import plotly.express as px
import psycopg2
import os
from datetime import datetime
import json

In [2]:
# Call of our date in S3 
weather = pd.read_csv('s3://booking-scapping/city_weather_by_day.csv')
hotel = pd.read_csv('s3://booking-scapping/hotels_info.csv')

In [5]:
# gets our secrets for connection at RDS

import json
with open("./secrets.json") as f:
    secrets = json.load(f)


In [6]:
# send to our DB

#engine = create_engine("sqlite:///:memory:", echo=True)
engine = create_engine(f'postgresql+psycopg2://{secrets["DBUSER"]}:{secrets["DBPASS"]}@{secrets["DBHOST"]}')

weather.to_sql(
    f"city_weather_top_5",
    engine,
    if_exists='replace'
)

hotel.to_sql(
    "hotel",
    engine,
    if_exists='replace'
)

100

## Call tables from our BD and do visualization 

In [7]:
# Weather table 

stmt = text("SELECT * FROM city_weather_top_5 "
            )

weather = pd.read_sql(
        stmt,
        engine
    )

weather.head()

Unnamed: 0.1,index,Unnamed: 0,city,dt_object,main_weather,prepcipitation,temperature,latitude,longitude
0,0,12,Amiens,2023-02-20,Clouds,0.0,8.796667,49.894171,2.295695
1,1,13,Amiens,2023-02-21,Clouds,0.0,8.875,49.894171,2.295695
2,2,14,Amiens,2023-02-22,Clouds,2.38,8.33,49.894171,2.295695
3,3,15,Amiens,2023-02-23,Clouds,0.01,5.5875,49.894171,2.295695
4,4,16,Amiens,2023-02-24,Clouds,0.55,3.69375,49.894171,2.295695


In [9]:
# Add a proportinal colum on temperature for better see the variation 
weather['temp_cube'] = weather['temperature']**3
weather.head()

weather['dt_object'] = weather['dt_object'].astype(str)

In [10]:
fig = px.scatter_mapbox(weather, lat="latitude", lon="longitude",
                        color="temperature", size ='temp_cube', zoom=3, mapbox_style="carto-positron",
                        animation_frame = "dt_object", animation_group = 'temperature',
                        title="Top 5 best weather cities")
fig.show()

In [11]:
# Hotel info
stmt = text("SELECT * FROM hotel "
            )

df = pd.read_sql(
        stmt,
        engine
    )

df.head()

Unnamed: 0.1,index,Unnamed: 0,hotel name,Url to its booking.com page,Score,Text description,lat,lon
0,0,0,MISTRAL Gagnant,https://www.booking.com/hotel/fr/mistral-cassi...,8.1,"Situé à 5 km du cap Canaille à Cassis, le MIST...",43.21537,5.53719
1,1,1,UNE TERRASSE SUR LA MER 3 pers 2 chambres VUE MER,https://www.booking.com/hotel/fr/une-terrasse-...,8.4,"Situé à Cassis, à seulement 400 mètres de Best...",43.214072,5.530622
2,2,2,Home Cassis - Maison Julou - Piscine chauffée,https://www.booking.com/hotel/fr/home-cassis-m...,7.6,"Offrant une vue sur la montagne, l'établisseme...",43.22208,5.537927
3,3,3,"Cassis, le Grand Bleu, triplex vue mer, port p...",https://www.booking.com/hotel/fr/le-grand-bleu...,9.0,"Situé à Cassis, l’hébergement Cassis, le Grand...",43.220804,5.542296
4,4,4,SunRise Cassis,https://www.booking.com/hotel/fr/sunrise-cassi...,9.5,"Doté d'un jardin, d'une terrasse et d'un casin...",43.215219,5.535682


In [12]:
fig = px.scatter_mapbox(hotel[hotel['Score'].notna()], lat="lat", lon="lon", 
                        zoom=3, size="Score", color="Score", mapbox_style="carto-positron", 
                        title = 'Top 20 hotels in our top 5 cities')
fig.show()