### Save files in S3

In [1]:
import pandas as pd, requests
import os 
import logging
from typing import Dict, Tuple, List
import operator
import numpy as np
from datetime import datetime as dt
from datetime import date
import matplotlib.pyplot as plt
import seaborn as sns
sns.set_style()

In [1]:
from box import Box
import yaml # Only required for different loaders
import pandas as pd

### Load configuration file

In [5]:
!pwd

/home/carmina/repo/pez-jedha/projects/03-kayak/s3


In [31]:
conf = Box.from_yaml(filename="./config.yaml")

### Create file booking_result.csv in S3 bucket

In [20]:
import boto3
# Open a boto3 session using the key pair read from the loaded configuration.
session = boto3.Session(
    aws_access_key_id=conf.accessKeyId,
    aws_secret_access_key=conf.secretAccessKey,
)

In [21]:
s3 = session.resource("s3")

In [22]:
#s3.create_bucket(Bucket="a-bucket-name")

In [23]:
bucket = s3.Bucket('kayak-bucket')


In [24]:
# Upload the local CSV to the bucket under the key 'booking_result.csv'.
# The printed value is None (see the cell output), so `result` carries no
# status information about the upload.
result = bucket.upload_file('../data/booking_result.csv','booking_result.csv')

print(result)


None


### Create file weather.csv in S3 bucket

In [25]:
# Upload the local CSV to the bucket under the key 'weather.csv'.
# The printed value is None (see the cell output), so `result` carries no
# status information about the upload.
result = bucket.upload_file('../data/weather.csv','weather.csv')
print(result)


None


### ETL: Process files

In [None]:
import botocore

In [28]:
OBJECT_KEY = 'booking_result.csv' 

def download_file(object_key, download_path):
    """Download one object from the notebook-level S3 ``bucket`` to a local path.

    Args:
        object_key: Key of the object inside the bucket.
        download_path: Local file path the object is written to. Its parent
            directory is created first if it does not exist yet.

    Raises:
        botocore.exceptions.ClientError: For any client error whose code is
            not "404"; a 404 is reported with a printed message instead.
    """
    # Create the destination folder up front so the download does not fail
    # when the folder is missing (e.g. on a fresh checkout).
    target_dir = os.path.dirname(download_path)
    if target_dir:
        os.makedirs(target_dir, exist_ok=True)

    try:
        bucket.download_file(object_key, download_path)
    except botocore.exceptions.ClientError as e:
        if e.response['Error']['Code'] == "404":
            print("The object does not exist.")
        else:
            # Anything other than "not found" is unexpected: let it propagate.
            raise

In [None]:
download_file(OBJECT_KEY, f'../data/s3_download/{OBJECT_KEY}')

In [29]:
# The destination path has no placeholders, so a plain string literal is used
# instead of an f-string.
download_file('weather.csv', '../data/s3_download/weather.csv')

### Create objects for City and Hotel

In [32]:
from sqlalchemy import create_engine

In [1]:
#print(f'postgresql+psycopg2://{conf.userRDS}:{conf.mdp}@{conf.endpoint}/cert-db')

In [68]:
# The user, password and endpoint come from the loaded config and are
# interpolated straight into the connection URL for the `kayak` database.
# NOTE(review): if the user name or password contains URL-reserved characters
# (e.g. '@', '/', ':'), they would need to be percent-encoded first — confirm
# against the actual credentials.
engine = create_engine(f'postgresql+psycopg2://{conf.userRDS}:{conf.mdp}@{conf.endpoint}/kayak')

In [36]:
from sqlalchemy.ext.declarative import declarative_base
Base = declarative_base()

In [37]:
from sqlalchemy import Column, Integer, String , Float 

class City(Base):
    """ORM mapping for the ``CITY`` table."""

    __tablename__ = "CITY"

    # String primary key of the table.
    place_id = Column(String, primary_key=True)
    place = Column(String)
    # Coordinates, stored as floats.
    lat = Column(Float)
    lon = Column(Float)

    def __repr__(self):
        """Return a debug string listing every mapped column of the row."""
        # String columns are quoted, numeric ones are not; the previous format
        # string had a misplaced quote that produced an unbalanced repr.
        return "<CITY(place_id='{}', place='{}', lat={}, lon={})>".format(
            self.place_id, self.place, self.lat, self.lon
        )

In [38]:
class Hotel(Base):
    """ORM mapping for the ``HOTEL`` table.

    NOTE(review): the DataFrame appended to ``HOTEL`` further down in this
    notebook carries columns such as ``name``, ``url``, ``latitude`` and
    ``longitude`` that are not declared here — confirm which schema is
    intended.
    """

    __tablename__ = "HOTEL"

    # Integer primary key of the table.
    hotel_id = Column(Integer, primary_key=True)
    # Same name and type as the key column of the CITY mapping; no foreign
    # key constraint is declared.
    place_id = Column(String)

    place = Column(String)
    # Coordinates, stored as floats.
    lat = Column(Float)
    lon = Column(Float)

    def __repr__(self):
        """Return a debug string listing every mapped column of the row."""
        # Previously this was copy-pasted from City: it was labelled "<CITY"
        # and did not show the hotel's own key.
        return "<HOTEL(hotel_id={}, place_id='{}', place='{}', lat={}, lon={})>".format(
            self.hotel_id, self.place_id, self.place, self.lat, self.lon
        )

### Persist values in Database

`sessionmaker` creates the sessions that let us talk to our database. Its `bind` argument takes an `engine`, which represents the connection to the database.

In [58]:
cities_df = pd.read_csv('../data/cities.csv', sep='\t' )

In [59]:
# Drop the leftover 'Unnamed: 0' column from the CSV and force place_id to str.
# errors='ignore' keeps this cell re-runnable: on a second run the column is
# already gone and a plain drop would raise KeyError.
cities_df = (
    cities_df
    .drop(columns='Unnamed: 0', errors='ignore')
    .astype({'place_id': 'str'})
)
cities_df.head(5)

Unnamed: 0,place,place_id,licence,osm_type,osm_id,boundingbox,lat,lon,display_name,place_rank,category,type,importance,icon
0,Mont Saint Michel,151486647,"Data © OpenStreetMap contributors, ODbL 1.0. h...",way,211285890,"['48.6349172', '48.637031', '-1.5133292', '-1....",48.635954,-1.51146,"Mont Saint-Michel, Le Mont-Saint-Michel, Avran...",20,place,islet,0.865437,
1,St Malo,282098015,"Data © OpenStreetMap contributors, ODbL 1.0. h...",relation,905534,"['48.5979853', '48.6949736', '-2.0765246', '-1...",48.649518,-2.026041,"Saint-Malo, Ille-et-Vilaine, Bretagne, France ...",16,boundary,administrative,0.786467,https://nominatim.openstreetmap.org/ui/mapicon...
2,Bayeux,281962470,"Data © OpenStreetMap contributors, ODbL 1.0. h...",relation,145776,"['49.2608124', '49.2934736', '-0.7275671', '-0...",49.276462,-0.702474,"Bayeux, Calvados, Normandie, France métropolit...",16,boundary,administrative,0.7927,https://nominatim.openstreetmap.org/ui/mapicon...
3,Le Havre,282341149,"Data © OpenStreetMap contributors, ODbL 1.0. h...",relation,104492,"['49.4516697', '49.5401463', '0.0667992', '0.1...",49.493898,0.107973,"Le Havre, Seine-Maritime, Normandie, France mé...",16,boundary,administrative,0.932333,https://nominatim.openstreetmap.org/ui/mapicon...
4,Rouen,122848,"Data © OpenStreetMap contributors, ODbL 1.0. h...",node,26686587,"['49.2804591', '49.6004591', '0.9339658', '1.2...",49.440459,1.093966,"Rouen, Seine-Maritime, Normandie, France métro...",16,place,city,0.860073,https://nominatim.openstreetmap.org/ui/mapicon...


In [70]:
# index=False: the DataFrame index is not written, so no index_label is passed
# (pandas only uses index_label when the index is written).
# if_exists='append' adds to an existing CITY table, so re-running this cell
# tries to insert the same rows again.
cities_df.to_sql('CITY', con=engine, if_exists='append', index=False)

In [72]:
hotels_df = pd.read_csv('../data/booking_result.csv', sep=',')


In [79]:
cities_red_df = cities_df[['place_id','place']]

In [81]:
# Inner join: attach place_id/place to every hotel whose `city` matches a
# `place` in the reduced cities frame; unmatched hotels are dropped.
joined_city_hotel = hotels_df.merge(
    right=cities_red_df,
    how='inner',
    left_on='city',
    right_on='place',
    suffixes=(None, '_y'),
)
joined_city_hotel


Unnamed: 0,city,name,url,latitude,longitude,desc,etoiles,note,reviews,place_id,place
0,Mont Saint Michel,Mercure Mont Saint Michel,/hotel/fr/mont-saint-michel.fr.html,-1.510545,48.614247,Installé dans des espaces verts à seulement 2 ...,4.0,82,3 140,151486647,Mont Saint Michel
1,Mont Saint Michel,Hôtel Vert,/hotel/fr/vert.fr.html,-1.509617,48.614700,"Situé à 2 km du Mont-Saint-Michel, sur la côte...",2.0,81,3 670,151486647,Mont Saint Michel
2,Mont Saint Michel,Hotel De La Digue,/hotel/fr/de-la-digue.fr.html,-1.510918,48.616882,L'hôtel De La Digue est un établissement tradi...,3.0,72,1 926,151486647,Mont Saint Michel
3,Mont Saint Michel,Le Saint Aubert,/hotel/fr/hotel-saint-aubert.fr.html,-1.510105,48.612938,"Niché dans un écrin de verdure, à seulement 2 ...",3.0,74,1 257,151486647,Mont Saint Michel
4,Mont Saint Michel,La Vieille Auberge,/hotel/fr/la-vieille-auberge-le-mont-saint-mic...,-1.511457,48.636063,La Vieille Auberge se trouve dans le village m...,2.0,75,1 260,151486647,Mont Saint Michel
...,...,...,...,...,...,...,...,...,...,...,...
7209,La Rochelle,La Blanche La Rochelle Vue sur les Tours,/hotel/fr/la-blanche-la-rochelle-vue-sur-les-t...,-1.151293,46.158128,Hébergement La Blanche - Vue sur Rochelle L'ap...,,,,281822562,La Rochelle
7210,La Rochelle,Full Moon,/hotel/fr/full-moon.fr.html,-1.151186,46.152663,"Installé à La Rochelle, le Full Moon est un vo...",,,,281822562,La Rochelle
7211,La Rochelle,BEAU STUDIO MODERNE 5MN CENTRE VILLE +PARKING ...,/hotel/fr/beau-studio-moderne-5mn-centre-ville...,-1.138740,46.160229,BEAU STUDIO MODERNE 5MN CENTRE VILLE +PARKING ...,,,,281822562,La Rochelle
7212,La Rochelle,Maison agréable 110m2 avec jardin et jacuzzi,/hotel/fr/maison-agreable-110m2-avec-jardin-et...,-1.180585,46.165280,"Située à La Rochelle, à seulement 3 km du Chef...",,,,281822562,La Rochelle


In [84]:
# Keep the city key and name first, followed by the hotel attributes; the
# remaining columns of the joined frame are left out.
new_columns = [
    'place_id', 'place',
    'name', 'url',
    'latitude', 'longitude',
    'desc', 'etoiles', 'note', 'reviews',
]
joined_city_hotel = joined_city_hotel[new_columns]

In [85]:
# index=True also writes the DataFrame index as a column of the HOTEL table.
# NOTE(review): no index_label is given, so that column is not named
# `hotel_id` like the key declared on the Hotel model — confirm this is intended.
# if_exists='append' adds to an existing table, so re-running this cell tries
# to insert the same rows again.
joined_city_hotel.to_sql('HOTEL', con=engine, if_exists='append', index=True)