# Análisis y limpieza de los datos del data set

In [125]:
#### ESTE ES UN EJEMPLO DEL USO DE LA API DE openweathermap ####

# Voy a usar estas dos localizaciones como test:
#  Yuma MCAS-Yuma International,Yuma,AZ,32.65658333,-114.6059722
#  The Sportsman,2019,51.34392,0.95885,Seasalter,United Kingdom

#lat = 51.34392
#lon = 0.95885
#api_key = "xxx"

#url = 'http://api.openweathermap.org/data/2.5/onecall?lat=32.65658333&lon=-114.6059722&APPID=xxx'
#url = f'http://api.openweathermap.org/data/2.5/onecall?lat={lat}&lon={lon}&APPID={api_key}'

#res = r.get(url)
#data = res.json()

#print(data)


In [126]:
import requests
import pandas as pd
import os
from dotenv import load_dotenv

# 1 - obtener datos de los csv
# 2 - juntar los csv con un campo que indique las estrellas
# 3 - hacer comandos
# 3.1 - comando que consulte la API por un restaurante

df_one_star = pd.read_csv('input/one-star-michelin-restaurants.csv')
df_two_stars = pd.read_csv('input/two-stars-michelin-restaurants.csv')
df_three_stars = pd.read_csv('input/three-stars-michelin-restaurants.csv')

In [127]:
print(df_one_star.shape)
print(df_two_stars.shape)
print(df_three_stars.shape)

(549, 10)
(110, 10)
(36, 10)


## Junto los tres data sets

In [128]:
df_one_star["stars"] = 1
df_two_stars["stars"] = 2
df_three_stars["stars"] = 3

In [129]:
df_all = pd.concat([df_one_star, df_two_stars, df_three_stars])
df_all.shape

(695, 11)

## Análisis del data set completo

In [130]:
print(df_all.info())

<class 'pandas.core.frame.DataFrame'>
Int64Index: 695 entries, 0 to 35
Data columns (total 11 columns):
 #   Column     Non-Null Count  Dtype  
---  ------     --------------  -----  
 0   name       695 non-null    object 
 1   year       695 non-null    int64  
 2   latitude   695 non-null    float64
 3   longitude  695 non-null    float64
 4   city       693 non-null    object 
 5   region     695 non-null    object 
 6   zipCode    501 non-null    object 
 7   cuisine    695 non-null    object 
 8   price      519 non-null    object 
 9   url        695 non-null    object 
 10  stars      695 non-null    int64  
dtypes: float64(2), int64(2), object(7)
memory usage: 65.2+ KB
None


In [131]:
# No hay datos duplicados
df_all.duplicated().value_counts()

False    695
dtype: int64

In [132]:
# Algunos restaurantes tienen la ciudad a nulo
df_all[df_all["city"].isnull()]

Unnamed: 0,name,year,latitude,longitude,city,region,zipCode,cuisine,price,url,stars
152,Épure,2019,22.29583,114.169304,,Hong Kong,,French,$$$,https://guide.michelin.com/hk/en/hong-kong-reg...,1
166,Arbor,2019,22.283146,114.15542,,Hong Kong,,Innovative,$$$,https://guide.michelin.com/hk/en/hong-kong-reg...,1


In [133]:
# Hay datos de los años 2018 y 2019
df_all["year"].unique()

array([2019, 2018])

In [233]:
# La mayoría de restaurantes son del año 2019
df_all["year"].value_counts()

2019    656
2018     39
Name: year, dtype: int64

In [237]:
# La columna "price" tiene valores nulos
df_all["price"].unique()

array(['$$$$$', '$$$$', '$$$', '$$', '$', nan], dtype=object)

In [135]:
# Los valore nulos en la columna "price" están en las regions "United Kingdom" e "Ireland"
df_all[df_all["price"].isnull()]["region"].value_counts()

United Kingdom    162
Ireland            14
Name: region, dtype: int64

### Restaurantes que aparecen varias veces un mismo año

In [136]:
# Hay algunos restaurantes que aparecen varias veces en un mismo año
df_all.groupby("name").agg({"year": "count"})["year"].sort_values(ascending=False)

name
L'Atelier de Joël Robuchon    3
Sorrel                        2
The Kitchen                   2
Blackbird                     2
Angler                        2
                             ..
Purnell's                     1
Providence                    1
Protégé                       1
Pramerl & the Wolf            1
108                           1
Name: year, Length: 684, dtype: int64

In [137]:
# Este en particular parece que tiene sedes en tres ciudades del mundo y con distinta valoración de estrellas
df_all[df_all["name"] == "L'Atelier de Joël Robuchon"]

Unnamed: 0,name,year,latitude,longitude,city,region,zipCode,cuisine,price,url,stars
360,L'Atelier de Joël Robuchon,2019,25.039188,121.56771,Taipei,Taipei,110.0,French contemporary,$$$,https://guide.michelin.com/tw/en/taipei-region...,1
47,L'Atelier de Joël Robuchon,2019,40.742905,-74.00769,New York,New York City,,French,$$$$,https://guide.michelin.com/us/en/new-york-stat...,2
15,L'Atelier de Joël Robuchon,2019,22.281199,114.15816,Hong Kong,Hong Kong,,French contemporary,$$$$,https://guide.michelin.com/hk/en/hong-kong-reg...,3


In [138]:
# Puede que sean restaurantes diferentes pero a la hora de seleccionar por nombre aparecerán dos restaurantes
df_all[df_all["name"] == "The Kitchen"]

Unnamed: 0,name,year,latitude,longitude,city,region,zipCode,cuisine,price,url,stars
57,The Kitchen,2019,38.58894,-121.41424,Sacramento,California,95825.0,Contemporary,$$$$,https://guide.michelin.com/us/en/california/us...,1
186,The Kitchen,2019,22.191442,113.543,Macau,Macau,,Steakhouse,$$$,https://guide.michelin.com/mo/en/macau-region/...,1


### Restaurantes que aparecen en 2019 y 2018

In [139]:
# Hay un restaurante que aparece en 2018 y en 2019
df_all[df_all["year"] == 2018][["name"]].merge(df_all[df_all["year"] == 2019][["name"]])

Unnamed: 0,name
0,Summer Palace


In [140]:
# Parece que tiene dos sedes en ciudades diferentes
df_all[df_all["name"] == "Summer Palace"]

Unnamed: 0,name,year,latitude,longitude,city,region,zipCode,cuisine,price,url,stars
164,Summer Palace,2019,22.277136,114.1643,Hong Kong,Hong Kong,,Cantonese,$$$,https://guide.michelin.com/hk/en/hong-kong-reg...,1
333,Summer Palace,2018,1.304385,103.825,Singapore,Singapore,,Cantonese,$,https://guide.michelin.com/sg/en/singapore-reg...,1


# Hora de juntar datos del dataframe con los de la api
Los campos por los que juntar son:
- latitude
- longitude

In [141]:
# Voy a probar con datos de este último restaurante:
df_test = df_all[df_all["name"] == "Summer Palace"]

In [142]:
load_dotenv()
key = os.getenv("OPENWEATHERMAP_APIKEY")

In [231]:
units = "metric" # Temperatura en Celsius

list_of_restaurants = []

for name, city, region, cuisine, price, url, stars, latitude, longitude in df_test[["name", "city", "region", "cuisine", "price", "url", "stars", "latitude", "longitude"]].itertuples(index=False):
    url = f'http://api.openweathermap.org/data/2.5/onecall?lat={latitude}&lon={longitude}&units={units}&APPID={key}'

    response = r.get(url)
    
    data = response.json()
    
    restaurant = {'name': name,
                 'city': city,
                 'region': region,
                 'cuisine': cuisine,
                 'price': price,
                 'url': url,
                 'stars': stars,
                 'current_temp': data['current']['temp'],
                 'current_feels_like': data['current']['feels_like']
                 }

    try:
        if data['current']['rain']:
            restaurant['rain_1h'] = data['current']['rain']['1h']
    except KeyError:
        restaurant['rain_1h'] = "En esta localización no hay información sobre previsión de lluvia."

    try:
        if data['current']['weather']:
            restaurant['weather'] = data['current']['weather'][0]['main']
            restaurant['weather_description'] = data['current']['weather'][0]['description']
    except KeyError:
        print("---")
        print("Test2")
        print("---")
        restaurant['weather'] = "En esta localización no hay información meteorológica."
        restaurant['weather_description'] = "En esta localización no hay información meteorológica."

    list_of_restaurants.append(restaurant)

---
200
---
---
Test1
---
---
Test3
---
---
200
---
---
Test1
---
---
Test3
---


In [232]:
for restaurant in list_of_restaurants:
    print(restaurant)

{'name': 'Summer Palace', 'city': 'Hong Kong', 'region': 'Hong Kong', 'cuisine': 'Cantonese', 'price': '$$$', 'url': 'http://api.openweathermap.org/data/2.5/onecall?lat=22.277136&lon=114.1643&units=metric&APPID=0964509b18e74504c35ad703745a7e91', 'stars': 1, 'current_temp': 27.78, 'current_feels_like': 31.51, 'rain_1h': 0.13, 'weather': 'Rain', 'weather_description': 'light rain'}
{'name': 'Summer Palace', 'city': 'Singapore', 'region': 'Singapore', 'cuisine': 'Cantonese', 'price': '$', 'url': 'http://api.openweathermap.org/data/2.5/onecall?lat=1.3043848&lon=103.825&units=metric&APPID=0964509b18e74504c35ad703745a7e91', 'stars': 1, 'current_temp': 25, 'current_feels_like': 29.75, 'rain_1h': 'En esta localización no hay información sobre previsión de lluvia.', 'weather': 'Clouds', 'weather_description': 'broken clouds'}
