In [2]:
# This notebook explores the Recruit Restaurant Visitor Forecasting dataset from Kaggle.

In [1]:
# From data source:

# -Hot Pepper Gourmet (hpg): similar to Yelp, here users can search restaurants and also make a reservation online

# -AirREGI / Restaurant Board (air): similar to Square, a reservation control and cash register system

# Files:
    
# air_reserve.csv - This file contains reservations made in the air system. Note that reserve_datetime is the time when the reservation was created, whereas visit_datetime is the (later) time when the visit will occur.

# hpg_reserve.csv - This file contains reservations made in the hpg system.

# air_store_info.csv - This file contains information about select air restaurants. Column names and contents are self-explanatory.

# hpg_store_info.csv - This file contains information about select hpg restaurants. Column names and contents are self-explanatory.

# store_id_relation.csv - This file allows you to join select restaurants that have both the air and hpg system.

# air_visit_data.csv - This file contains historical visit data for the air restaurants.

# date_info.csv - This file gives basic information about the calendar dates in the dataset.


In [2]:
import os,sys
import json
import pandas as pd
import seaborn as sns
import numpy as np
import matplotlib.pyplot as plt
import plotly.express as px
import descartes
import geopandas as gpd

In [3]:
# Directory holding the Kaggle CSV files; change it here to relocate the data.
DATA_DIR = '../Datasets/restaurants_japan'

# Reservations (one row per reservation) and store metadata for each system
air_reserve = pd.read_csv(f'{DATA_DIR}/air_reserve.csv')
air_store = pd.read_csv(f'{DATA_DIR}/air_store_info.csv')
hpg_reserve = pd.read_csv(f'{DATA_DIR}/hpg_reserve.csv')
hpg_store = pd.read_csv(f'{DATA_DIR}/hpg_store_info.csv')

# Historical visits, air<->hpg store id mapping, and calendar information
air_visit = pd.read_csv(f'{DATA_DIR}/air_visit_data.csv')
id_rel = pd.read_csv(f'{DATA_DIR}/store_id_relation.csv')
date_info = pd.read_csv(f'{DATA_DIR}/date_info.csv')

In [4]:
# For each system (air and hpg), we can merge the reservation and store-info CSVs into a single dataframe

In [5]:
# Attach store metadata to every reservation row.
# Inner join (the pandas default): reservations whose store id is missing
# from the store-info table are dropped.
df_hpg = hpg_reserve.merge(hpg_store, on='hpg_store_id', how='inner')
df_air = air_reserve.merge(air_store, on='air_store_id', how='inner')

In [6]:
# Parse the visit/reservation timestamp columns of both frames into datetimes
for frame in (df_air, df_hpg):
    for col in ('visit_datetime', 'reserve_datetime'):
        frame[col] = pd.to_datetime(frame[col])

In [7]:
# Column dtypes, non-null counts and memory usage of the merged air reservations
df_air.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 92378 entries, 0 to 92377
Data columns (total 8 columns):
 #   Column            Non-Null Count  Dtype         
---  ------            --------------  -----         
 0   air_store_id      92378 non-null  object        
 1   visit_datetime    92378 non-null  datetime64[ns]
 2   reserve_datetime  92378 non-null  datetime64[ns]
 3   reserve_visitors  92378 non-null  int64         
 4   air_genre_name    92378 non-null  object        
 5   air_area_name     92378 non-null  object        
 6   latitude          92378 non-null  float64       
 7   longitude         92378 non-null  float64       
dtypes: datetime64[ns](2), float64(2), int64(1), object(3)
memory usage: 6.3+ MB


In [8]:
# Column dtypes, non-null counts and memory usage of the merged hpg reservations
df_hpg.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 871308 entries, 0 to 871307
Data columns (total 8 columns):
 #   Column            Non-Null Count   Dtype         
---  ------            --------------   -----         
 0   hpg_store_id      871308 non-null  object        
 1   visit_datetime    871308 non-null  datetime64[ns]
 2   reserve_datetime  871308 non-null  datetime64[ns]
 3   reserve_visitors  871308 non-null  int64         
 4   hpg_genre_name    871308 non-null  object        
 5   hpg_area_name     871308 non-null  object        
 6   latitude          871308 non-null  float64       
 7   longitude         871308 non-null  float64       
dtypes: datetime64[ns](2), float64(2), int64(1), object(3)
memory usage: 59.8+ MB


In [9]:
# Number of missing values in each column - HPG
df_hpg.isna().sum()

hpg_store_id        0
visit_datetime      0
reserve_datetime    0
reserve_visitors    0
hpg_genre_name      0
hpg_area_name       0
latitude            0
longitude           0
dtype: int64

In [10]:
# Number of missing values in each column - air
df_air.isna().sum()

air_store_id        0
visit_datetime      0
reserve_datetime    0
reserve_visitors    0
air_genre_name      0
air_area_name       0
latitude            0
longitude           0
dtype: int64

In [11]:
# No NaN values -> happy days!

In [12]:
# Let us map this stuff

In [13]:
# Preview the first rows of the merged air reservations (one row per reservation)
df_air.head()

Unnamed: 0,air_store_id,visit_datetime,reserve_datetime,reserve_visitors,air_genre_name,air_area_name,latitude,longitude
0,air_877f79706adbfb06,2016-01-01 19:00:00,2016-01-01 16:00:00,1,Japanese food,Tōkyō-to Minato-ku Shibakōen,35.658068,139.751599
1,air_877f79706adbfb06,2016-01-01 20:00:00,2016-01-01 16:00:00,2,Japanese food,Tōkyō-to Minato-ku Shibakōen,35.658068,139.751599
2,air_877f79706adbfb06,2016-01-02 18:00:00,2016-01-01 16:00:00,2,Japanese food,Tōkyō-to Minato-ku Shibakōen,35.658068,139.751599
3,air_877f79706adbfb06,2016-01-02 21:00:00,2016-01-01 16:00:00,2,Japanese food,Tōkyō-to Minato-ku Shibakōen,35.658068,139.751599
4,air_877f79706adbfb06,2016-01-08 21:00:00,2016-01-04 20:00:00,2,Japanese food,Tōkyō-to Minato-ku Shibakōen,35.658068,139.751599


In [14]:
# Preview the first rows of the merged hpg reservations (one row per reservation)
df_hpg.head()

Unnamed: 0,hpg_store_id,visit_datetime,reserve_datetime,reserve_visitors,hpg_genre_name,hpg_area_name,latitude,longitude
0,hpg_dac72789163a3f47,2016-01-01 13:00:00,2016-01-01 06:00:00,3,French,Hyōgo-ken Kōbe-shi None,34.692109,135.191698
1,hpg_dac72789163a3f47,2016-01-02 12:00:00,2016-01-01 20:00:00,2,French,Hyōgo-ken Kōbe-shi None,34.692109,135.191698
2,hpg_dac72789163a3f47,2016-01-03 19:00:00,2016-01-02 15:00:00,2,French,Hyōgo-ken Kōbe-shi None,34.692109,135.191698
3,hpg_dac72789163a3f47,2016-01-06 12:00:00,2016-01-06 08:00:00,2,French,Hyōgo-ken Kōbe-shi None,34.692109,135.191698
4,hpg_dac72789163a3f47,2016-01-10 17:00:00,2016-01-04 22:00:00,3,French,Hyōgo-ken Kōbe-shi None,34.692109,135.191698


In [88]:
# Read the Japan boundary shapefile with geopandas.
# NOTE(review): in the cells visible here, japan_map is only referenced by
# commented-out plotting code - confirm it is still needed.
japan_map = gpd.read_file("../Datasets/restaurants_japan/Igismap/Japan_Boundary.shp")

In [15]:
# # Define a base map with county boundaries
# fig,ax = plt.subplots(figsize = (100,100))
# japan_map.plot(ax = ax)

In [19]:
from geopandas import GeoDataFrame
from shapely.geometry import Point  # not used below; kept in case a later cell relies on Point

# Build one point geometry per row from the coordinate columns in a single
# vectorized call instead of constructing Point objects in a Python loop.
geometry = gpd.points_from_xy(df_air.longitude, df_air.latitude)

# The coordinates now live in the geometry column, so drop the raw columns.
df_test = df_air.drop(columns=['longitude', 'latitude'])

# EPSG:4326 is the WGS 84 longitude/latitude coordinate reference system.
gdf = GeoDataFrame(df_test, crs="EPSG:4326", geometry=geometry)

In [16]:
# import plotly.express as px
# fig = px.scatter_geo(gdf,
#                     lat=gdf.geometry.y,
#                     lon=gdf.geometry.x,)
# fig.show()

In [23]:
import plotly.graph_objects as go

# focus point (degrees) the map is centred on
lat_foc = 35
lon_foc = 136

# NOTE(review): gdf holds one row per reservation, so the same store location
# is drawn once per reservation; consider de-duplicating by air_store_id.
fig = go.Figure(
    data=go.Scattergeo(
        lon=gdf.geometry.x,
        lat=gdf.geometry.y,
        # Pass the column itself so each marker's hover text is its store id;
        # the bare string 'air_store_id' would label every marker with that
        # literal text.
        text=gdf['air_store_id'],
        mode='markers',
    )
)

fig.update_layout(
    title='TEST',  # TODO: placeholder title
    geo=dict(
        scope='asia',
        projection_scale=5,  # this is kind of like zoom
        center=dict(lat=lat_foc, lon=lon_foc),  # this will center on the point
    ),
)
fig.show()