# Load dependencies

In [None]:
from google.cloud import bigquery
import datetime
import pandas as pd
import numpy as np

In [None]:
# BigQuery client (constructed with default arguments) used to run the query below.
client = bigquery.Client()

In [None]:
# Row cap appended verbatim at the end of the query text.
limits = 'LIMIT 15000'

# Query template: the CTE adds an `h3_idx` column computed by
# LONGLAT_ASH3(longitude, latitude, 10) to every row of the geovisits view,
# and the outer SELECT picks the columns used by the rest of the notebook.
sql_events_template = """
WITH geovisits_h3_indexes AS (
    SELECT *, `carto-os-eu`.h3.LONGLAT_ASH3(longitude, latitude, 10) h3_idx
    FROM `ggo-ppos-bqgis.singlespot.geovisits_paris_matview`
)
select sptId, uuid, latitude, longitude, accuracy, eventId,  
arrival, departure, score, rank, category, feature, placeName, h3_idx 
FROM geovisits_h3_indexes {limits}
"""
sql_events = sql_events_template.format(limits=limits)

In [None]:
# Submit the query, wait for it to finish, and load the result into pandas.
query_job = client.query(sql_events)
rows = query_job.result()
df = rows.to_dataframe()
# Preview the first rows.
df.head()

In [None]:
# Normalise column dtypes before the analysis.

# Identifier and label columns are handled as plain strings.
for text_col in ('sptId', 'category', 'feature'):
    df[text_col] = df[text_col].astype(str)

# Parse both visit timestamps as UTC-aware datetimes, writing each result back
# to the column it was read from so that 'departure' itself ends up parsed
# (no misspelled side column is created).
for time_col in ('arrival', 'departure'):
    df[time_col] = pd.to_datetime(df[time_col], format='%Y-%m-%d %H:%M:%S', utc=True)

# Show the resulting dtypes and non-null counts.
df.info()

In [None]:
# Working frame: an independent copy of the three columns needed for the
# per-cell aggregation.
df_w = df[['h3_idx', 'category', 'feature']].copy()
# Constant 1 per row, so that summing 'count' gives a row count.
df_w['count'] = 1
df_w.head()


In [None]:
# List the distinct raw category values.
df_w['category'].unique()

In [None]:
# Categories kept as-is; rows of any other category get 'others' for both the
# category and the feature label.
filtered_categories = ['amenity', 'shop', 'user']
keep_row = df_w['category'].isin(filtered_categories)
df_w['category_2'] = np.where(keep_row, df_w['category'], 'others')
df_w['feature_2'] = np.where(keep_row, df_w['feature'], 'others')
df_w.head()

In [None]:
# Distinct collapsed categories.
print(df_w['category_2'].unique())
# NOTE(review): this reassignment is not read by any later cell visible here.
filtered_categories = ['shop']
# Distinct features among the 'shop' rows.
shop_rows = df_w.loc[df_w['category_2'] == 'shop']
print(shop_rows['feature_2'].unique())


In [None]:
# Distinct features among the 'amenity' rows.
is_amenity = df_w['category_2'] == 'amenity'
amenities_features = df_w.loc[is_amenity, 'feature_2'].unique()
print(amenities_features)

In [None]:
# Distinct features among the 'user' rows.
# NOTE(review): the variable is still called `amenities_features` although it
# holds the 'user' features here.
is_user = df_w['category_2'] == 'user'
amenities_features = df_w.loc[is_user, 'feature_2'].unique()
print(amenities_features)

In [None]:
# Distinct features among the 'shop' rows.
# NOTE(review): the variable is still called `amenities_features` although it
# holds the 'shop' features here.
is_shop = df_w['category_2'] == 'shop'
amenities_features = df_w.loc[is_shop, 'feature_2'].unique()
print(amenities_features)

In [None]:
# List the distinct collapsed category values.
df_w['category_2'].unique()

In [None]:
# Columns whose value combinations become the pivot table's columns.
grouped_columns = ['category_2', 'feature_2']

In [None]:
# Sum 'count' per H3 cell (rows) and per (category_2, feature_2) pair (columns);
# missing combinations are filled with 0. margins=True additionally appends a
# 'total' row and a 'total' column.
h3_pivot = pd.pivot_table(
    df_w,
    index='h3_idx',
    columns=grouped_columns,
    values="count",
    aggfunc='sum',
    fill_value=0,
    margins=True,
    margins_name='total',
).reset_index()

# Flatten the two-level column labels into single 'category_feature' strings.
# Labels whose second level is empty keep a trailing underscore (the margin
# column becomes 'total_'). No further string substitution is applied, so
# category/feature names are preserved exactly as they appear in the data.
h3_pivot.columns = h3_pivot.columns.map('_'.join)

# The first column is the former index; give it back its plain name.
h3_pivot.rename(columns={h3_pivot.columns[0]: "h3_idx"}, inplace=True)
h3_pivot.head()

In [None]:
# Remove the margin row (labelled 'total' in the h3_idx column) in place so
# that only real H3 cells remain.
is_total_row = h3_pivot['h3_idx'] == 'total'
indexNames = h3_pivot.loc[is_total_row].index
h3_pivot.drop(index=indexNames, inplace=True)

In [None]:
# Plotting libraries.
import plotly.express as px
import plotly.io as pio
# Select the "notebook_connected" renderer as plotly's default.
pio.renderers.default = "notebook_connected"
import matplotlib.pyplot as plt

In [None]:
# Every pivot column except the first one (the h3_idx key).
df_names = h3_pivot.columns[1:]
df_names

In [None]:
import seaborn as sns
# Pairwise correlation between the count columns of the pivot.
h3_pivot_corr = h3_pivot[df_names]
correlations = h3_pivot_corr.corr()

# Upper-triangle mask and diverging colour map prepared for a masked variant
# of the plot, i.e.
#   sns.heatmap(correlations, mask=mask, cmap=cmap, center=0,
#               linewidth=0.5, cbar_kws={'shrink': 0.5})
# The heatmap drawn below does not use them.
mask = np.zeros_like(correlations)
mask[np.triu_indices_from(mask)] = True
cmap = sns.diverging_palette(220, 10, as_cmap=True)

# Draw the annotated correlation matrix on an 11x9 inch figure.
fig, ax = plt.subplots(figsize=(11, 9))
sns.heatmap(
    correlations,
    annot=True
)