In [None]:
%load_ext google.cloud.bigquery

In [None]:
import datetime
import json

import pandas as pd
from google.cloud import bigquery

In [None]:
# BigQuery client used for every query in this notebook; no project or
# credentials are passed explicitly here.
client = bigquery.Client()

In [None]:
# Optional row cap for quick experiments. Leave as None to read the whole
# view; set to an integer (e.g. 15000) to append a LIMIT clause to the query.
ROW_LIMIT = None
limits = f'LIMIT {ROW_LIMIT}' if ROW_LIMIT else ''

# Select the visit columns used below and add an `h3_idx` column computed by
# CARTO's LONGLAT_ASH3 from longitude/latitude (third argument is 10 —
# presumably the H3 resolution; confirm against the CARTO docs).
sql_events = f"""
WITH geovisits_h3_indexes AS (
    SELECT *, `carto-os-eu`.h3.LONGLAT_ASH3(longitude, latitude, 10) h3_idx
    FROM `ggo-ppos-bqgis.singlespot.geovisits_paris_matview`
)
select sptId, uuid, latitude, longitude, accuracy, eventId,  
arrival, departure, score, rank, category, feature, placeName, h3_idx 
FROM geovisits_h3_indexes {limits}
"""

In [None]:
# Submit the query, then collect its result rows.
query_job = client.query(sql_events)
rows = query_job.result()

# Load the rows into a pandas DataFrame used by the rest of the notebook.
df = rows.to_dataframe()
df.head()

In [None]:
# Normalise the identifier and label columns to plain strings.
df['sptId'] = df['sptId'].astype(str)
df['category'] = df['category'].astype(str)
df['feature'] = df['feature'].astype(str)

# Parse both visit timestamps as timezone-aware UTC datetimes, writing each
# result back to the column it was read from. Writing the parsed departure
# to a differently spelled column would leave 'departure' unconverted.
for ts_col in ('arrival', 'departure'):
    df[ts_col] = pd.to_datetime(df[ts_col], format='%Y-%m-%d %H:%M:%S', utc=True)

df.info()

In [None]:
""" 
h3_g = (df
                          .groupby('h3_idx')
                          .eventId
                          .agg(list)
                          .to_frame("ids")
                          .reset_index())
# Let's count each points inside the hexagon
h3_g['count'] =(h3_g['ids']
                      .apply(lambda ignition_ids:len(ignition_ids))) 
"""

# Number of visit rows per H3 cell: one row per h3_idx with its 'count'.
h3_g = df.groupby('h3_idx').size().reset_index(name='count')

In [None]:
# Preview the per-cell visit counts.
h3_g.head()

In [None]:
# Working frame for the pivot: the H3 cell and category of every visit, plus
# a constant 'count' of 1 per row so the pivot has a value column to aggregate.
df_w = df[['h3_idx', 'category']].assign(count=1)
df_w.head()


In [None]:
# Cross-tabulate visits: one row per H3 cell, one column per category, with
# missing combinations filled with 0. `margins=True` adds a 'total' column
# and a 'total' row.
#
# A single (non-list) aggfunc keeps the columns flat, so the category names
# are used verbatim as column labels — no MultiIndex joining or prefix
# stripping that could alter a category name containing the prefix text.
h3_pivot = (
    pd.pivot_table(
        df_w,
        index='h3_idx',
        columns='category',
        values='count',
        aggfunc=len,
        fill_value=0,
        margins=True,
        margins_name='total',
    )
    .rename_axis(columns=None)  # drop the 'category' label from the column axis
    .reset_index()              # bring h3_idx back as the first regular column
)
h3_pivot.head()

In [None]:
# Remove the 'total' margin row added by the pivot so that only real H3
# cells remain in h3_pivot.
is_total_row = h3_pivot['h3_idx'] == 'total'
indexNames = h3_pivot.index[is_total_row]
h3_pivot = h3_pivot.drop(index=indexNames)

# Sanity check: this selection should now display no rows.
h3_pivot[h3_pivot['h3_idx'] == 'total']


In [None]:
from shapely.geometry import Polygon
from geojson.feature import *
import h3

In [None]:
def create_geometry(row, hex_id_field):
  """Return the outline of one row's H3 cell as a shapely ``Polygon``.

  Args:
    row: Mapping-like record (e.g. a DataFrame row) holding the H3 index.
    hex_id_field: Key in ``row`` under which the H3 index is stored.

  Returns:
    A ``Polygon`` built from the boundary that ``h3.h3_to_geo_boundary``
    returns for that cell. The boundary is requested in GeoJSON form
    (``geo_json=True``).
  """
  boundary = h3.h3_to_geo_boundary(row[hex_id_field], geo_json=True)
  return Polygon(boundary)

def hexagons_dataframe_to_geojson(df_hex, hex_id_field, value_field, file_output=None):
    """Convert a DataFrame of H3 cells into a GeoJSON ``FeatureCollection``.

    Each row becomes one ``Feature`` whose geometry comes from
    ``create_geometry``, whose ``id`` is the row's H3 index, and whose
    ``properties`` hold the row's value under the key ``"value"``.

    Args:
        df_hex: DataFrame with one row per hexagon.
        hex_id_field: Name of the column containing the H3 index.
        value_field: Name of the column whose value is stored per feature.
        file_output: Optional path. When given, the collection is written
            there as JSON and nothing is returned.

    Returns:
        The ``FeatureCollection`` when ``file_output`` is ``None``;
        otherwise ``None``.
    """
    list_features = [
        Feature(
            geometry=create_geometry(row, hex_id_field),
            id=row[hex_id_field],
            properties={"value": row[value_field]},
        )
        for _, row in df_hex.iterrows()
    ]
    feat_collection = FeatureCollection(list_features)

    if file_output is None:
        return feat_collection

    # `json` is imported with the notebook's top-level imports.
    with open(file_output, "w") as f:
        json.dump(feat_collection, f)
    return None

In [None]:
# GeoJSON hexagons for every H3 cell in the pivot; each feature carries the
# cell's 'total' as its value.
geojson_obj = hexagons_dataframe_to_geojson(
    h3_pivot,
    hex_id_field='h3_idx',
    value_field='total',
)

In [None]:
import plotly.express as px
import plotly.io as pio
# Make "notebook_connected" the default plotly renderer for figures shown below.
pio.renderers.default = "notebook_connected"

In [None]:
# Upper bound of the colour scale: the mean visit count per cell, so cells at
# or above the mean all take the top colour.
visits_mean = h3_g['count'].mean()

# Choropleth of visit counts per H3 cell; rows of h3_g are matched to the
# GeoJSON hexagons through their 'h3_idx' value.
fig = px.choropleth_mapbox(
    h3_g,
    geojson=geojson_obj,
    locations='h3_idx',
    color='count',
    color_continuous_scale="Viridis",
    range_color=(0, visits_mean),
    mapbox_style='carto-positron',
    zoom=12,
    center={"lat": 48.85432452980058, "lon": 2.3459243774414062},  # initial map centre
    opacity=0.7,
    labels={'count': '# of visits '},
)

# Remove the figure margins so the map fills the output area.
fig.update_layout(margin={"r": 0, "t": 0, "l": 0, "b": 0})
fig.show()

In [None]:
import matplotlib.pyplot as plt
import numpy as np

In [None]:
# All pivot columns except the first one (h3_idx sits in position 0), i.e.
# the per-category counts and 'total'.
df_names = h3_pivot.columns[1:]
df_names

```python
""" 
h3_pivot_corr=h3_pivot[df_names]
correlations= h3_pivot_corr.corr()
fig=plt.figure()
ax=fig.add_subplot(111)
cax=ax.matshow(correlations,vmin=-1,vmax=1)
fig.colorbar(cax)
ticks=np.arange(0,len(df_names),1)
ax.set_xticks(ticks)
ax.set_yticks(ticks)
ax.set_xticklabels(df_names)
ax.set_yticklabels(df_names)
plt.show() """
```

In [None]:
import seaborn as sns

# Pairwise correlation between the per-cell count columns listed in df_names.
h3_pivot_corr = h3_pivot[df_names]
correlations = h3_pivot_corr.corr()

# Annotated heatmap of the full correlation matrix, drawn explicitly on the
# axes created here rather than on whichever axes happen to be current.
fig, ax = plt.subplots(figsize=(11, 9))
sns.heatmap(
    correlations,
    annot=True,
    ax=ax,
)

In [None]:
# Show the correlation matrix as a table.
correlations

In [None]:
import tensorflow as tf

In [None]:
# Target: the 'total' column; features: every other count column.
# NOTE(review): 'total' is the pivot's margin column, i.e. the sum of the
# category columns kept in X — confirm that this target is intended.
y = h3_pivot_corr['total']
X = h3_pivot_corr.drop(columns='total')

In [None]:
from sklearn.model_selection import train_test_split

# Hold out 30% of the cells for testing; the fixed random_state keeps the
# split the same from run to run.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    random_state=42,
)

In [None]:
from sklearn.preprocessing import MinMaxScaler

In [None]:
# Fit the min-max scaler on the training features only, then apply that same
# fitted scaling to both the training and the test features.
scaler = MinMaxScaler()
scaler.fit(X_train)
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)

# Creating a model

In [None]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.layers import Dropout

In [None]:
# Small fully-connected regressor: hidden layers of 8 and 3 ReLU units, each
# followed by 50% dropout, then a single output unit with no activation given.
# NOTE(review): no random seed is set before building or training the model,
# so results are not expected to repeat exactly between runs — confirm
# whether that matters here.
model = Sequential([
    Dense(8, activation='relu'),
    Dropout(0.5),
    Dense(3, activation='relu'),
    Dropout(0.5),
    Dense(1),
])

# Train against mean squared error using the 'adam' optimizer.
model.compile(optimizer='adam', loss='mse')

# Training the model

In [None]:
from tensorflow.keras.callbacks import EarlyStopping

In [None]:
# Early-stopping callback: watches 'val_loss' (lower is better) with a
# patience of 10 epochs and verbose logging.
early_stop = EarlyStopping(
    monitor='val_loss',
    mode='min',
    verbose=1,
    patience=10,
)

In [None]:
# Train for up to 400 epochs in batches of 128, with the early-stopping
# callback attached.
# NOTE(review): the test split is used as validation data here and is also
# what the evaluation cells score against — confirm this is acceptable.
model.fit(
    x=X_train,
    y=y_train.values,
    validation_data=(X_test, y_test.values),
    batch_size=128,
    epochs=400,
    callbacks=[early_stop],
)

In [None]:
# Collect the metrics recorded in the model's training history into a DataFrame.
losses = pd.DataFrame(model.history.history)

In [None]:
# Plot every recorded metric column against its row index.
losses.plot()

# Model evaluation

In [None]:
from sklearn.metrics import mean_squared_error,mean_absolute_error

In [None]:
# Model predictions for the scaled test features.
predictions = model.predict(X_test)

In [None]:
# Mean absolute error between the test targets and the predictions.
mean_absolute_error(y_test,predictions)

In [None]:
# Root mean squared error: square root of the MSE on the test split.
np.sqrt(mean_squared_error(y_test,predictions))