# Visualising Foursquare data and breaking it down into sectors

In [2]:
import json
import glob
import io
import os
import re
import itertools as iter
import numpy as np

# data
import pandas as pd
import shapely as shp
import geopandas as gpd
import matplotlib.pyplot as plt
import polars as pl
import geoarrow

# import shapely
import duckdb
import overturemaps
import h3
import scalenav
import scalenav.data as snd
import scalenav.scale_nav as sn
from scalenav.plotting import cmap
import scalenav.oop as snoo
import jinja2
from ipywidgets import HTML


# from scipy import io as io
# import nctoolkit as nc
# import xarray as xr
# import rioxarray as rx

import ibis as ib

from ibis import _

import ibis.selectors as s

# Interactive mode: expressions are executed and their result is shown when a cell displays them.
ib.options.interactive = True
# NOTE(review): presumably switches the expression repr to a graphviz rendering — confirm against the ibis options docs.
ib.options.graphviz_repr = True

# plots

from wordcloud import WordCloud

# from datashader import transfer_functions as tf, reductions as rd
import pypalettes as pypal
import pydeck as pdk
from seaborn import color_palette

Using angles for meter grid.
Using angles for meter grid.
Using angles for meter grid.
Using angles for meter grid.
Using angles for meter grid.


## Color palettes 

In [3]:
# NOTE(review): the star import hides where names come from. The cells below use
# `map_limits` and `count_pal`, which are not defined anywhere in this notebook, so they
# presumably come from `parameters` — consider importing them explicitly.
from parameters import *

# plot_limit = 10_000

# # continuous palette
# count_palette_name = "OrYel"

# count_pal = pypal.load_cmap(count_palette_name)

# # categorical palette
# cat_pal_name = "Bold"

# cat_pal = pypal.load_cmap(name=cat_pal_name)

# map_limits = [-168.8,-56.9,189.8,77.7]

## Loading data

In [None]:
# Connection object returned by the scalenav helper; it is used below to read the raw parquet files.
conn = snoo.sn_connect()

In [7]:
# Raw Foursquare places dump (glob over the parquet parts). The location can be overridden
# with the FSQ_RAW_PLACES environment variable so the notebook is not tied to one machine;
# when the variable is unset the original local path is used.
fsq_raw_places = os.environ.get(
    "FSQ_RAW_PLACES",
    "/Users/cenv1069/Documents/data/datasets/foursquare/raw/places/*",
)
places = conn.read_parquet(fsq_raw_places)

In [8]:
# Preview 10 rows of the raw places table.
places.head(10)

In [9]:
# Keep only the places lying strictly inside the `map_limits` box, which is indexed here
# as [min longitude, min latitude, max longitude, max latitude].
places = places.filter(
    (_.longitude > map_limits[0])
    & (_.latitude > map_limits[1])
    & (_.longitude < map_limits[2])
    & (_.latitude < map_limits[3])
)

In [10]:
# List the available columns before a subset of them is selected.
places.columns

['fsq_place_id',
 'name',
 'latitude',
 'longitude',
 'address',
 'locality',
 'region',
 'postcode',
 'admin_region',
 'post_town',
 'po_box',
 'country',
 'date_created',
 'date_refreshed',
 'date_closed',
 'tel',
 'website',
 'email',
 'facebook_id',
 'instagram',
 'twitter',
 'fsq_category_ids',
 'fsq_category_labels']

In [11]:
# Narrow the table down to the identifier, name, coordinates, coarse location
# and category columns; everything else (contact details, dates, ...) is dropped.
places = places.select(
    "fsq_place_id",
    "name",
    "latitude",
    "longitude",
    "locality",
    "region",
    "country",
    "fsq_category_ids",
    "fsq_category_labels",
)

In [12]:
# ib.to_sql(places)

In [13]:
# H3 resolution handed to the projection step (`res=h3_res`) below.
h3_res = 5

In [None]:
# Project the places onto the H3 grid at resolution `h3_res`; the cells below read an
# `h3_id` column from the result.
places_h3 = snoo.h3_project(places,res=h3_res)

# NOTE(review): the commented SQL below looks like a hand-written equivalent of the
# projection (derives `h3_id` from latitude/longitude) — confirm before relying on it.
# places.alias("b").sql(f"""
# Select *, h3_h3_to_string(h3_latlng_to_cell(latitude,longitude,{h3_res})) as h3_id
# from b;
# """)

places_h3.head()

Assuming coordinates columns ('longitude','latitude')


In [15]:
# ib.to_sql(places_h3)

In [16]:
# Number of distinct H3 cells that appear in the projected places table.
places_h3.select("h3_id").nunique()

FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

┌────────┐
│ [1;36m238100[0m │
└────────┘

## Closer inspection

In [17]:
# Named bounding boxes for zoomed-in checks. Each box is ordered as
# [min longitude, min latitude, max longitude, max latitude], which is the order
# the lookup filter indexes `lims` in.
closer_lookups = dict(
    centre=[-3, -3, 3, 3],
    caribean=[-85.85, 17.58, -79.72, 21.35],
    caribean2=[-76.86, 21.45, -71.26, 26.14],
)

# Bounding box currently under inspection.
lims = closer_lookups["centre"]

In [18]:
# Restrict the projected places to the box held in `lims`
# ([min longitude, min latitude, max longitude, max latitude]); bounds are exclusive.
places_lookup = places_h3.filter(
    (_.longitude > lims[0])
    & (_.latitude > lims[1])
    & (_.longitude < lims[2])
    & (_.latitude < lims[3])
)

In [19]:
# Number of places that fall inside the selected box.
places_lookup.count()

┌─────┐
│ [1;36m880[0m │
└─────┘

In [20]:
# Display the places that fall inside the selected box.
places_lookup

In [None]:
# lookup_df = places_lookup.execute()

FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

In [22]:
# gpd.GeoDataFrame(
#     lookup_df,
#     geometry=gpd.GeoSeries(gpd.points_from_xy(lookup_df["longitude"],lookup_df["latitude"],crs="epsg:4326")),
# ).explore()

## Places categories

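Expanding categories: the first category label of each place is split on " > " into up to three hierarchy levels, stored in the columns `primary`, `sec` and `raw`.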

In [23]:
# Break the first category label of every place ("A > B > C") into its hierarchy levels:
# `primary`, `sec` and `raw` receive the 1st, 2nd and 3rd element of the split label.
# `cats` is only a temporary helper column and is removed again at the end.
places_h3 = (
    places_h3
    .mutate(cats=_.fsq_category_labels[0].split(" > "))
    .mutate(
        **{
            level: _.cats[depth]
            for depth, level in enumerate(("primary", "sec", "raw"))
        }
    )
    .drop("cats")
)

In [24]:
# Preview the table with the new `primary`, `sec` and `raw` columns.
places_h3.head()

In [25]:
# Number of places per primary (top-level) category, executed into a local result.
places_h3.primary.value_counts().execute()

FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

Unnamed: 0,primary,primary_count
0,Retail,15191632
1,Community and Government,11618950
2,Event,798664
3,Landmarks and Outdoors,6706523
4,Arts and Entertainment,3228616
5,Sports and Recreation,2214536
6,Travel and Transportation,7340595
7,Health and Medicine,4615604
8,Dining and Drinking,17277887
9,,11525326


In [26]:
# Number of distinct second-level categories.
places_h3.sec.nunique()

FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

┌─────┐
│ [1;36m432[0m │
└─────┘

In [27]:
# Number of places per second-level category.
places_h3.sec.value_counts()


FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

In [28]:
# Number of distinct third-level categories (column `raw`).
places_h3.raw.nunique()


FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

┌─────┐
│ [1;36m464[0m │
└─────┘

In [29]:
# Number of places per third-level category (column `raw`).
places_h3.raw.value_counts()

FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

In [30]:
# Check: is `sec` null for every place whose `primary` is null?
places_h3.filter(_.primary.isnull()).sec.isnull().all()

FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

┌──────┐
│ [3;92mTrue[0m │
└──────┘

In [31]:
# Converse check: is `primary` null for every place whose `sec` is null?
places_h3.filter(_.sec.isnull()).primary.isnull().all()

FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

┌───────┐
│ [3;91mFalse[0m │
└───────┘

In [32]:
# Check: is `raw` null for every place whose `sec` is null?
places_h3.filter(_.sec.isnull()).raw.isnull().all()

FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

┌──────┐
│ [3;92mTrue[0m │
└──────┘

In [33]:
# Converse check: is `sec` null for every place whose `raw` is null?
places_h3.filter(_.raw.isnull()).sec.isnull().all()

FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

┌───────┐
│ [3;91mFalse[0m │
└───────┘

### Narrowing down categories

In [34]:
# Primary categories that the exclusion filter below removes from the analysis.
primary_exclude = ["Health and Medicine","Landmarks and Outdoors","Community and Government"]

In [35]:
# Drop the places whose primary category is listed in `primary_exclude`.
# NOTE(review): on SQL backends `NOT (NULL IN (...))` evaluates to NULL, so rows whose
# `primary` is null are presumably dropped by this filter as well — confirm this is intended.
places_h3 = places_h3.filter(~_.primary.isin(primary_exclude))

In [36]:
# Row count after the category exclusion, followed by a preview of the remaining places.
print(places_h3.count())
places_h3.head()

FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

┌──────────┐
│ [1;36m69709790[0m │
└──────────┘


In [None]:
# Cache the filtered places (identifier, name, location and the three category levels)
# as a parquet file. The export is skipped when the file already exists, so the file has
# to be deleted by hand to refresh it after an upstream step changes.
fsq_processed_file = "../datasets/foursquare/processed/places.parquet"
if not os.path.exists(fsq_processed_file):
    # Create the target folder first so the write does not fail when it is missing.
    os.makedirs(os.path.dirname(fsq_processed_file), exist_ok=True)
    places_h3.select(
        "fsq_place_id",
        "name",
        "latitude",
        "longitude",
        "locality",
        "region",
        "country",
        "primary",
        "sec",
        "raw",
    ).to_parquet(fsq_processed_file)

FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

## Primary categories groups

In [36]:
# places_h3.group_by("h3_id").mutate(count=_.count())

## Places density

In [37]:
# Number of places per H3 cell; the filters further down read the count as `h3_id_count`.
places_count = places_h3.h3_id.value_counts()

In [38]:
# Preview the per-cell counts.
places_count.head()

FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

### Low density categories

In [40]:
# ib.to_sql(places_count)
# Attach the per-cell place count (`h3_id_count`) to every place via a left join on the
# cell id. In the recorded output the right-hand key appears as an extra `h3_id_right` column.
places_h3 = places_h3.join(
    places_count,
    "h3_id",
    how="left",
)

In [41]:
# Execute the query and keep locally the places that sit in cells holding fewer than 4 places.
# NOTE(review): the cutoff 4 is hard-coded and differs from `dens_thres` (2) used for the
# density map below — confirm both values are intended.
low_dens_places_df = places_h3.filter(_.h3_id_count<4).execute()

FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

In [42]:
# Inspect the places found in low-density cells.
low_dens_places_df

Unnamed: 0,fsq_place_id,name,latitude,longitude,locality,region,country,fsq_category_ids,fsq_category_labels,h3_id,primary,sec,raw,h3_id_right,h3_id_count
0,5577e639498e93b583766fa5,Льонок,51.330140,31.677754,Куликовка,,UA,[4bf58dd8d48988d1f9941735],[Retail > Food and Beverage Retail],851e61b3fffffff,Retail,Food and Beverage Retail,,851e61b3fffffff,1
1,1575a6780cba4f0b1bb3e7e6,Resto Au Soleil Couchant,48.728138,-79.462230,Clerval,QC,CA,[4bf58dd8d48988d147941735],[Dining and Drinking > Restaurant > Diner],852b94d3fffffff,Dining and Drinking,Restaurant,Diner,852b94d3fffffff,2
2,640a2ccd29068136a728990b,Pharmacie Kantakare,16.280230,-16.136066,Ross Béthio,Région de Saint Louis,SN,[4bf58dd8d48988d10f951735],[Retail > Pharmacy],85541023fffffff,Retail,Pharmacy,,85541023fffffff,3
3,4f82ec49e4b0e160bb920d36,Belén De Los Andaquíes,1.411036,-75.871689,,,CO,[4d4b7105d754a06379d81259],[Travel and Transportation],8566c043fffffff,Travel and Transportation,,,8566c043fffffff,3
4,5735ed06498e851ad9f2ebf8,El Negocio de Mamá,7.121814,0.012703,,,GH,[52f2ab2ebcbc57f1066b8b1d],[Business and Professional Services > Health a...,85752d1bfffffff,Business and Professional Services,Health and Beauty Service,Dry Cleaner,85752d1bfffffff,2
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
120339,5531f8a7498ee29a12707829,Hero Horce Riding Club,39.112548,116.926876,,,CN,[52e81612bcbc57f1066b7a2e],[Sports and Recreation > Sports Club],85318c47fffffff,Sports and Recreation,Sports Club,,85318c47fffffff,3
120340,53fc8d9c498e738553d412b2,Posto Oeste Bahia,-12.925580,-45.974480,,,BR,[52e81612bcbc57f1066b79f4],[Dining and Drinking > Restaurant > Buffet],858129c3fffffff,Dining and Drinking,Restaurant,Buffet,858129c3fffffff,3
120341,53455551498eeaed8f6872b9,Сбербанк,48.198031,136.124949,поселок городского типа Мухен,,RU,[4bf58dd8d48988d10a951735],[Business and Professional Services > Financia...,8514d2cffffffff,Business and Professional Services,Financial Service,Banking and Finance,8514d2cffffffff,2
120342,56a632ce498e6693802ae023,comercial BA Lucena,-6.654443,-64.291641,,,BR,[4eb1bea83b7b6f98df247e06],[Business and Professional Services > Factory],858a0ccffffffff,Business and Professional Services,Factory,,858a0ccffffffff,1


In [None]:
# A cell must hold more than this many places to be kept for the density map.
dens_thres = 2

In [None]:
# Execute the per-cell counts, keeping only the cells whose count exceeds `dens_thres`.
places_count_df = places_count.filter(_.h3_id_count>dens_thres).execute()

In [None]:
# Preview the executed per-cell counts.
places_count_df.head()

## Plotting 

In [None]:
# Derive a colour per cell from its place count with the scalenav `cmap` helper (called with
# `log=True` and the `count_pal` palette); the map layer reads the `cols` column as fill colour.
places_count_df["cols"] = cmap(places_count_df["h3_id_count"],palette=count_pal,log=True)

In [None]:

# Hexagon layer: one H3 cell per row of `places_count_df`, filled with the colour
# stored in the `cols` column. The outline colour is fully transparent (alpha 0) and
# has zero minimum width, so only the fill is visible.
layer = pdk.Layer(
    "H3HexagonLayer",
    places_count_df,
    pickable=True,
    stroked=True,
    filled=True,
    opacity=0.6,
    extruded=False,
    get_hexagon="h3_id",
    get_fill_color="cols",
    get_line_color=[255, 255, 255, 0],
    line_width_min_pixels=0,
)

# Initial camera: centred on (0, 0), zoomed out, slightly tilted.
view_state = pdk.ViewState(latitude=0, longitude=0, zoom=3, bearing=0, pitch=30)

# Make sure the output folder exists so the export does not fail when it is missing.
deck_html_file = "../deck_maps/foursquare_places_dens_layer.html"
os.makedirs(os.path.dirname(deck_html_file), exist_ok=True)

# Render the map; hovering a cell shows its place count.
r = pdk.Deck(
    layers=[layer],
    initial_view_state=view_state,
    tooltip={"text": "Count: {h3_id_count}"},
)
r.to_html(
    deck_html_file,
    iframe_height=700,
)