**DISCLAIMER**

By accessing this code, you acknowledge the code is made available for presentation and demonstration purposes only and that the code (1) is not subject to SOC 1 and SOC 2 compliance audits, and (2) is not designed or intended to be a substitute for the professional advice, diagnosis, treatment, or judgment of a certified financial services professional. Do not use this code to replace, substitute, or provide professional financial advice, or judgement. You are solely responsible for ensuring the regulatory, legal, and/or contractual compliance of any use of the code, including obtaining any authorizations or consents, and any solution you choose to build that incorporates this code in whole or in part.

In [0]:
%pip install geojson

In [0]:
%pip install /dbfs/FileStore/demo-fsi/geoscan/python folium==0.12.1 h3==3.7.1 mlflow

In [0]:
%sql
CREATE DATABASE IF NOT EXISTS geospatial_miami

In [0]:
from pyspark.sql import functions as F
from pyspark.sql.types import * 

# Explicit column types for the raw CSV, so Spark does not have to infer them
schema = StructType([
  StructField('user', StringType()),
  StructField('latitude', DoubleType()),
  StructField('longitude', DoubleType()),
  StructField('amount', DoubleType()),
])

# Step 1: read the raw CSV (first line is a header) with the schema above
raw_transactions = (
  spark.read
    .format('csv')
    .option('header', 'true')
    .schema(schema)
    .load('/FileStore/geospatial_fraud_detection/miami.csv')
)

# Step 2: persist as a Delta table, replacing any previous content
_ = (
  raw_transactions.write
    .format('delta')
    .mode('overwrite')
    .saveAsTable('geospatial_miami.transactions')
)

In [0]:
%sql
OPTIMIZE geospatial_miami.transactions ZORDER BY (user)

path,metrics
,"List(1, 2, List(2208634, 2208634, 2208634.0, 1, 2208634), List(1079108, 1219781, 1149444.0, 2, 2298889), 0, List(minCubeSize(107374182400), List(0, 0), List(2, 2298889), 0, List(2, 2298889), 1, null), 1)"


In [0]:
# Load the transactions table as a Spark DataFrame (reused by later cells)
# and preview its rows
points_df = spark.read.table('geospatial_miami.transactions')
display(points_df)

user,latitude,longitude,amount
b07db0f6-7958-4f8e-b2ea-b80e775ff48b,25.7364282,-80.232984,134.61
27e8040c-0649-4674-b48b-49e308a8c9e5,25.7360642,-80.2340774,108.91
7e7bb66b-44ea-4d7d-9450-be80f1e28990,25.7359423,-80.2342303,142.25
f0485504-04f4-4bc6-912f-6913d0519223,25.7358308,-80.2343706,121.28
2a280722-2008-4d82-aa02-9a57fec8c472,25.7352896,-80.2342836,102.65
c3376825-04d8-49b7-8c89-cd3fd14df8b7,25.7354298,-80.2343998,184.89
86906e6f-f6db-477b-b261-929ce15fa4d2,25.7355748,-80.2341353,116.22
1f8704c6-0752-4e52-80eb-8784a5e324b2,25.7355942,-80.2339791,14.51
c3376825-04d8-49b7-8c89-cd3fd14df8b7,25.7357477,-80.2345242,17.14
add072c0-77fe-43ca-8c47-35645861a05e,25.7355126,-80.2346039,130.98


In [0]:
import h3
from pyspark.sql.functions import udf
from pyspark.sql import functions as F

@udf("string")
def to_h3(lat, lng, precision):
  """Spark UDF returning the upper-cased H3 cell index containing a point.

  Args:
    lat: latitude of the point.
    lng: longitude of the point.
    precision: H3 resolution passed to ``h3.geo_to_h3``.

  Returns:
    The H3 index as an upper-case string, or None (SQL NULL) when any input
    is missing.
  """
  # The table columns are nullable, and nulls arrive here as None; h3 cannot
  # index them, so propagate NULL instead of failing the whole Spark job.
  if lat is None or lng is None or precision is None:
    return None
  return h3.geo_to_h3(lat, lng, precision).upper()

# Count transactions per H3 cell at resolution 5 and show the busiest cells first
h3_cell = to_h3(F.col('latitude'), F.col('longitude'), F.lit(5)).alias('h3')
transactions = spark.read.table('geospatial_miami.transactions')
cell_counts = transactions.groupBy(h3_cell).count()
display(cell_counts.orderBy(F.desc('count')))

h3,count
8544A18FFFFFFFF,43435
8544A117FFFFFFF,40806
8544A113FFFFFFF,17955
8544A1ABFFFFFFF,5887
8544A103FFFFFFF,4121
8544A1BBFFFFFFF,1966
8544A187FFFFFFF,1322
8544A183FFFFFFF,1037
8544A197FFFFFFF,473
8544A107FFFFFFF,383


In [0]:
from folium import plugins

In [0]:
import folium
from folium import plugins

# Random sample (fraction 0.1) of the transactions for plotting. The two
# coordinate columns are selected *before* toPandas() so that the unused
# 'user' and 'amount' columns are never shipped to the driver.
points = (
  spark.read.table('geospatial_miami.transactions')
    .sample(0.1)
    .select('latitude', 'longitude')
    .toPandas()
)

# Base map with the 'Stamen Toner' tile layer
miami = folium.Map([25.761681,-80.191788], zoom_start=10, width='80%', height='100%')
folium.TileLayer('Stamen Toner').add_to(miami)

# Heat map layer built from the sampled (latitude, longitude) pairs
miami.add_child(plugins.HeatMap(points.to_numpy(), radius=12))

# Last expression of the cell: renders the map in the notebook
miami

In [0]:
# Random sample (fraction 0.1, no fixed seed) of the transactions; this is the
# DataFrame the Geoscan estimator is fitted on in the next cell
points_sampled = points_df.sample(0.1)

In [0]:
from geoscan import Geoscan
import mlflow

# Single definition of the hyper-parameters: the same values configure the
# estimator and are logged to MLflow, so the tracked run cannot drift from
# what was actually fitted.
epsilon = 800
min_pts = 14

with mlflow.start_run(run_name='GEOSCAN_MIAMI') as run:

  geoscan = (
    Geoscan()
      .setLatitudeCol('latitude')
      .setLongitudeCol('longitude')
      .setPredictionCol('cluster')
      .setEpsilon(epsilon)
      .setMinPts(min_pts)
  )

  mlflow.log_param('epsilon', epsilon)
  mlflow.log_param('minPts', min_pts)

  # Fit on the sampled transactions and store the fitted model on the run
  model = geoscan.fit(points_sampled)
  mlflow.spark.log_model(model, "geoscan_miami")

  # Keep the run id so a later cell can attach artifacts to this run
  run_id = run.info.run_id

In [0]:
import mlflow

# Export the fitted clusters as a GeoJSON string and stage it in a file under /tmp
geojson_path = '/tmp/geoscan_miami.geojson'
geoJson = model.toGeoJson()
with open(geojson_path, 'w') as geojson_file:
  geojson_file.write(geoJson)

# Attach the staged file to the MLflow run that trained the model
client = mlflow.tracking.MlflowClient()
client.log_artifact(run_id, geojson_path)

In [0]:
# Overlay the cluster GeoJSON on the existing heat map, then re-render the map
folium.GeoJson(geoJson).add_to(miami)
miami

In [0]:
import random
from pyspark.sql.types import *

# Cap the number of points kept for a same H3 polygon of size 11 (30m) so that
# very dense locations do not dominate the sampled dataset.
def sample(latitudes, longitudes, max_points=14):
  """Randomly keep at most ``max_points`` (latitude, longitude) pairs.

  Args:
    latitudes: latitudes of the points of one cell.
    longitudes: longitudes of the same points, in the same order.
    max_points: upper bound on the number of pairs returned (default 14).

  Returns:
    A list of (latitude, longitude) tuples drawn without replacement; every
    pair is returned when there are ``max_points`` or fewer.
  """
  coordinates = list(zip(latitudes, longitudes))
  # random.sample raises if asked for more items than available, hence the min()
  return random.sample(coordinates, min(len(coordinates), max_points))

# Return type of the sampling UDF: an array of (latitude, longitude) structs
sample_schema = ArrayType(
  StructType([
    StructField("latitude", DoubleType()),
    StructField("longitude", DoubleType()),
  ])
)
sample_udf = udf(sample, sample_schema)

# Gather the coordinates that fall into each resolution-11 H3 cell
fine_cell = to_h3(F.col("latitude"), F.col("longitude"), F.lit(11))
points_per_cell = points_df.groupBy(fine_cell).agg(
  F.collect_list(F.col("latitude")).alias("latitudes"),
  F.collect_list(F.col("longitude")).alias("longitudes"),
)

# Down-sample every cell, then flatten back to one row per retained point
sample_df = (
  points_per_cell
    .withColumn('sample', F.explode(sample_udf(F.col('latitudes'), F.col('longitudes'))))
    .select('sample.latitude', 'sample.longitude')
)

# Distribution of the retained points over coarser resolution-9 cells
coarse_cell = to_h3(F.col("latitude"), F.col("longitude"), F.lit(9)).alias("h3")
display(sample_df.groupBy(coarse_cell).count().orderBy(F.desc("count")))

h3,count
8944A1AB53BFFFF,298
8944A1AB52BFFFF,288
8944A116B4BFFFF,278
8944A1AB5D7FFFF,219
8944A1B9A2FFFFF,213
8944A1B9A6FFFFF,213
8944A1AB4D3FFFF,209
8944A1AB58BFFFF,204
8944A1AB5C7FFFF,203
8944A1AB497FFFF,199


In [0]:
from pyspark.sql import functions as F

# Assign a cluster to every transaction, then count rows per cluster id
# (rows without a cluster show up under an empty cluster value)
clustered = model.transform(points_df)
cluster_sizes = clustered.groupBy('cluster').count()
display(cluster_sizes.orderBy(F.col('cluster').asc()))

cluster,count
,7549
0.0,80429
1.0,20568
2.0,8505
3.0,306
4.0,133


In [0]:
from folium.plugins import MarkerCluster

# Transactions that received no cluster, down-sampled (fraction 0.01) and
# collected to the driver for plotting
outliers = model.transform(points_df).filter(F.col('cluster').isNull())
miami_anomalies_points = outliers.sample(0.01).toPandas()

# Fresh map with the cluster shapes drawn on top of the tile layer
miami_anomalies = folium.Map([25.761681,-80.191788], zoom_start=12, width='80%', height='100%')
folium.TileLayer('Stamen Toner').add_to(miami_anomalies)
folium.GeoJson(geoJson, name="geojson").add_to(miami_anomalies)

# One small red marker per sampled anomaly
for lat, lng in zip(miami_anomalies_points.latitude, miami_anomalies_points.longitude):
  folium.CircleMarker([lat, lng], radius=2, color='red').add_to(miami_anomalies)

miami_anomalies

In [0]:
%fs rm -r dbfs:/FileStore/demo-fsi/models/geoscan_miami

In [0]:
# Persist the fitted model; the preceding %fs cell removes any earlier copy
# stored at this location
model.save('/FileStore/demo-fsi/models/geoscan_miami')

In [0]:
from geoscan import GeoscanModel
# Reload the model from the path it was saved to, rebinding `model`
model = GeoscanModel.load('/FileStore/demo-fsi/models/geoscan_miami')

In [0]:
from geoscan import GeoscanPersonalized
import mlflow

# Single definition of the hyper-parameters: the same values configure the
# estimator and are logged to MLflow, so the tracked run cannot drift from
# what was actually fitted.
epsilon = 800
min_pts = 9

with mlflow.start_run(run_name='GEOSCAN_MIAMI_PERSONALIZED') as run:

  # Same estimator settings as a plain Geoscan, plus the column to group by
  geoscan = (
    GeoscanPersonalized()
      .setLatitudeCol('latitude')
      .setLongitudeCol('longitude')
      .setPredictionCol('cluster')
      .setGroupedCol('user')
      .setEpsilon(epsilon)
      .setMinPts(min_pts)
  )

  # Log before fitting so that a failed fit still leaves its parameters on the run
  mlflow.log_param('epsilon', epsilon)
  mlflow.log_param('minPts', min_pts)

  models = geoscan.fit(points_df)
  run_id = run.info.run_id

In [0]:
%fs rm -r dbfs:/FileStore/demo-fsi/models/geoscan_miami_personalized

In [0]:
# Persist the personalized model; the preceding %fs cell removes any earlier
# copy stored at this location
models.save('/FileStore/demo-fsi/models/geoscan_miami_personalized')

In [0]:
from geoscan import GeoscanPersonalizedModel
# Reload the personalized model from the path it was saved to
model_personalized = GeoscanPersonalizedModel.load('/FileStore/demo-fsi/models/geoscan_miami_personalized')

In [0]:
# Export the personalized clusters as GeoJSON: one row per user, with that
# user's FeatureCollection serialized as a string in the 'cluster' column
# (users without any cluster get an empty "features" list)
geoJsons = model_personalized.toGeoJson()
display(geoJsons)

user,cluster
b8fe8ae7-3450-416f-ae75-a03fb0488fbf,"{""type"":""FeatureCollection"",""features"":[{""type"":""Feature"",""id"":0,""properties"":{""name"":""CLUSTER-0""},""geometry"":{""type"":""Polygon"",""coordinates"":[[[-80.1909898,25.8366814],[-80.1909898,25.8366814],[-80.1909898,25.8366814],[-80.1909898,25.8366814]]]}},{""type"":""Feature"",""id"":1,""properties"":{""name"":""CLUSTER-1""},""geometry"":{""type"":""Polygon"",""coordinates"":[[[-80.2887536,25.8805134],[-80.2887536,25.8805134],[-80.2887536,25.8805134],[-80.2887536,25.8805134]]]}},{""type"":""Feature"",""id"":2,""properties"":{""name"":""CLUSTER-2""},""geometry"":{""type"":""Polygon"",""coordinates"":[[[-80.1287899,25.8637311],[-80.1287899,25.8637311],[-80.1287899,25.8637311],[-80.1287899,25.8637311]]]}},{""type"":""Feature"",""id"":3,""properties"":{""name"":""CLUSTER-3""},""geometry"":{""type"":""Polygon"",""coordinates"":[[[-80.2317452,25.7934699],[-80.2311459,25.7938602],[-80.2311459,25.7938602],[-80.2317452,25.7934699],[-80.2317452,25.7934699]]]}},{""type"":""Feature"",""id"":4,""properties"":{""name"":""CLUSTER-4""},""geometry"":{""type"":""Polygon"",""coordinates"":[[[-80.2711905,25.8733634],[-80.2711905,25.8733634],[-80.2711905,25.8733634],[-80.2711905,25.8733634]]]}},{""type"":""Feature"",""id"":5,""properties"":{""name"":""CLUSTER-5""},""geometry"":{""type"":""Polygon"",""coordinates"":[[[-80.2662395,25.752664],[-80.2662395,25.752664],[-80.266162,25.7544316],[-80.266162,25.7544316],[-80.2664126,25.7552817],[-80.266642,25.7552727],[-80.2662395,25.752664]]]}},{""type"":""Feature"",""id"":6,""properties"":{""name"":""CLUSTER-6""},""geometry"":{""type"":""Polygon"",""coordinates"":[[[-80.3028346,25.7682879],[-80.2954263,25.7685212],[-80.2928529,25.7712834],[-80.3000259,25.7742709],[-80.3028346,25.7682879]]]}},{""type"":""Feature"",""id"":7,""properties"":{""name"":""CLUSTER-7""},""geometry"":{""type"":""Polygon"",""coordinates"":[[[-80.1932256,25.8153361],[-80.1932256,25.8153361],[-80.1962503,25.8206981],[-
80.1970874,25.819012],[-80.2046386,25.820372],[-80.2008864,25.8167332],[-80.1932256,25.8153361]]]}},{""type"":""Feature"",""id"":8,""properties"":{""name"":""CLUSTER-8""},""geometry"":{""type"":""Polygon"",""coordinates"":[[[-80.218791,25.8288319],[-80.2122725,25.8304417],[-80.2136767,25.8326901],[-80.2149485,25.8345818],[-80.2183551,25.8309762],[-80.218791,25.8288319]]]}},{""type"":""Feature"",""id"":9,""properties"":{""name"":""CLUSTER-9""},""geometry"":{""type"":""Polygon"",""coordinates"":[[[-80.2130673,25.800085],[-80.2130673,25.800085],[-80.2069565,25.8015571],[-80.2069565,25.8015571],[-80.2122648,25.8087139],[-80.2130673,25.800085]]]}},{""type"":""Feature"",""id"":10,""properties"":{""name"":""CLUSTER-10""},""geometry"":{""type"":""Polygon"",""coordinates"":[[[-80.2992887,25.8642998],[-80.2924814,25.8722919],[-80.2991773,25.8711795],[-80.3020447,25.8699474],[-80.3008212,25.864941],[-80.2992887,25.8642998]]]}},{""type"":""Feature"",""id"":11,""properties"":{""name"":""CLUSTER-11""},""geometry"":{""type"":""Polygon"",""coordinates"":[[[-80.3375881,25.8874853],[-80.3341548,25.8895404],[-80.3339701,25.8905984],[-80.3411234,25.8921806],[-80.3414502,25.8912889],[-80.341168,25.8898181],[-80.3375881,25.8874853]]]}},{""type"":""Feature"",""id"":12,""properties"":{""name"":""CLUSTER-12""},""geometry"":{""type"":""Polygon"",""coordinates"":[[[-80.2548376,25.7227923],[-80.2508757,25.724679],[-80.2454013,25.7325963],[-80.2454285,25.7415429],[-80.2454491,25.7420016],[-80.2579066,25.7399681],[-80.2589535,25.735533],[-80.2548376,25.7227923]]]}},{""type"":""Feature"",""id"":13,""properties"":{""name"":""CLUSTER-13""},""geometry"":{""type"":""Polygon"",""coordinates"":[[[-80.2255746,25.7425938],[-80.2249251,25.7426394],[-80.2131076,25.7568318],[-80.2128608,25.762845],[-80.2171405,25.7628341],[-80.2380256,25.7612338],[-80.243891,25.7536799],[-80.2255746,25.7425938]]]}}]}"
1250b133-5388-44f3-bb3c-9df5ce8712ce,"{""type"":""FeatureCollection"",""features"":[{""type"":""Feature"",""id"":0,""properties"":{""name"":""CLUSTER-0""},""geometry"":{""type"":""Polygon"",""coordinates"":[[[-80.1283033,25.8585723],[-80.1283033,25.8585723],[-80.1283033,25.8585723],[-80.1283033,25.8585723]]]}},{""type"":""Feature"",""id"":1,""properties"":{""name"":""CLUSTER-1""},""geometry"":{""type"":""Polygon"",""coordinates"":[[[-80.2346539,25.8067455],[-80.2346539,25.8067455],[-80.2346539,25.8067455],[-80.2346539,25.8067455]]]}},{""type"":""Feature"",""id"":2,""properties"":{""name"":""CLUSTER-2""},""geometry"":{""type"":""Polygon"",""coordinates"":[[[-80.2519689,25.7756849],[-80.247236,25.7810084],[-80.2443401,25.7857478],[-80.2483158,25.7887299],[-80.2515124,25.7821404],[-80.2519689,25.7756849]]]}},{""type"":""Feature"",""id"":3,""properties"":{""name"":""CLUSTER-3""},""geometry"":{""type"":""Polygon"",""coordinates"":[[[-80.1969654,25.8013044],[-80.1969654,25.8013044],[-80.1969654,25.8013044],[-80.1969654,25.8013044]]]}},{""type"":""Feature"",""id"":4,""properties"":{""name"":""CLUSTER-4""},""geometry"":{""type"":""Polygon"",""coordinates"":[[[-80.3517873,25.9098307],[-80.3513835,25.9108788],[-80.3507774,25.9159214],[-80.3517558,25.920376],[-80.3555817,25.9193081],[-80.3534954,25.9119795],[-80.3517873,25.9098307]]]}},{""type"":""Feature"",""id"":5,""properties"":{""name"":""CLUSTER-5""},""geometry"":{""type"":""Polygon"",""coordinates"":[[[-80.3065483,25.8786507],[-80.3065483,25.8786507],[-80.3125263,25.8924677],[-80.3106256,25.8876601],[-80.3120936,25.8900795],[-80.319577,25.8898087],[-80.3180663,25.8882089],[-80.3096884,25.8795539],[-80.3065483,25.8786507]]]}},{""type"":""Feature"",""id"":6,""properties"":{""name"":""CLUSTER-6""},""geometry"":{""type"":""Polygon"",""coordinates"":[[[-80.277072,25.8700466],[-80.277072,25.8700466],[-80.2706444,25.8733974],[-80.2706444,25.8733974],[-80.2707215,25.8750437],[-80.2760912,25.878233],[-80.2763229,25.8782284],[-80.
2806211,25.8726946],[-80.277072,25.8700466]]]}},{""type"":""Feature"",""id"":7,""properties"":{""name"":""CLUSTER-7""},""geometry"":{""type"":""Polygon"",""coordinates"":[[[-80.2177092,25.7995785],[-80.214158,25.8047545],[-80.2148601,25.8112221],[-80.2157,25.8163909],[-80.2180087,25.8280911],[-80.2199046,25.8301973],[-80.2223332,25.8176285],[-80.2224264,25.8157252],[-80.2177092,25.7995785]]]}},{""type"":""Feature"",""id"":8,""properties"":{""name"":""CLUSTER-8""},""geometry"":{""type"":""Polygon"",""coordinates"":[[[-80.2767173,25.7619447],[-80.2721096,25.7647372],[-80.2689763,25.7736197],[-80.2704995,25.7770474],[-80.2782987,25.7763291],[-80.2784677,25.7649262],[-80.2767173,25.7619447]]]}},{""type"":""Feature"",""id"":9,""properties"":{""name"":""CLUSTER-9""},""geometry"":{""type"":""Polygon"",""coordinates"":[[[-80.2871373,25.7669984],[-80.2871373,25.7669984],[-80.2863107,25.7739639],[-80.2863107,25.7739639],[-80.2903764,25.7764474],[-80.2960178,25.7767575],[-80.2995315,25.7743264],[-80.3005295,25.7717593],[-80.2957821,25.7686756],[-80.2886327,25.7672027],[-80.2871373,25.7669984]]]}},{""type"":""Feature"",""id"":10,""properties"":{""name"":""CLUSTER-10""},""geometry"":{""type"":""Polygon"",""coordinates"":[[[-80.1985341,25.8280589],[-80.1905466,25.8281043],[-80.1860957,25.8455461],[-80.1931697,25.8470053],[-80.2021052,25.8442977],[-80.1985341,25.8280589]]]}},{""type"":""Feature"",""id"":11,""properties"":{""name"":""CLUSTER-11""},""geometry"":{""type"":""Polygon"",""coordinates"":[[[-80.2549972,25.7147542],[-80.2219419,25.7424639],[-80.2162189,25.7495985],[-80.2131191,25.7552633],[-80.2200398,25.781289],[-80.2308036,25.7861241],[-80.2709898,25.7517123],[-80.2700279,25.7448846],[-80.2634703,25.7284482],[-80.2580566,25.7166046],[-80.2549972,25.7147542]]]}}]}"
ea705812-ea7b-48aa-b00c-fe0b3f623c81,"{""type"":""FeatureCollection"",""features"":[]}"
c352b76f-5f8e-437f-81ac-29fcf92f1d6f,"{""type"":""FeatureCollection"",""features"":[{""type"":""Feature"",""id"":0,""properties"":{""name"":""CLUSTER-0""},""geometry"":{""type"":""Polygon"",""coordinates"":[[[-80.3087572,25.8549049],[-80.3022038,25.8579825],[-80.3022038,25.8579825],[-80.3087572,25.8549049],[-80.3087572,25.8549049]]]}},{""type"":""Feature"",""id"":1,""properties"":{""name"":""CLUSTER-1""},""geometry"":{""type"":""Polygon"",""coordinates"":[[[-80.136351,25.7737441],[-80.136351,25.7737441],[-80.1334149,25.7763304],[-80.1334149,25.7763304],[-80.1357333,25.7896212],[-80.1373198,25.7898634],[-80.1379328,25.7879253],[-80.1389248,25.7838664],[-80.1380904,25.7774068],[-80.136351,25.7737441]]]}},{""type"":""Feature"",""id"":2,""properties"":{""name"":""CLUSTER-2""},""geometry"":{""type"":""Polygon"",""coordinates"":[[[-80.1357199,25.8089636],[-80.1264311,25.8101531],[-80.1256457,25.8108074],[-80.1265023,25.8249258],[-80.1267163,25.8277187],[-80.1290811,25.8296036],[-80.1309061,25.8288138],[-80.1375452,25.8140375],[-80.1357199,25.8089636]]]}},{""type"":""Feature"",""id"":3,""properties"":{""name"":""CLUSTER-3""},""geometry"":{""type"":""Polygon"",""coordinates"":[[[-80.3558776,25.9034119],[-80.3504233,25.9035056],[-80.3489687,25.9242889],[-80.354014,25.9267873],[-80.3548643,25.9262334],[-80.3566224,25.9220211],[-80.3569944,25.9196629],[-80.3558776,25.9034119]]]}},{""type"":""Feature"",""id"":4,""properties"":{""name"":""CLUSTER-4""},""geometry"":{""type"":""Polygon"",""coordinates"":[[[-80.1371945,25.850265],[-80.1343189,25.8505797],[-80.1290308,25.8542426],[-80.1249592,25.85928],[-80.1232372,25.8620061],[-80.1219762,25.8671892],[-80.1255009,25.8707789],[-80.1267139,25.8707309],[-80.1277664,25.8702245],[-80.1351113,25.8643438],[-80.1394457,25.8604562],[-80.1426251,25.8526735],[-80.1417532,25.8506585],[-80.1371945,25.850265]]]}},{""type"":""Feature"",""id"":5,""properties"":{""name"":""CLUSTER-5""},""geometry"":{""type"":""Polygon"",""coordinates"":[[[-80.28023
22,25.8675469],[-80.2738801,25.8678251],[-80.2658103,25.8682517],[-80.264757,25.8688539],[-80.2675987,25.8730187],[-80.2704848,25.8770361],[-80.2965239,25.897446],[-80.3528217,25.8959596],[-80.3553713,25.8893069],[-80.3516997,25.8865365],[-80.3285772,25.8729011],[-80.3250347,25.8714928],[-80.3043472,25.8688092],[-80.2802322,25.8675469]]]}},{""type"":""Feature"",""id"":6,""properties"":{""name"":""CLUSTER-6""},""geometry"":{""type"":""Polygon"",""coordinates"":[[[-80.265369,25.6967134],[-80.2599645,25.6989806],[-80.2482741,25.7072795],[-80.2038047,25.7530427],[-80.2015562,25.7565194],[-80.1775927,25.8402589],[-80.1793029,25.8497923],[-80.1896027,25.8517694],[-80.2217092,25.8357811],[-80.3071937,25.7707077],[-80.3065515,25.7643106],[-80.265369,25.6967134]]]}}]}"
89ac946e-32e8-4b08-9542-175afe87c5eb,"{""type"":""FeatureCollection"",""features"":[]}"
df60d2d1-0c97-4674-b9f7-ee4b59b01789,"{""type"":""FeatureCollection"",""features"":[{""type"":""Feature"",""id"":0,""properties"":{""name"":""CLUSTER-0""},""geometry"":{""type"":""Polygon"",""coordinates"":[[[-80.2515971,25.730977],[-80.2515971,25.730977],[-80.2515971,25.730977],[-80.2515971,25.730977]]]}},{""type"":""Feature"",""id"":1,""properties"":{""name"":""CLUSTER-1""},""geometry"":{""type"":""Polygon"",""coordinates"":[[[-80.2902598,25.7705265],[-80.2902598,25.7705265],[-80.2955922,25.7730057],[-80.2902598,25.7705265],[-80.2902598,25.7705265]]]}},{""type"":""Feature"",""id"":2,""properties"":{""name"":""CLUSTER-2""},""geometry"":{""type"":""Polygon"",""coordinates"":[[[-80.212157,25.834205],[-80.212157,25.834205],[-80.212157,25.834205],[-80.212157,25.834205]]]}},{""type"":""Feature"",""id"":3,""properties"":{""name"":""CLUSTER-3""},""geometry"":{""type"":""Polygon"",""coordinates"":[[[-80.2549587,25.7415774],[-80.2488266,25.744191],[-80.2530214,25.7427044],[-80.2549587,25.7415774]]]}},{""type"":""Feature"",""id"":4,""properties"":{""name"":""CLUSTER-4""},""geometry"":{""type"":""Polygon"",""coordinates"":[[[-80.268777,25.7644894],[-80.2615532,25.7775722],[-80.2695551,25.7731879],[-80.2759936,25.7667526],[-80.268777,25.7644894]]]}},{""type"":""Feature"",""id"":5,""properties"":{""name"":""CLUSTER-5""},""geometry"":{""type"":""Polygon"",""coordinates"":[[[-80.3167268,25.8822726],[-80.3167268,25.8822726],[-80.3146328,25.8868043],[-80.3146328,25.8868043],[-80.3176136,25.8931226],[-80.3217966,25.8881362],[-80.3217963,25.887706],[-80.3167268,25.8822726]]]}},{""type"":""Feature"",""id"":6,""properties"":{""name"":""CLUSTER-6""},""geometry"":{""type"":""Polygon"",""coordinates"":[[[-80.3440772,25.887764],[-80.3375835,25.8892814],[-80.3375138,25.890497],[-80.3412679,25.8939844],[-80.3491358,25.890063],[-80.3470148,25.8889044],[-80.3440772,25.887764]]]}},{""type"":""Feature"",""id"":7,""properties"":{""name"":""CLUSTER-7""},""geometry"":{""type"":""Polygon"",""coordin
ates"":[[[-80.2723727,25.8696868],[-80.2723727,25.8696868],[-80.2733447,25.8732578],[-80.2746232,25.8750567],[-80.2767556,25.8773534],[-80.2815143,25.8794325],[-80.2831651,25.8789281],[-80.2814826,25.8698643],[-80.2723727,25.8696868]]]}},{""type"":""Feature"",""id"":8,""properties"":{""name"":""CLUSTER-8""},""geometry"":{""type"":""Polygon"",""coordinates"":[[[-80.2395088,25.7671084],[-80.2395088,25.7671084],[-80.2372014,25.7697691],[-80.2372014,25.7697691],[-80.2426269,25.7833309],[-80.2474104,25.7839617],[-80.2479347,25.7837563],[-80.2465759,25.7761459],[-80.2449285,25.7715601],[-80.2438937,25.7694865],[-80.2433057,25.7691049],[-80.2395088,25.7671084]]]}},{""type"":""Feature"",""id"":9,""properties"":{""name"":""CLUSTER-9""},""geometry"":{""type"":""Polygon"",""coordinates"":[[[-80.2126491,25.8012139],[-80.1966516,25.8194691],[-80.1961449,25.8241751],[-80.198385,25.8268613],[-80.2116347,25.8245947],[-80.2207158,25.8211207],[-80.2236961,25.8182984],[-80.2179205,25.805101],[-80.2126491,25.8012139]]]}},{""type"":""Feature"",""id"":10,""properties"":{""name"":""CLUSTER-10""},""geometry"":{""type"":""Polygon"",""coordinates"":[[[-80.2272354,25.7492968],[-80.2153021,25.75304],[-80.2053257,25.7563732],[-80.2182947,25.7673127],[-80.2284964,25.7684269],[-80.2456498,25.7559524],[-80.2411371,25.7522732],[-80.2338324,25.7501958],[-80.2272354,25.7492968]]]}}]}"
691b91b2-81a6-49cb-b19c-73bbb66c006f,"{""type"":""FeatureCollection"",""features"":[{""type"":""Feature"",""id"":0,""properties"":{""name"":""CLUSTER-0""},""geometry"":{""type"":""Polygon"",""coordinates"":[[[-80.3512882,25.9183662],[-80.3512882,25.9183662],[-80.3512882,25.9183662],[-80.3512882,25.9183662]]]}},{""type"":""Feature"",""id"":1,""properties"":{""name"":""CLUSTER-1""},""geometry"":{""type"":""Polygon"",""coordinates"":[[[-80.1342821,25.8164708],[-80.1339788,25.817085],[-80.1340017,25.8191127],[-80.1342821,25.8164708]]]}},{""type"":""Feature"",""id"":2,""properties"":{""name"":""CLUSTER-2""},""geometry"":{""type"":""Polygon"",""coordinates"":[[[-80.1370031,25.7791783],[-80.1355709,25.7817621],[-80.1348849,25.783277],[-80.1365881,25.7813143],[-80.1370031,25.7791783]]]}},{""type"":""Feature"",""id"":3,""properties"":{""name"":""CLUSTER-3""},""geometry"":{""type"":""Polygon"",""coordinates"":[[[-80.3020972,25.8555093],[-80.3019145,25.8598656],[-80.3018494,25.8614757],[-80.3025577,25.8623006],[-80.3031212,25.8611871],[-80.3020972,25.8555093]]]}},{""type"":""Feature"",""id"":4,""properties"":{""name"":""CLUSTER-4""},""geometry"":{""type"":""Polygon"",""coordinates"":[[[-80.1235471,25.861056],[-80.122384,25.8613804],[-80.1215315,25.8667983],[-80.1267833,25.8684693],[-80.1293965,25.8627229],[-80.1235471,25.861056]]]}},{""type"":""Feature"",""id"":5,""properties"":{""name"":""CLUSTER-5""},""geometry"":{""type"":""Polygon"",""coordinates"":[[[-80.2704664,25.8724155],[-80.2704664,25.8724155],[-80.2695525,25.8765074],[-80.2695525,25.8765074],[-80.270388,25.8796827],[-80.2727967,25.8800808],[-80.2751836,25.8781883],[-80.2763201,25.8755115],[-80.2704664,25.8724155]]]}},{""type"":""Feature"",""id"":6,""properties"":{""name"":""CLUSTER-6""},""geometry"":{""type"":""Polygon"",""coordinates"":[[[-80.355241,25.9023433],[-80.350986,25.9027106],[-80.3508158,25.9036964],[-80.3521852,25.9086373],[-80.3564659,25.9080332],[-80.3563873,25.9063291],[-80.355241,25.9023433]]]}},{""t
ype"":""Feature"",""id"":7,""properties"":{""name"":""CLUSTER-7""},""geometry"":{""type"":""Polygon"",""coordinates"":[[[-80.228249,25.7919648],[-80.228249,25.7919648],[-80.2270851,25.7949533],[-80.2270851,25.7949533],[-80.2371143,25.8057127],[-80.237548,25.8048081],[-80.2382785,25.798695],[-80.23538,25.7950617],[-80.2333297,25.7936629],[-80.228249,25.7919648]]]}},{""type"":""Feature"",""id"":8,""properties"":{""name"":""CLUSTER-8""},""geometry"":{""type"":""Polygon"",""coordinates"":[[[-80.326107,25.8756372],[-80.326107,25.8756372],[-80.3253004,25.8766176],[-80.3253004,25.8766176],[-80.3294517,25.8828485],[-80.333975,25.8874984],[-80.3348368,25.8880866],[-80.3372789,25.8895243],[-80.3479182,25.893859],[-80.3478903,25.8932022],[-80.3470449,25.8894939],[-80.3466918,25.8881038],[-80.3453287,25.8858843],[-80.3302721,25.8766934],[-80.326107,25.8756372]]]}},{""type"":""Feature"",""id"":9,""properties"":{""name"":""CLUSTER-9""},""geometry"":{""type"":""Polygon"",""coordinates"":[[[-80.3010384,25.8711492],[-80.3010384,25.8711492],[-80.2945979,25.8732195],[-80.2945979,25.8732195],[-80.2962851,25.8922244],[-80.3197615,25.8925271],[-80.3222056,25.8902139],[-80.3217622,25.8869499],[-80.3209503,25.884807],[-80.3010384,25.8711492]]]}},{""type"":""Feature"",""id"":10,""properties"":{""name"":""CLUSTER-10""},""geometry"":{""type"":""Polygon"",""coordinates"":[[[-80.2613722,25.7074406],[-80.2561824,25.7114261],[-80.2139308,25.7504729],[-80.2040596,25.7626022],[-80.1933724,25.7908201],[-80.1744721,25.8485842],[-80.1793908,25.850877],[-80.1959893,25.8494768],[-80.2965154,25.7731361],[-80.2986253,25.7666801],[-80.2613722,25.7074406]]]}}]}"
7627848d-c2ba-47f3-b5ce-1466e018c0c7,"{""type"":""FeatureCollection"",""features"":[{""type"":""Feature"",""id"":0,""properties"":{""name"":""CLUSTER-0""},""geometry"":{""type"":""Polygon"",""coordinates"":[[[-80.2780581,25.8736805],[-80.2780581,25.8736805],[-80.2780581,25.8736805],[-80.2780581,25.8736805]]]}},{""type"":""Feature"",""id"":1,""properties"":{""name"":""CLUSTER-1""},""geometry"":{""type"":""Polygon"",""coordinates"":[[[-80.2675901,25.7488373],[-80.2675901,25.7488373],[-80.2675901,25.7488373],[-80.2675901,25.7488373]]]}},{""type"":""Feature"",""id"":2,""properties"":{""name"":""CLUSTER-2""},""geometry"":{""type"":""Polygon"",""coordinates"":[[[-80.2327751,25.7752465],[-80.2327751,25.7752465],[-80.2327751,25.7752465],[-80.2327751,25.7752465]]]}},{""type"":""Feature"",""id"":3,""properties"":{""name"":""CLUSTER-3""},""geometry"":{""type"":""Polygon"",""coordinates"":[[[-80.2334774,25.806243],[-80.2334774,25.806243],[-80.2334774,25.806243],[-80.2334774,25.806243]]]}},{""type"":""Feature"",""id"":4,""properties"":{""name"":""CLUSTER-4""},""geometry"":{""type"":""Polygon"",""coordinates"":[[[-80.3054482,25.8555053],[-80.3054482,25.8555053],[-80.3038102,25.8562639],[-80.3038102,25.8562639],[-80.3054482,25.8555053]]]}},{""type"":""Feature"",""id"":5,""properties"":{""name"":""CLUSTER-5""},""geometry"":{""type"":""Polygon"",""coordinates"":[[[-80.3476061,25.8849967],[-80.3476061,25.8849967],[-80.3476061,25.8849967],[-80.3493218,25.8919502],[-80.3537775,25.8892386],[-80.3535042,25.8890193],[-80.3476061,25.8849967]]]}},{""type"":""Feature"",""id"":6,""properties"":{""name"":""CLUSTER-6""},""geometry"":{""type"":""Polygon"",""coordinates"":[[[-80.3151647,25.8817924],[-80.3151647,25.8817924],[-80.313411,25.8866283],[-80.313411,25.8866283],[-80.3146792,25.8949386],[-80.3174308,25.8882123],[-80.3151647,25.8817924]]]}},{""type"":""Feature"",""id"":7,""properties"":{""name"":""CLUSTER-7""},""geometry"":{""type"":""Polygon"",""coordinates"":[[[-80.2910889,25.8889556],[-
80.2910889,25.8889556],[-80.2947991,25.8946605],[-80.3006791,25.8955354],[-80.2997157,25.8946165],[-80.3022818,25.8948148],[-80.2981686,25.8894352],[-80.2968541,25.8889992],[-80.2910889,25.8889556]]]}},{""type"":""Feature"",""id"":8,""properties"":{""name"":""CLUSTER-8""},""geometry"":{""type"":""Polygon"",""coordinates"":[[[-80.1955363,25.786785],[-80.1955363,25.786785],[-80.1927609,25.7913816],[-80.1927609,25.7913816],[-80.1951661,25.7935628],[-80.1986847,25.796152],[-80.1989547,25.7931456],[-80.1986067,25.7907106],[-80.1955363,25.786785]]]}},{""type"":""Feature"",""id"":9,""properties"":{""name"":""CLUSTER-9""},""geometry"":{""type"":""Polygon"",""coordinates"":[[[-80.3517834,25.9138508],[-80.3509674,25.9196559],[-80.3509726,25.9197696],[-80.3522435,25.9224793],[-80.3551835,25.9228115],[-80.3569592,25.9188876],[-80.3522078,25.914222],[-80.3517834,25.9138508]]]}},{""type"":""Feature"",""id"":10,""properties"":{""name"":""CLUSTER-10""},""geometry"":{""type"":""Polygon"",""coordinates"":[[[-80.2252389,25.7437651],[-80.2183445,25.7439952],[-80.2045318,25.7569737],[-80.2016837,25.7803651],[-80.2062361,25.780041],[-80.218232,25.7731473],[-80.2257152,25.7667212],[-80.2252389,25.7437651]]]}},{""type"":""Feature"",""id"":11,""properties"":{""name"":""CLUSTER-11""},""geometry"":{""type"":""Polygon"",""coordinates"":[[[-80.2488361,25.7347105],[-80.2488361,25.7347105],[-80.2341626,25.7442779],[-80.2341626,25.7442779],[-80.2353412,25.7590834],[-80.2435822,25.7727606],[-80.2720464,25.7740011],[-80.2782332,25.7742168],[-80.286792,25.773976],[-80.2928487,25.7711643],[-80.2830392,25.7580555],[-80.2488361,25.7347105]]]}},{""type"":""Feature"",""id"":12,""properties"":{""name"":""CLUSTER-12""},""geometry"":{""type"":""Polygon"",""coordinates"":[[[-80.2115784,25.8009015],[-80.2063866,25.8016786],[-80.2040724,25.802455],[-80.1983046,25.8045714],[-80.1966649,25.8060332],[-80.1955394,25.8081026],[-80.1854664,25.8414932],[-80.1890261,25.844594],[-80.1908826,25.8452641],[-80.1982344,25.8
457776],[-80.2265462,25.8286787],[-80.227213,25.8278049],[-80.2274237,25.8256456],[-80.2276527,25.8145057],[-80.2140319,25.8014593],[-80.2129577,25.8012031],[-80.2115784,25.8009015]]]}}]}"
32ae2f3d-5dff-4696-a518-8ac3411df56c,"{""type"":""FeatureCollection"",""features"":[{""type"":""Feature"",""id"":0,""properties"":{""name"":""CLUSTER-0""},""geometry"":{""type"":""Polygon"",""coordinates"":[[[-80.1380951,25.7775521],[-80.1380951,25.7775521],[-80.1380951,25.7775521],[-80.1380951,25.7775521]]]}},{""type"":""Feature"",""id"":1,""properties"":{""name"":""CLUSTER-1""},""geometry"":{""type"":""Polygon"",""coordinates"":[[[-80.1227158,25.8610904],[-80.1208371,25.8650954],[-80.1251555,25.8666608],[-80.1264098,25.8669615],[-80.124086,25.862542],[-80.1227158,25.8610904]]]}},{""type"":""Feature"",""id"":2,""properties"":{""name"":""CLUSTER-2""},""geometry"":{""type"":""Polygon"",""coordinates"":[[[-80.3117651,25.8515498],[-80.3097111,25.8518971],[-80.2975664,25.8584862],[-80.2957247,25.8609983],[-80.3007917,25.8635289],[-80.3111333,25.8562666],[-80.3117651,25.8515498]]]}},{""type"":""Feature"",""id"":3,""properties"":{""name"":""CLUSTER-3""},""geometry"":{""type"":""Polygon"",""coordinates"":[[[-80.3542705,25.9171581],[-80.3513402,25.919804],[-80.3516403,25.9223564],[-80.3562649,25.9232599],[-80.356929,25.9182231],[-80.3542705,25.9171581]]]}},{""type"":""Feature"",""id"":4,""properties"":{""name"":""CLUSTER-4""},""geometry"":{""type"":""Polygon"",""coordinates"":[[[-80.3379985,25.8850273],[-80.3355821,25.8862761],[-80.334374,25.889289],[-80.3471579,25.8932023],[-80.3495745,25.8907038],[-80.3462625,25.8868909],[-80.3379985,25.8850273]]]}},{""type"":""Feature"",""id"":5,""properties"":{""name"":""CLUSTER-5""},""geometry"":{""type"":""Polygon"",""coordinates"":[[[-80.1828302,25.8435562],[-80.1748926,25.8464434],[-80.1759877,25.8533428],[-80.1801697,25.8526945],[-80.1841215,25.8520472],[-80.1845055,25.8511002],[-80.1828302,25.8435562]]]}},{""type"":""Feature"",""id"":6,""properties"":{""name"":""CLUSTER-6""},""geometry"":{""type"":""Polygon"",""coordinates"":[[[-80.2812236,25.724569],[-80.2786463,25.7276838],[-80.2783924,25.7310458],[-80.2785295,25.7348106],[-80.2
85384,25.7337367],[-80.2859579,25.7282294],[-80.2812236,25.724569]]]}},{""type"":""Feature"",""id"":7,""properties"":{""name"":""CLUSTER-7""},""geometry"":{""type"":""Polygon"",""coordinates"":[[[-80.1292894,25.8048354],[-80.1282341,25.8058219],[-80.1262523,25.8091671],[-80.1275729,25.8184755],[-80.1302905,25.8242044],[-80.1305571,25.8241995],[-80.13407,25.8211788],[-80.1349504,25.8167277],[-80.1307866,25.8048901],[-80.1292894,25.8048354]]]}},{""type"":""Feature"",""id"":8,""properties"":{""name"":""CLUSTER-8""},""geometry"":{""type"":""Polygon"",""coordinates"":[[[-80.2751952,25.8687107],[-80.2709893,25.8697657],[-80.2730282,25.8738225],[-80.2904463,25.8884029],[-80.3018687,25.8940467],[-80.3147067,25.8955537],[-80.3216869,25.889001],[-80.3256755,25.876456],[-80.3249231,25.8753833],[-80.3105925,25.8723978],[-80.2751952,25.8687107]]]}},{""type"":""Feature"",""id"":9,""properties"":{""name"":""CLUSTER-9""},""geometry"":{""type"":""Polygon"",""coordinates"":[[[-80.2604183,25.7024423],[-80.2536138,25.7085412],[-80.2186408,25.7407354],[-80.1991845,25.7628017],[-80.1951821,25.7694882],[-80.1912527,25.7943185],[-80.1898332,25.8399142],[-80.1944491,25.8440243],[-80.2216242,25.8378034],[-80.2240751,25.8368333],[-80.3040937,25.7728973],[-80.3066162,25.7662039],[-80.2699294,25.713322],[-80.2604183,25.7024423]]]}}]}"
2bc861c4-34aa-4433-aa46-23ebed4a2122,"{""type"":""FeatureCollection"",""features"":[]}"


In [0]:
from pyspark.sql import functions as F

In [0]:
#b8fe8ae7-3450-416f-ae75-a03fb0488fbf
from pyspark.sql import functions as F

# Pick one user and overlay a heat map of their transactions with the
# cluster shapes stored for them in `geoJsons`.
user = 'b07db0f6-7958-4f8e-b2ea-b80e775ff48b'

# GeoJSON string taken from the first row returned for this user.
personalized_geojson = (
  geoJsons
    .filter(F.col('user') == user)
    .toPandas()
    .iloc[0]
    .cluster
)

# Only the coordinates are needed for the heat map.
personalized_data = (
  points_df
    .filter(F.col('user') == user)
    .toPandas()
    .loc[:, ['latitude', 'longitude']]
)

miami_personalized = folium.Map(
  location=[25.761681, -80.191788],
  zoom_start=12,
  width='80%',
  height='100%',
)
miami_personalized.add_child(folium.TileLayer('Stamen Toner'))
plugins.HeatMap(personalized_data.to_numpy(), radius=16).add_to(miami_personalized)
miami_personalized.add_child(folium.GeoJson(personalized_geojson, name="geojson"))

# Last expression of the cell so the notebook renders the map.
miami_personalized

In [0]:
# Export every transaction of the currently selected `user` as CSV.
# The file name is built from `user` so it always matches the rows that are
# actually written (the filter below uses the same variable).
personalized_csv_path = f'/dbfs/FileStore/geospatial_fraud_detection/personalized_miami_data_{user}.csv'
points_df.filter(F.col('user') == user).toPandas().to_csv(personalized_csv_path, index=False)

In [0]:
import geojson
# Parse the user's GeoJSON string and write it back out as a .geojson file.
with open('/dbfs/FileStore/geospatial_fraud_detection/personalized_miami_geojson_b07db0f6-7958-4f8e-b2ea-b80e775ff48b.geojson', 'w') as f:
  parsed_geojson = geojson.loads(personalized_geojson)
  geojson.dump(parsed_geojson, f)

In [0]:
# Render points_df with the extra `cluster` column added by
# model_personalized.transform; the output below shows that column left empty
# for some rows.
display(model_personalized.transform(points_df))

user,latitude,longitude,amount,cluster
b07db0f6-7958-4f8e-b2ea-b80e775ff48b,25.7364282,-80.232984,134.61,9.0
27e8040c-0649-4674-b48b-49e308a8c9e5,25.7360642,-80.2340774,108.91,8.0
7e7bb66b-44ea-4d7d-9450-be80f1e28990,25.7359423,-80.2342303,142.25,
f0485504-04f4-4bc6-912f-6913d0519223,25.7358308,-80.2343706,121.28,
2a280722-2008-4d82-aa02-9a57fec8c472,25.7352896,-80.2342836,102.65,6.0
c3376825-04d8-49b7-8c89-cd3fd14df8b7,25.7354298,-80.2343998,184.89,
86906e6f-f6db-477b-b261-929ce15fa4d2,25.7355748,-80.2341353,116.22,5.0
1f8704c6-0752-4e52-80eb-8784a5e324b2,25.7355942,-80.2339791,14.51,
c3376825-04d8-49b7-8c89-cd3fd14df8b7,25.7357477,-80.2345242,17.14,
add072c0-77fe-43ca-8c47-35645861a05e,25.7355126,-80.2346039,130.98,


In [0]:
# Ask the model for its tiles and preview them; the result has the columns
# user, cluster and h3 (see the output below).
# NOTE(review): the meaning of `precision` / `layers` is defined by getTiles,
# which is not visible here - presumably H3 resolution 10, matching to_h3(..., 10).
personalized_tiles = model_personalized.getTiles(precision=10, layers=5)
display(personalized_tiles)

user,cluster,h3
b8fe8ae7-3450-416f-ae75-a03fb0488fbf,5,8A44A18DC2A7FFF
b8fe8ae7-3450-416f-ae75-a03fb0488fbf,5,8A44A18D89AFFFF
b8fe8ae7-3450-416f-ae75-a03fb0488fbf,5,8A44A18D8937FFF
b8fe8ae7-3450-416f-ae75-a03fb0488fbf,5,8A44A18D8B27FFF
b8fe8ae7-3450-416f-ae75-a03fb0488fbf,5,8A44A18DDC97FFF
b8fe8ae7-3450-416f-ae75-a03fb0488fbf,5,8A44A18D8827FFF
b8fe8ae7-3450-416f-ae75-a03fb0488fbf,5,8A44A18DD4D7FFF
b8fe8ae7-3450-416f-ae75-a03fb0488fbf,5,8A44A18DC2AFFFF
b8fe8ae7-3450-416f-ae75-a03fb0488fbf,5,8A44A18D891FFFF
b8fe8ae7-3450-416f-ae75-a03fb0488fbf,5,8A44A18DC217FFF


In [0]:
# H3 cell id (precision 10) of every transaction, keyed by user.
points_h3 = points_df.select(
  F.col('user'),
  to_h3(F.col('latitude'), F.col('longitude'), F.lit(10)).alias('h3'),
)

# Document frequency: for every tile present in personalized_tiles, the number
# of distinct users that have at least one transaction in that tile.
document_frequency = (
  personalized_tiles
    .select('h3')
    # The same tile appears once per (user, cluster); de-duplicate before the
    # join so each matching point is not repeated once per tile copy.
    .distinct()
    .join(points_h3, ['h3'])
    .select('user', 'h3')
    .distinct()
    .groupBy('h3')
    .agg(F.count(F.lit(1)).alias('df'))
)

In [0]:
# Term frequency: how many of a user's transactions fall into each tile of
# each of that user's clusters.
term_frequency = (
  personalized_tiles
    .join(points_h3, on=['h3', 'user'], how='inner')
    .groupBy('user', 'h3', 'cluster')
    .agg(F.count(F.lit(1)).alias('tf'))
)

In [0]:
# Row count of term_frequency, shown as the cell result.
term_frequency.count()

In [0]:
import math
# Kept as a broadcast so any code reading `n.value` keeps working.
# NOTE(review): `n` is the row count of document_frequency, i.e. the number of
# distinct H3 tiles, whereas textbook IDF divides the number of documents
# (users here) by df - confirm this normaliser is intended.
n = sc.broadcast(document_frequency.count())


def tf_idf(tf, df):
  """Return the tf-idf score column ``tf * ln(n / df)``.

  Built from native Spark column functions instead of a Python UDF, so rows
  are not shipped to a Python worker one at a time.

  Args:
    tf: Column holding the term frequency (visits of a user to a tile).
    df: Column holding the document frequency (distinct users in a tile).

  Returns:
    A double Column with the tf-idf score.
  """
  return tf * F.log(F.lit(n.value) / df)


personalized_areas = (
  term_frequency
    .join(document_frequency, ['h3'])
    .withColumn('tf_idf', tf_idf(F.col('tf'), F.col('df')))
    .select('user', 'cluster', 'h3', 'tf_idf')
)

# display(personalized_areas)

In [0]:
# Number of (user, cluster, h3) rows that received a tf_idf score.
print(personalized_areas.count())

In [0]:
# Persist the scored tiles as the Delta table geospatial_miami.tiles,
# overwriting whatever the table held before.
personalized_areas.write.format('delta').mode('overwrite').saveAsTable('geospatial_miami.tiles')

In [0]:
%sql
-- Optimize the tiles table with Z-ordering on (user, h3); other cells filter
-- this table on user and join it on (user, h3).
OPTIMIZE geospatial_miami.tiles ZORDER BY (user, h3)

path,metrics
,"List(0, 0, List(null, null, 0.0, 0, 0), List(null, null, 0.0, 0, 0), 0, List(minCubeSize(107374182400), List(0, 0), List(1, 271601), 0, List(0, 0), 0, null), 0)"


In [0]:
# Re-bind personalized_tiles to the persisted, tf_idf-scored tiles of the
# selected `user` only (the name previously held the model's getTiles output).
personalized_tiles = spark.read.table('geospatial_miami.tiles').filter(F.col('user') == user)
display(personalized_tiles)

user,cluster,h3,tf_idf
b07db0f6-7958-4f8e-b2ea-b80e775ff48b,9,8A44A1129A67FFF,6.444572777304549
b07db0f6-7958-4f8e-b2ea-b80e775ff48b,9,8A44A11762CFFFF,6.501731191144497
b07db0f6-7958-4f8e-b2ea-b80e775ff48b,9,8A44A18D8657FFF,7.137719957864494
b07db0f6-7958-4f8e-b2ea-b80e775ff48b,9,8A44A18DB31FFFF,6.199450319271564
b07db0f6-7958-4f8e-b2ea-b80e775ff48b,9,8A44A18DB1A7FFF,6.626894334098504
b07db0f6-7958-4f8e-b2ea-b80e775ff48b,9,8A44A112C2EFFFF,7.543185065972659
b07db0f6-7958-4f8e-b2ea-b80e775ff48b,9,8A44A117384FFFF,7.032359442206668
b07db0f6-7958-4f8e-b2ea-b80e775ff48b,5,8A44A1023D07FFF,6.769995177739177
b07db0f6-7958-4f8e-b2ea-b80e775ff48b,5,8A44A10204A7FFF,6.850037885412713
b07db0f6-7958-4f8e-b2ea-b80e775ff48b,9,8A44A18DD0CFFFF,6.5623558129609325


In [0]:
# Highest tf_idf score observed in each of the selected user's clusters.
personalized_density = (
  personalized_tiles
    .groupBy('cluster')
    .agg(F.max('tf_idf').alias('max_tf_idf'))
    .toPandas()[['cluster', 'max_tf_idf']]
)
# GeoJSON string (cluster polygons) of the first row returned for this user.
personalized_geojson = geoJsons.filter(F.col('user') == user).toPandas().cluster.iloc[0]

# Quantile edges used as colour-scale boundaries.
data_bins = list(personalized_density.max_tf_idf.quantile([0, 0.25, 0.5, 0.6, 0.7, 0.8, 0.9, 1]))
# Explicit edges are only usable when they strictly increase; with a single
# cluster, identical scores or no data (NaN edges) fall back to folium's
# default of 6 equal-width bins.
has_distinct_edges = all(lo < hi for lo, hi in zip(data_bins, data_bins[1:]))
choropleth_bins = data_bins if has_distinct_edges else 6

miami_personalized = folium.Map([25.761681,-80.191788], zoom_start=12, width='80%', height='100%')
folium.TileLayer('Stamen Toner').add_to(miami_personalized)

# Color least popular areas by quantile
folium.Choropleth(
    geo_data = personalized_geojson,
    name='choropleth',
    data = personalized_density,
    columns=['cluster','max_tf_idf'],
    key_on='feature.id',  # feature ids in the GeoJSON are the cluster numbers
    bins=choropleth_bins,  # previously computed but never passed to folium
    fill_color='BuPu',
    fill_opacity=0.9,
    line_opacity=0.7
).add_to(miami_personalized)

# Last expression of the cell so the notebook renders the map.
miami_personalized

In [0]:
%pip install pybloomfiltermmap3==0.5.3 h3==3.7.1 folium==0.12.1 mlflow

In [0]:
# Load the scored tiles (columns user, cluster, h3, tf_idf) from the Delta table.
tiles = spark.read.table('geospatial_miami.tiles')
display(tiles)

user,cluster,h3,tf_idf
2cf14366-9a82-4350-aad6-41b940d34f23,10,8A44A18DA1AFFFF,6.769995177739177
2619f516-692f-4dea-9af3-d11405fcb0e0,7,8A44A18DA1AFFFF,13.539990355478354
c5a6666b-e025-4b54-adcd-ed03d184627f,8,8A44A18DA1AFFFF,6.769995177739177
ba1b1e47-41f4-4518-b299-5e76c1f2a1ac,10,8A44A18DA1AFFFF,6.769995177739177
add072c0-77fe-43ca-8c47-35645861a05e,8,8A44A18DA1AFFFF,6.769995177739177
8c4e3a33-9839-4b80-beb0-ba2450c63dd7,10,8A44A18DA1AFFFF,6.769995177739177
42858747-cba8-451f-8429-bccb324a42dd,7,8A44A18DA1AFFFF,6.769995177739177
3672c32c-6233-4ec8-b06f-24323f44f232,10,8A44A18DA1AFFFF,6.769995177739177
81016721-928b-4303-8720-df2880e6c542,12,8A44A18DA1AFFFF,6.769995177739177
df27caa8-d0c1-475d-9648-e318592e95e5,7,8A44A18DA1AFFFF,6.769995177739177


In [0]:
# Load the stored model data as a plain DataFrame: per the output below, one
# row per user with that user's clusters as a GeoJSON FeatureCollection string.
# NOTE: this re-binds model_personalized, which earlier cells use as a model
# object (transform / getTiles).
model_personalized = spark.read.format('parquet').load('/FileStore/demo-fsi/models/geoscan_miami_personalized/data')
display(model_personalized)

user,cluster
b8fe8ae7-3450-416f-ae75-a03fb0488fbf,"{""type"":""FeatureCollection"",""features"":[{""type"":""Feature"",""id"":0,""properties"":{""name"":""CLUSTER-0""},""geometry"":{""type"":""Polygon"",""coordinates"":[[[-80.1909898,25.8366814],[-80.1909898,25.8366814],[-80.1909898,25.8366814],[-80.1909898,25.8366814]]]}},{""type"":""Feature"",""id"":1,""properties"":{""name"":""CLUSTER-1""},""geometry"":{""type"":""Polygon"",""coordinates"":[[[-80.2887536,25.8805134],[-80.2887536,25.8805134],[-80.2887536,25.8805134],[-80.2887536,25.8805134]]]}},{""type"":""Feature"",""id"":2,""properties"":{""name"":""CLUSTER-2""},""geometry"":{""type"":""Polygon"",""coordinates"":[[[-80.1287899,25.8637311],[-80.1287899,25.8637311],[-80.1287899,25.8637311],[-80.1287899,25.8637311]]]}},{""type"":""Feature"",""id"":3,""properties"":{""name"":""CLUSTER-3""},""geometry"":{""type"":""Polygon"",""coordinates"":[[[-80.2317452,25.7934699],[-80.2311459,25.7938602],[-80.2311459,25.7938602],[-80.2317452,25.7934699],[-80.2317452,25.7934699]]]}},{""type"":""Feature"",""id"":4,""properties"":{""name"":""CLUSTER-4""},""geometry"":{""type"":""Polygon"",""coordinates"":[[[-80.2711905,25.8733634],[-80.2711905,25.8733634],[-80.2711905,25.8733634],[-80.2711905,25.8733634]]]}},{""type"":""Feature"",""id"":5,""properties"":{""name"":""CLUSTER-5""},""geometry"":{""type"":""Polygon"",""coordinates"":[[[-80.2662395,25.752664],[-80.2662395,25.752664],[-80.266162,25.7544316],[-80.266162,25.7544316],[-80.2664126,25.7552817],[-80.266642,25.7552727],[-80.2662395,25.752664]]]}},{""type"":""Feature"",""id"":6,""properties"":{""name"":""CLUSTER-6""},""geometry"":{""type"":""Polygon"",""coordinates"":[[[-80.3028346,25.7682879],[-80.2954263,25.7685212],[-80.2928529,25.7712834],[-80.3000259,25.7742709],[-80.3028346,25.7682879]]]}},{""type"":""Feature"",""id"":7,""properties"":{""name"":""CLUSTER-7""},""geometry"":{""type"":""Polygon"",""coordinates"":[[[-80.1932256,25.8153361],[-80.1932256,25.8153361],[-80.1962503,25.8206981],[-
80.1970874,25.819012],[-80.2046386,25.820372],[-80.2008864,25.8167332],[-80.1932256,25.8153361]]]}},{""type"":""Feature"",""id"":8,""properties"":{""name"":""CLUSTER-8""},""geometry"":{""type"":""Polygon"",""coordinates"":[[[-80.218791,25.8288319],[-80.2122725,25.8304417],[-80.2136767,25.8326901],[-80.2149485,25.8345818],[-80.2183551,25.8309762],[-80.218791,25.8288319]]]}},{""type"":""Feature"",""id"":9,""properties"":{""name"":""CLUSTER-9""},""geometry"":{""type"":""Polygon"",""coordinates"":[[[-80.2130673,25.800085],[-80.2130673,25.800085],[-80.2069565,25.8015571],[-80.2069565,25.8015571],[-80.2122648,25.8087139],[-80.2130673,25.800085]]]}},{""type"":""Feature"",""id"":10,""properties"":{""name"":""CLUSTER-10""},""geometry"":{""type"":""Polygon"",""coordinates"":[[[-80.2992887,25.8642998],[-80.2924814,25.8722919],[-80.2991773,25.8711795],[-80.3020447,25.8699474],[-80.3008212,25.864941],[-80.2992887,25.8642998]]]}},{""type"":""Feature"",""id"":11,""properties"":{""name"":""CLUSTER-11""},""geometry"":{""type"":""Polygon"",""coordinates"":[[[-80.3375881,25.8874853],[-80.3341548,25.8895404],[-80.3339701,25.8905984],[-80.3411234,25.8921806],[-80.3414502,25.8912889],[-80.341168,25.8898181],[-80.3375881,25.8874853]]]}},{""type"":""Feature"",""id"":12,""properties"":{""name"":""CLUSTER-12""},""geometry"":{""type"":""Polygon"",""coordinates"":[[[-80.2548376,25.7227923],[-80.2508757,25.724679],[-80.2454013,25.7325963],[-80.2454285,25.7415429],[-80.2454491,25.7420016],[-80.2579066,25.7399681],[-80.2589535,25.735533],[-80.2548376,25.7227923]]]}},{""type"":""Feature"",""id"":13,""properties"":{""name"":""CLUSTER-13""},""geometry"":{""type"":""Polygon"",""coordinates"":[[[-80.2255746,25.7425938],[-80.2249251,25.7426394],[-80.2131076,25.7568318],[-80.2128608,25.762845],[-80.2171405,25.7628341],[-80.2380256,25.7612338],[-80.243891,25.7536799],[-80.2255746,25.7425938]]]}}]}"
1250b133-5388-44f3-bb3c-9df5ce8712ce,"{""type"":""FeatureCollection"",""features"":[{""type"":""Feature"",""id"":0,""properties"":{""name"":""CLUSTER-0""},""geometry"":{""type"":""Polygon"",""coordinates"":[[[-80.1283033,25.8585723],[-80.1283033,25.8585723],[-80.1283033,25.8585723],[-80.1283033,25.8585723]]]}},{""type"":""Feature"",""id"":1,""properties"":{""name"":""CLUSTER-1""},""geometry"":{""type"":""Polygon"",""coordinates"":[[[-80.2346539,25.8067455],[-80.2346539,25.8067455],[-80.2346539,25.8067455],[-80.2346539,25.8067455]]]}},{""type"":""Feature"",""id"":2,""properties"":{""name"":""CLUSTER-2""},""geometry"":{""type"":""Polygon"",""coordinates"":[[[-80.2519689,25.7756849],[-80.247236,25.7810084],[-80.2443401,25.7857478],[-80.2483158,25.7887299],[-80.2515124,25.7821404],[-80.2519689,25.7756849]]]}},{""type"":""Feature"",""id"":3,""properties"":{""name"":""CLUSTER-3""},""geometry"":{""type"":""Polygon"",""coordinates"":[[[-80.1969654,25.8013044],[-80.1969654,25.8013044],[-80.1969654,25.8013044],[-80.1969654,25.8013044]]]}},{""type"":""Feature"",""id"":4,""properties"":{""name"":""CLUSTER-4""},""geometry"":{""type"":""Polygon"",""coordinates"":[[[-80.3517873,25.9098307],[-80.3513835,25.9108788],[-80.3507774,25.9159214],[-80.3517558,25.920376],[-80.3555817,25.9193081],[-80.3534954,25.9119795],[-80.3517873,25.9098307]]]}},{""type"":""Feature"",""id"":5,""properties"":{""name"":""CLUSTER-5""},""geometry"":{""type"":""Polygon"",""coordinates"":[[[-80.3065483,25.8786507],[-80.3065483,25.8786507],[-80.3125263,25.8924677],[-80.3106256,25.8876601],[-80.3120936,25.8900795],[-80.319577,25.8898087],[-80.3180663,25.8882089],[-80.3096884,25.8795539],[-80.3065483,25.8786507]]]}},{""type"":""Feature"",""id"":6,""properties"":{""name"":""CLUSTER-6""},""geometry"":{""type"":""Polygon"",""coordinates"":[[[-80.277072,25.8700466],[-80.277072,25.8700466],[-80.2706444,25.8733974],[-80.2706444,25.8733974],[-80.2707215,25.8750437],[-80.2760912,25.878233],[-80.2763229,25.8782284],[-80.
2806211,25.8726946],[-80.277072,25.8700466]]]}},{""type"":""Feature"",""id"":7,""properties"":{""name"":""CLUSTER-7""},""geometry"":{""type"":""Polygon"",""coordinates"":[[[-80.2177092,25.7995785],[-80.214158,25.8047545],[-80.2148601,25.8112221],[-80.2157,25.8163909],[-80.2180087,25.8280911],[-80.2199046,25.8301973],[-80.2223332,25.8176285],[-80.2224264,25.8157252],[-80.2177092,25.7995785]]]}},{""type"":""Feature"",""id"":8,""properties"":{""name"":""CLUSTER-8""},""geometry"":{""type"":""Polygon"",""coordinates"":[[[-80.2767173,25.7619447],[-80.2721096,25.7647372],[-80.2689763,25.7736197],[-80.2704995,25.7770474],[-80.2782987,25.7763291],[-80.2784677,25.7649262],[-80.2767173,25.7619447]]]}},{""type"":""Feature"",""id"":9,""properties"":{""name"":""CLUSTER-9""},""geometry"":{""type"":""Polygon"",""coordinates"":[[[-80.2871373,25.7669984],[-80.2871373,25.7669984],[-80.2863107,25.7739639],[-80.2863107,25.7739639],[-80.2903764,25.7764474],[-80.2960178,25.7767575],[-80.2995315,25.7743264],[-80.3005295,25.7717593],[-80.2957821,25.7686756],[-80.2886327,25.7672027],[-80.2871373,25.7669984]]]}},{""type"":""Feature"",""id"":10,""properties"":{""name"":""CLUSTER-10""},""geometry"":{""type"":""Polygon"",""coordinates"":[[[-80.1985341,25.8280589],[-80.1905466,25.8281043],[-80.1860957,25.8455461],[-80.1931697,25.8470053],[-80.2021052,25.8442977],[-80.1985341,25.8280589]]]}},{""type"":""Feature"",""id"":11,""properties"":{""name"":""CLUSTER-11""},""geometry"":{""type"":""Polygon"",""coordinates"":[[[-80.2549972,25.7147542],[-80.2219419,25.7424639],[-80.2162189,25.7495985],[-80.2131191,25.7552633],[-80.2200398,25.781289],[-80.2308036,25.7861241],[-80.2709898,25.7517123],[-80.2700279,25.7448846],[-80.2634703,25.7284482],[-80.2580566,25.7166046],[-80.2549972,25.7147542]]]}}]}"
ea705812-ea7b-48aa-b00c-fe0b3f623c81,"{""type"":""FeatureCollection"",""features"":[]}"
c352b76f-5f8e-437f-81ac-29fcf92f1d6f,"{""type"":""FeatureCollection"",""features"":[{""type"":""Feature"",""id"":0,""properties"":{""name"":""CLUSTER-0""},""geometry"":{""type"":""Polygon"",""coordinates"":[[[-80.3087572,25.8549049],[-80.3022038,25.8579825],[-80.3022038,25.8579825],[-80.3087572,25.8549049],[-80.3087572,25.8549049]]]}},{""type"":""Feature"",""id"":1,""properties"":{""name"":""CLUSTER-1""},""geometry"":{""type"":""Polygon"",""coordinates"":[[[-80.136351,25.7737441],[-80.136351,25.7737441],[-80.1334149,25.7763304],[-80.1334149,25.7763304],[-80.1357333,25.7896212],[-80.1373198,25.7898634],[-80.1379328,25.7879253],[-80.1389248,25.7838664],[-80.1380904,25.7774068],[-80.136351,25.7737441]]]}},{""type"":""Feature"",""id"":2,""properties"":{""name"":""CLUSTER-2""},""geometry"":{""type"":""Polygon"",""coordinates"":[[[-80.1357199,25.8089636],[-80.1264311,25.8101531],[-80.1256457,25.8108074],[-80.1265023,25.8249258],[-80.1267163,25.8277187],[-80.1290811,25.8296036],[-80.1309061,25.8288138],[-80.1375452,25.8140375],[-80.1357199,25.8089636]]]}},{""type"":""Feature"",""id"":3,""properties"":{""name"":""CLUSTER-3""},""geometry"":{""type"":""Polygon"",""coordinates"":[[[-80.3558776,25.9034119],[-80.3504233,25.9035056],[-80.3489687,25.9242889],[-80.354014,25.9267873],[-80.3548643,25.9262334],[-80.3566224,25.9220211],[-80.3569944,25.9196629],[-80.3558776,25.9034119]]]}},{""type"":""Feature"",""id"":4,""properties"":{""name"":""CLUSTER-4""},""geometry"":{""type"":""Polygon"",""coordinates"":[[[-80.1371945,25.850265],[-80.1343189,25.8505797],[-80.1290308,25.8542426],[-80.1249592,25.85928],[-80.1232372,25.8620061],[-80.1219762,25.8671892],[-80.1255009,25.8707789],[-80.1267139,25.8707309],[-80.1277664,25.8702245],[-80.1351113,25.8643438],[-80.1394457,25.8604562],[-80.1426251,25.8526735],[-80.1417532,25.8506585],[-80.1371945,25.850265]]]}},{""type"":""Feature"",""id"":5,""properties"":{""name"":""CLUSTER-5""},""geometry"":{""type"":""Polygon"",""coordinates"":[[[-80.28023
22,25.8675469],[-80.2738801,25.8678251],[-80.2658103,25.8682517],[-80.264757,25.8688539],[-80.2675987,25.8730187],[-80.2704848,25.8770361],[-80.2965239,25.897446],[-80.3528217,25.8959596],[-80.3553713,25.8893069],[-80.3516997,25.8865365],[-80.3285772,25.8729011],[-80.3250347,25.8714928],[-80.3043472,25.8688092],[-80.2802322,25.8675469]]]}},{""type"":""Feature"",""id"":6,""properties"":{""name"":""CLUSTER-6""},""geometry"":{""type"":""Polygon"",""coordinates"":[[[-80.265369,25.6967134],[-80.2599645,25.6989806],[-80.2482741,25.7072795],[-80.2038047,25.7530427],[-80.2015562,25.7565194],[-80.1775927,25.8402589],[-80.1793029,25.8497923],[-80.1896027,25.8517694],[-80.2217092,25.8357811],[-80.3071937,25.7707077],[-80.3065515,25.7643106],[-80.265369,25.6967134]]]}}]}"
89ac946e-32e8-4b08-9542-175afe87c5eb,"{""type"":""FeatureCollection"",""features"":[]}"
df60d2d1-0c97-4674-b9f7-ee4b59b01789,"{""type"":""FeatureCollection"",""features"":[{""type"":""Feature"",""id"":0,""properties"":{""name"":""CLUSTER-0""},""geometry"":{""type"":""Polygon"",""coordinates"":[[[-80.2515971,25.730977],[-80.2515971,25.730977],[-80.2515971,25.730977],[-80.2515971,25.730977]]]}},{""type"":""Feature"",""id"":1,""properties"":{""name"":""CLUSTER-1""},""geometry"":{""type"":""Polygon"",""coordinates"":[[[-80.2902598,25.7705265],[-80.2902598,25.7705265],[-80.2955922,25.7730057],[-80.2902598,25.7705265],[-80.2902598,25.7705265]]]}},{""type"":""Feature"",""id"":2,""properties"":{""name"":""CLUSTER-2""},""geometry"":{""type"":""Polygon"",""coordinates"":[[[-80.212157,25.834205],[-80.212157,25.834205],[-80.212157,25.834205],[-80.212157,25.834205]]]}},{""type"":""Feature"",""id"":3,""properties"":{""name"":""CLUSTER-3""},""geometry"":{""type"":""Polygon"",""coordinates"":[[[-80.2549587,25.7415774],[-80.2488266,25.744191],[-80.2530214,25.7427044],[-80.2549587,25.7415774]]]}},{""type"":""Feature"",""id"":4,""properties"":{""name"":""CLUSTER-4""},""geometry"":{""type"":""Polygon"",""coordinates"":[[[-80.268777,25.7644894],[-80.2615532,25.7775722],[-80.2695551,25.7731879],[-80.2759936,25.7667526],[-80.268777,25.7644894]]]}},{""type"":""Feature"",""id"":5,""properties"":{""name"":""CLUSTER-5""},""geometry"":{""type"":""Polygon"",""coordinates"":[[[-80.3167268,25.8822726],[-80.3167268,25.8822726],[-80.3146328,25.8868043],[-80.3146328,25.8868043],[-80.3176136,25.8931226],[-80.3217966,25.8881362],[-80.3217963,25.887706],[-80.3167268,25.8822726]]]}},{""type"":""Feature"",""id"":6,""properties"":{""name"":""CLUSTER-6""},""geometry"":{""type"":""Polygon"",""coordinates"":[[[-80.3440772,25.887764],[-80.3375835,25.8892814],[-80.3375138,25.890497],[-80.3412679,25.8939844],[-80.3491358,25.890063],[-80.3470148,25.8889044],[-80.3440772,25.887764]]]}},{""type"":""Feature"",""id"":7,""properties"":{""name"":""CLUSTER-7""},""geometry"":{""type"":""Polygon"",""coordin
ates"":[[[-80.2723727,25.8696868],[-80.2723727,25.8696868],[-80.2733447,25.8732578],[-80.2746232,25.8750567],[-80.2767556,25.8773534],[-80.2815143,25.8794325],[-80.2831651,25.8789281],[-80.2814826,25.8698643],[-80.2723727,25.8696868]]]}},{""type"":""Feature"",""id"":8,""properties"":{""name"":""CLUSTER-8""},""geometry"":{""type"":""Polygon"",""coordinates"":[[[-80.2395088,25.7671084],[-80.2395088,25.7671084],[-80.2372014,25.7697691],[-80.2372014,25.7697691],[-80.2426269,25.7833309],[-80.2474104,25.7839617],[-80.2479347,25.7837563],[-80.2465759,25.7761459],[-80.2449285,25.7715601],[-80.2438937,25.7694865],[-80.2433057,25.7691049],[-80.2395088,25.7671084]]]}},{""type"":""Feature"",""id"":9,""properties"":{""name"":""CLUSTER-9""},""geometry"":{""type"":""Polygon"",""coordinates"":[[[-80.2126491,25.8012139],[-80.1966516,25.8194691],[-80.1961449,25.8241751],[-80.198385,25.8268613],[-80.2116347,25.8245947],[-80.2207158,25.8211207],[-80.2236961,25.8182984],[-80.2179205,25.805101],[-80.2126491,25.8012139]]]}},{""type"":""Feature"",""id"":10,""properties"":{""name"":""CLUSTER-10""},""geometry"":{""type"":""Polygon"",""coordinates"":[[[-80.2272354,25.7492968],[-80.2153021,25.75304],[-80.2053257,25.7563732],[-80.2182947,25.7673127],[-80.2284964,25.7684269],[-80.2456498,25.7559524],[-80.2411371,25.7522732],[-80.2338324,25.7501958],[-80.2272354,25.7492968]]]}}]}"
691b91b2-81a6-49cb-b19c-73bbb66c006f,"{""type"":""FeatureCollection"",""features"":[{""type"":""Feature"",""id"":0,""properties"":{""name"":""CLUSTER-0""},""geometry"":{""type"":""Polygon"",""coordinates"":[[[-80.3512882,25.9183662],[-80.3512882,25.9183662],[-80.3512882,25.9183662],[-80.3512882,25.9183662]]]}},{""type"":""Feature"",""id"":1,""properties"":{""name"":""CLUSTER-1""},""geometry"":{""type"":""Polygon"",""coordinates"":[[[-80.1342821,25.8164708],[-80.1339788,25.817085],[-80.1340017,25.8191127],[-80.1342821,25.8164708]]]}},{""type"":""Feature"",""id"":2,""properties"":{""name"":""CLUSTER-2""},""geometry"":{""type"":""Polygon"",""coordinates"":[[[-80.1370031,25.7791783],[-80.1355709,25.7817621],[-80.1348849,25.783277],[-80.1365881,25.7813143],[-80.1370031,25.7791783]]]}},{""type"":""Feature"",""id"":3,""properties"":{""name"":""CLUSTER-3""},""geometry"":{""type"":""Polygon"",""coordinates"":[[[-80.3020972,25.8555093],[-80.3019145,25.8598656],[-80.3018494,25.8614757],[-80.3025577,25.8623006],[-80.3031212,25.8611871],[-80.3020972,25.8555093]]]}},{""type"":""Feature"",""id"":4,""properties"":{""name"":""CLUSTER-4""},""geometry"":{""type"":""Polygon"",""coordinates"":[[[-80.1235471,25.861056],[-80.122384,25.8613804],[-80.1215315,25.8667983],[-80.1267833,25.8684693],[-80.1293965,25.8627229],[-80.1235471,25.861056]]]}},{""type"":""Feature"",""id"":5,""properties"":{""name"":""CLUSTER-5""},""geometry"":{""type"":""Polygon"",""coordinates"":[[[-80.2704664,25.8724155],[-80.2704664,25.8724155],[-80.2695525,25.8765074],[-80.2695525,25.8765074],[-80.270388,25.8796827],[-80.2727967,25.8800808],[-80.2751836,25.8781883],[-80.2763201,25.8755115],[-80.2704664,25.8724155]]]}},{""type"":""Feature"",""id"":6,""properties"":{""name"":""CLUSTER-6""},""geometry"":{""type"":""Polygon"",""coordinates"":[[[-80.355241,25.9023433],[-80.350986,25.9027106],[-80.3508158,25.9036964],[-80.3521852,25.9086373],[-80.3564659,25.9080332],[-80.3563873,25.9063291],[-80.355241,25.9023433]]]}},{""t
ype"":""Feature"",""id"":7,""properties"":{""name"":""CLUSTER-7""},""geometry"":{""type"":""Polygon"",""coordinates"":[[[-80.228249,25.7919648],[-80.228249,25.7919648],[-80.2270851,25.7949533],[-80.2270851,25.7949533],[-80.2371143,25.8057127],[-80.237548,25.8048081],[-80.2382785,25.798695],[-80.23538,25.7950617],[-80.2333297,25.7936629],[-80.228249,25.7919648]]]}},{""type"":""Feature"",""id"":8,""properties"":{""name"":""CLUSTER-8""},""geometry"":{""type"":""Polygon"",""coordinates"":[[[-80.326107,25.8756372],[-80.326107,25.8756372],[-80.3253004,25.8766176],[-80.3253004,25.8766176],[-80.3294517,25.8828485],[-80.333975,25.8874984],[-80.3348368,25.8880866],[-80.3372789,25.8895243],[-80.3479182,25.893859],[-80.3478903,25.8932022],[-80.3470449,25.8894939],[-80.3466918,25.8881038],[-80.3453287,25.8858843],[-80.3302721,25.8766934],[-80.326107,25.8756372]]]}},{""type"":""Feature"",""id"":9,""properties"":{""name"":""CLUSTER-9""},""geometry"":{""type"":""Polygon"",""coordinates"":[[[-80.3010384,25.8711492],[-80.3010384,25.8711492],[-80.2945979,25.8732195],[-80.2945979,25.8732195],[-80.2962851,25.8922244],[-80.3197615,25.8925271],[-80.3222056,25.8902139],[-80.3217622,25.8869499],[-80.3209503,25.884807],[-80.3010384,25.8711492]]]}},{""type"":""Feature"",""id"":10,""properties"":{""name"":""CLUSTER-10""},""geometry"":{""type"":""Polygon"",""coordinates"":[[[-80.2613722,25.7074406],[-80.2561824,25.7114261],[-80.2139308,25.7504729],[-80.2040596,25.7626022],[-80.1933724,25.7908201],[-80.1744721,25.8485842],[-80.1793908,25.850877],[-80.1959893,25.8494768],[-80.2965154,25.7731361],[-80.2986253,25.7666801],[-80.2613722,25.7074406]]]}}]}"
7627848d-c2ba-47f3-b5ce-1466e018c0c7,"{""type"":""FeatureCollection"",""features"":[{""type"":""Feature"",""id"":0,""properties"":{""name"":""CLUSTER-0""},""geometry"":{""type"":""Polygon"",""coordinates"":[[[-80.2780581,25.8736805],[-80.2780581,25.8736805],[-80.2780581,25.8736805],[-80.2780581,25.8736805]]]}},{""type"":""Feature"",""id"":1,""properties"":{""name"":""CLUSTER-1""},""geometry"":{""type"":""Polygon"",""coordinates"":[[[-80.2675901,25.7488373],[-80.2675901,25.7488373],[-80.2675901,25.7488373],[-80.2675901,25.7488373]]]}},{""type"":""Feature"",""id"":2,""properties"":{""name"":""CLUSTER-2""},""geometry"":{""type"":""Polygon"",""coordinates"":[[[-80.2327751,25.7752465],[-80.2327751,25.7752465],[-80.2327751,25.7752465],[-80.2327751,25.7752465]]]}},{""type"":""Feature"",""id"":3,""properties"":{""name"":""CLUSTER-3""},""geometry"":{""type"":""Polygon"",""coordinates"":[[[-80.2334774,25.806243],[-80.2334774,25.806243],[-80.2334774,25.806243],[-80.2334774,25.806243]]]}},{""type"":""Feature"",""id"":4,""properties"":{""name"":""CLUSTER-4""},""geometry"":{""type"":""Polygon"",""coordinates"":[[[-80.3054482,25.8555053],[-80.3054482,25.8555053],[-80.3038102,25.8562639],[-80.3038102,25.8562639],[-80.3054482,25.8555053]]]}},{""type"":""Feature"",""id"":5,""properties"":{""name"":""CLUSTER-5""},""geometry"":{""type"":""Polygon"",""coordinates"":[[[-80.3476061,25.8849967],[-80.3476061,25.8849967],[-80.3476061,25.8849967],[-80.3493218,25.8919502],[-80.3537775,25.8892386],[-80.3535042,25.8890193],[-80.3476061,25.8849967]]]}},{""type"":""Feature"",""id"":6,""properties"":{""name"":""CLUSTER-6""},""geometry"":{""type"":""Polygon"",""coordinates"":[[[-80.3151647,25.8817924],[-80.3151647,25.8817924],[-80.313411,25.8866283],[-80.313411,25.8866283],[-80.3146792,25.8949386],[-80.3174308,25.8882123],[-80.3151647,25.8817924]]]}},{""type"":""Feature"",""id"":7,""properties"":{""name"":""CLUSTER-7""},""geometry"":{""type"":""Polygon"",""coordinates"":[[[-80.2910889,25.8889556],[-
80.2910889,25.8889556],[-80.2947991,25.8946605],[-80.3006791,25.8955354],[-80.2997157,25.8946165],[-80.3022818,25.8948148],[-80.2981686,25.8894352],[-80.2968541,25.8889992],[-80.2910889,25.8889556]]]}},{""type"":""Feature"",""id"":8,""properties"":{""name"":""CLUSTER-8""},""geometry"":{""type"":""Polygon"",""coordinates"":[[[-80.1955363,25.786785],[-80.1955363,25.786785],[-80.1927609,25.7913816],[-80.1927609,25.7913816],[-80.1951661,25.7935628],[-80.1986847,25.796152],[-80.1989547,25.7931456],[-80.1986067,25.7907106],[-80.1955363,25.786785]]]}},{""type"":""Feature"",""id"":9,""properties"":{""name"":""CLUSTER-9""},""geometry"":{""type"":""Polygon"",""coordinates"":[[[-80.3517834,25.9138508],[-80.3509674,25.9196559],[-80.3509726,25.9197696],[-80.3522435,25.9224793],[-80.3551835,25.9228115],[-80.3569592,25.9188876],[-80.3522078,25.914222],[-80.3517834,25.9138508]]]}},{""type"":""Feature"",""id"":10,""properties"":{""name"":""CLUSTER-10""},""geometry"":{""type"":""Polygon"",""coordinates"":[[[-80.2252389,25.7437651],[-80.2183445,25.7439952],[-80.2045318,25.7569737],[-80.2016837,25.7803651],[-80.2062361,25.780041],[-80.218232,25.7731473],[-80.2257152,25.7667212],[-80.2252389,25.7437651]]]}},{""type"":""Feature"",""id"":11,""properties"":{""name"":""CLUSTER-11""},""geometry"":{""type"":""Polygon"",""coordinates"":[[[-80.2488361,25.7347105],[-80.2488361,25.7347105],[-80.2341626,25.7442779],[-80.2341626,25.7442779],[-80.2353412,25.7590834],[-80.2435822,25.7727606],[-80.2720464,25.7740011],[-80.2782332,25.7742168],[-80.286792,25.773976],[-80.2928487,25.7711643],[-80.2830392,25.7580555],[-80.2488361,25.7347105]]]}},{""type"":""Feature"",""id"":12,""properties"":{""name"":""CLUSTER-12""},""geometry"":{""type"":""Polygon"",""coordinates"":[[[-80.2115784,25.8009015],[-80.2063866,25.8016786],[-80.2040724,25.802455],[-80.1983046,25.8045714],[-80.1966649,25.8060332],[-80.1955394,25.8081026],[-80.1854664,25.8414932],[-80.1890261,25.844594],[-80.1908826,25.8452641],[-80.1982344,25.8
457776],[-80.2265462,25.8286787],[-80.227213,25.8278049],[-80.2274237,25.8256456],[-80.2276527,25.8145057],[-80.2140319,25.8014593],[-80.2129577,25.8012031],[-80.2115784,25.8009015]]]}}]}"
32ae2f3d-5dff-4696-a518-8ac3411df56c,"{""type"":""FeatureCollection"",""features"":[{""type"":""Feature"",""id"":0,""properties"":{""name"":""CLUSTER-0""},""geometry"":{""type"":""Polygon"",""coordinates"":[[[-80.1380951,25.7775521],[-80.1380951,25.7775521],[-80.1380951,25.7775521],[-80.1380951,25.7775521]]]}},{""type"":""Feature"",""id"":1,""properties"":{""name"":""CLUSTER-1""},""geometry"":{""type"":""Polygon"",""coordinates"":[[[-80.1227158,25.8610904],[-80.1208371,25.8650954],[-80.1251555,25.8666608],[-80.1264098,25.8669615],[-80.124086,25.862542],[-80.1227158,25.8610904]]]}},{""type"":""Feature"",""id"":2,""properties"":{""name"":""CLUSTER-2""},""geometry"":{""type"":""Polygon"",""coordinates"":[[[-80.3117651,25.8515498],[-80.3097111,25.8518971],[-80.2975664,25.8584862],[-80.2957247,25.8609983],[-80.3007917,25.8635289],[-80.3111333,25.8562666],[-80.3117651,25.8515498]]]}},{""type"":""Feature"",""id"":3,""properties"":{""name"":""CLUSTER-3""},""geometry"":{""type"":""Polygon"",""coordinates"":[[[-80.3542705,25.9171581],[-80.3513402,25.919804],[-80.3516403,25.9223564],[-80.3562649,25.9232599],[-80.356929,25.9182231],[-80.3542705,25.9171581]]]}},{""type"":""Feature"",""id"":4,""properties"":{""name"":""CLUSTER-4""},""geometry"":{""type"":""Polygon"",""coordinates"":[[[-80.3379985,25.8850273],[-80.3355821,25.8862761],[-80.334374,25.889289],[-80.3471579,25.8932023],[-80.3495745,25.8907038],[-80.3462625,25.8868909],[-80.3379985,25.8850273]]]}},{""type"":""Feature"",""id"":5,""properties"":{""name"":""CLUSTER-5""},""geometry"":{""type"":""Polygon"",""coordinates"":[[[-80.1828302,25.8435562],[-80.1748926,25.8464434],[-80.1759877,25.8533428],[-80.1801697,25.8526945],[-80.1841215,25.8520472],[-80.1845055,25.8511002],[-80.1828302,25.8435562]]]}},{""type"":""Feature"",""id"":6,""properties"":{""name"":""CLUSTER-6""},""geometry"":{""type"":""Polygon"",""coordinates"":[[[-80.2812236,25.724569],[-80.2786463,25.7276838],[-80.2783924,25.7310458],[-80.2785295,25.7348106],[-80.2
85384,25.7337367],[-80.2859579,25.7282294],[-80.2812236,25.724569]]]}},{""type"":""Feature"",""id"":7,""properties"":{""name"":""CLUSTER-7""},""geometry"":{""type"":""Polygon"",""coordinates"":[[[-80.1292894,25.8048354],[-80.1282341,25.8058219],[-80.1262523,25.8091671],[-80.1275729,25.8184755],[-80.1302905,25.8242044],[-80.1305571,25.8241995],[-80.13407,25.8211788],[-80.1349504,25.8167277],[-80.1307866,25.8048901],[-80.1292894,25.8048354]]]}},{""type"":""Feature"",""id"":8,""properties"":{""name"":""CLUSTER-8""},""geometry"":{""type"":""Polygon"",""coordinates"":[[[-80.2751952,25.8687107],[-80.2709893,25.8697657],[-80.2730282,25.8738225],[-80.2904463,25.8884029],[-80.3018687,25.8940467],[-80.3147067,25.8955537],[-80.3216869,25.889001],[-80.3256755,25.876456],[-80.3249231,25.8753833],[-80.3105925,25.8723978],[-80.2751952,25.8687107]]]}},{""type"":""Feature"",""id"":9,""properties"":{""name"":""CLUSTER-9""},""geometry"":{""type"":""Polygon"",""coordinates"":[[[-80.2604183,25.7024423],[-80.2536138,25.7085412],[-80.2186408,25.7407354],[-80.1991845,25.7628017],[-80.1951821,25.7694882],[-80.1912527,25.7943185],[-80.1898332,25.8399142],[-80.1944491,25.8440243],[-80.2216242,25.8378034],[-80.2240751,25.8368333],[-80.3040937,25.7728973],[-80.3066162,25.7662039],[-80.2699294,25.713322],[-80.2604183,25.7024423]]]}}]}"
2bc861c4-34aa-4433-aa46-23ebed4a2122,"{""type"":""FeatureCollection"",""features"":[]}"


In [0]:
import h3
from pyspark.sql.functions import udf

@udf("string")
def to_h3(lat, lng, precision):
  """Return the upper-cased H3 cell id containing the point (lat, lng).

  Args:
    lat: latitude in degrees; may be NULL (None) in Spark.
    lng: longitude in degrees; may be NULL (None) in Spark.
    precision: H3 resolution passed to ``h3.geo_to_h3``.

  Returns:
    The H3 index as an upper-case hexadecimal string, or None when any
    input is NULL.
  """
  # Spark hands NULL column values to Python UDFs as None, and h3 raises on
  # None, which would abort the whole job because of a single bad row.
  # A NULL cell id simply never matches an equi-join key downstream.
  if lat is None or lng is None or precision is None:
    return None
  # h3 returns lower-case hex; upper-casing presumably aligns with the casing
  # of the `h3` column this value is joined against -- TODO confirm.
  return h3.geo_to_h3(lat, lng, precision).upper()

In [0]:
from pyspark.sql import functions as F

# A transaction is reported as anomalous when its (user, H3 cell) pair has no
# counterpart in `tiles`: the left outer join then leaves `cluster` NULL.
# NOTE(review): `tiles` is built elsewhere in the notebook; it is assumed to
# carry `user`, `h3`, `cluster` and `tf_idf` columns -- confirm.
anomalous_transactions = (
  spark.table('geospatial_miami.transactions')
    # Index every transaction at H3 resolution 10.
    .withColumn('h3', to_h3(F.col('latitude'), F.col('longitude'), F.lit(10)))
    .join(tiles, on=['user', 'h3'], how='left_outer')
    .where(F.col('cluster').isNull())
    # Keep only the original transaction columns.
    .drop('h3', 'cluster', 'tf_idf')
)

display(anomalous_transactions)

user,latitude,longitude,amount
7e7bb66b-44ea-4d7d-9450-be80f1e28990,25.7359423,-80.2342303,142.25
f0485504-04f4-4bc6-912f-6913d0519223,25.7358308,-80.2343706,121.28
c3376825-04d8-49b7-8c89-cd3fd14df8b7,25.7354298,-80.2343998,184.89
c3376825-04d8-49b7-8c89-cd3fd14df8b7,25.7357477,-80.2345242,17.14
ae55e73b-9c0b-4a34-8e05-d31bdf090c70,25.7356612,-80.2347384,198.08
89ac946e-32e8-4b08-9542-175afe87c5eb,25.7351086,-80.2353542,84.72
3ca02d03-40b1-4a2a-82a5-548e190b3fbf,25.734605,-80.2351789,189.67
c96f908c-b1b3-4506-951c-e37913635e07,25.7372672,-80.2311178,159.05
7627848d-c2ba-47f3-b5ce-1466e018c0c7,25.7370008,-80.2308803,49.25
be19a72d-86ac-4998-8397-12f3c3c1d8e6,25.7467429,-80.2293584,131.74


In [0]:
#7e7bb66b-44ea-4d7d-9450-be80f1e28990
import folium
from folium import plugins
from pyspark.sql import functions as F

# Card holder whose anomalous transactions are plotted below.
user = 'b8fe8ae7-3450-416f-ae75-a03fb0488fbf'

# Bring this user's anomalies to the driver and keep the first four rows only.
anomalies = (
  anomalous_transactions
    .where(F.col('user') == user)
    .toPandas()
    .head(4)
)
# Cluster overlay is currently disabled:
# clusters = model_personalized.filter(F.col('user') == user).toPandas().cluster.iloc[0]

# Base map centred on fixed coordinates, with an extra 'Stamen Toner' layer.
personalized = folium.Map(
  location=[25.761681, -80.191788],
  zoom_start=12,
  width='80%',
  height='100%',
)
folium.TileLayer(tiles='Stamen Toner').add_to(personalized)

# One marker per anomalous transaction; the popup shows the amount.
for i, point in anomalies.iterrows():
  folium.Marker(
    location=[point.latitude, point.longitude],
    popup=point.amount,
  ).add_to(personalized)

# folium.GeoJson(clusters, name="geojson").add_to(personalized)

# Last expression of the cell: renders the map in the notebook.
personalized

In [0]:
# Export the anomalies of the selected user only; cluster data is already exported.
# The file name is derived from `user` (set in the map cell above) so the file
# is always labelled with the user whose rows `anomalies` actually contains.
anomalies.to_csv(
  f'/dbfs/FileStore/geospatial_fraud_detection/anomalies_miami_{user}.csv',
  index=False,  # do not write the pandas row index as an extra column
)