In [19]:
import numpy as np
import tqdm
import pandas as pd
import pydeck as pdk
import os

# Mapbox token read from the MAPBOX_API_KEY environment variable.
# setdefault keeps a key that was already exported in the shell instead of
# overwriting it with the placeholder below; replace "..." (or, preferably,
# export MAPBOX_API_KEY before starting Jupyter so no secret lives in the notebook).
os.environ.setdefault('MAPBOX_API_KEY', "...")

# Process data

In [2]:
# Read the trajectory table from the local parquet file.
df = pd.read_parquet('data/geolife_trajectories_1_3.parquet')

# Report the total row count, then preview the first rows as the cell output.
print(len(df))
df.head()

24876978


Unnamed: 0,lat,lon,datetime,altitude_meters,tripId,userId
0,22.816448,108.332067,2009-02-04 09:57:32,0.0,023_20090204095732,23
1,22.815663,108.33221,2009-02-04 09:57:37,0.0,023_20090204095732,23
2,22.815722,108.33218,2009-02-04 09:57:42,0.0,023_20090204095732,23
3,22.815742,108.332167,2009-02-04 09:57:47,0.0,023_20090204095732,23
4,22.815738,108.332145,2009-02-04 09:57:52,0.0,023_20090204095732,23


# Pydeck - deck.gl python binding

The pydeck library is a set of Python bindings for making spatial visualizations with deck.gl, optimized for a Jupyter Notebook environment.

Lots of examples here: https://github.com/uber/deck.gl/tree/master/bindings/pydeck/examples


# Load the data into pydeck layers

Very long list of different visualizations: 

ArcLayer
BitmapLayer
ColumnLayer
ContourLayer
GeoJsonLayer
HeatmapLayer
IconLayer
LineLayer
PathLayer
PointCloudLayer
PolygonLayer
SolidPolygonLayer
ScatterplotLayer
TextLayer
GPUGridLayer (advanced)
GreatCircleLayer
CPUGridLayer (advanced)
GridCellLayer
HexagonLayer
H3ClusterLayer
H3HexagonLayer
GridLayer
S2Layer
ScenegraphLayer
ScreenGridLayer
SimpleMeshLayer
TileLayer
Tile3DLayer
TripsLayer


In [32]:
# Work on leading slices of the full frame because of memory constraints:
# the first 500,000 rows for the heavier layers, the first 5,000 for the lightest.
df_small = df.iloc[:500_000]
df_smaller = df.iloc[:5_000]

# Summary statistics of the larger sample (displayed as the cell output).
df_small.describe()

Unnamed: 0,lat,lon,altitude_meters
count,500000.0,500000.0,500000.0
mean,37.181423,115.329815,69.139782
std,6.047095,2.740063,99.874137
min,22.798702,108.253305,-1560.8808
25%,39.890345,116.30879,28.0416
50%,39.984248,116.348631,45.72
75%,40.05269,116.41707,70.4088
max,47.6693,122.467933,1769.9736


In [24]:
# HexagonLayer over the 500k-row sample, positioned by the lon/lat columns.
layer_hex = pdk.Layer(
    'HexagonLayer',
    df_small,
    get_position=['lon', 'lat'],
    # Geometry of the extruded cells
    extruded=True,
    coverage=1,
    elevation_scale=50,
    elevation_range=[0, 3000],
    # Mouse interaction
    pickable=True,
    auto_highlight=True,
)

# Derive the initial viewport from the same lon/lat points.
view_state = pdk.data_utils.compute_view(
    points=df_small[['lon', 'lat']],
    view_proportion=0.9,
)

In [25]:
# ScatterplotLayer over the 5,000-row sample, positioned by the lon/lat columns.
layer_scatter = pdk.Layer(
    'ScatterplotLayer',
    df_smaller,
    get_position=['lon', 'lat'],
    get_radius=500,
    # Constant RGBA fill. The previous value 'color' is a string accessor that
    # reads a column named `color`, but the frame loaded above only has
    # lat, lon, datetime, altitude_meters, tripId and userId.
    get_fill_color=[255, 0, 100, 200],
)

# Derive the initial viewport from the same lon/lat points.
view_state = pdk.data_utils.compute_view(
    points=df_smaller[['lon', 'lat']],
    view_proportion=0.9,
)

In [26]:
# PointCloudLayer over the 500k-row sample; only the three columns the layer
# reads are handed to pydeck.
layer_pointcloud = pdk.Layer(
    'PointCloudLayer',
    df_small[['lon', 'lat', 'altitude_meters']],
    # List-of-column-names form, matching the other layers in this notebook.
    # The hand-written '@@=[...]' string risks a doubled '@@=' marker on pydeck
    # versions that add the expression prefix to string accessors themselves.
    get_position=['lon', 'lat', 'altitude_meters'],
    get_normal=[0, 0, 1],
    get_color=[255, 0, 100, 200],
    point_size=1,
    pickable=True,
    auto_highlight=True,
)

# Derive the initial viewport from the lon/lat columns of the same sample.
view_state = pdk.data_utils.compute_view(
    points=df_small[['lon', 'lat']],
    view_proportion=0.9,
)

In [29]:
# Camera settings applied on top of whichever `view_state` was computed last.
# NOTE(review): max_pitch=360 looks larger than any useful pitch limit — confirm
# against the deck.gl view-state documentation before relying on it.
for _attr, _value in (('max_pitch', 360), ('pitch', 40), ('bearing', 120)):
    setattr(view_state, _attr, _value)


In [33]:
# Render a single layer with the configured viewport.
# To try another visualization, put `layer_scatter` or `layer_pointcloud`
# in the `layers` list instead of `layer_hex`.
r = pdk.Deck(
    initial_view_state=view_state,
    layers=[layer_hex],
)

# Save output to a file
r.to_html('data/pydeck.html')

'/home/tjansson/code/geospatial/data/pydeck.html'