# Hands-on Python Exercises:

- Tutorial 0 : Getting Started with Movingpandas
- Tutorial 1 : Trajectory Preprocessing
- ***Tutorial 2 : Taxi Trajectory Dashboard Visualization***


```
! pip install movingpandas

! pip install cartopy

! pip install geoviews

! pip install mapclassify

! pip install keplergl
```

In [None]:
# ! pip install movingpandas
# ! pip install cartopy
# ! pip install geoviews
# ! pip install mapclassify
# ! pip install keplergl

In [None]:
# Mount Google Drive at /content/drive (Colab only); the data path used later lives under it.
from google.colab import drive
drive.mount('/content/drive')

In this tutorial, we first extract origin-destination (OD) matrices and analyse the major flows, and then show them on a simple map dashboard.

### Load and prepare data

In [None]:
import pandas as pd
import geopandas as gpd
import movingpandas as mpd
import shapely as shp
import matplotlib.pyplot as plt

from geopandas import GeoDataFrame, read_file
from shapely.geometry import Point, LineString, Polygon
from datetime import datetime, timedelta

import folium

import warnings

warnings.filterwarnings("ignore")

In [None]:
# Location of the taxi CSV inside the mounted Google Drive folder.
path = "/content/drive/MyDrive/i4Geo_Workshop/new_taxi_porto.csv"

# Read the whole CSV into a DataFrame.
taxi_data = pd.read_csv(path)
# Optional: uncomment to work on only the first 50,000 rows.
# taxi_data = taxi_data.head(50000)
# Preview the first two rows.
taxi_data.head(2)

In [None]:
# prompt: filter the taxi data based on datetime between 2013-07-01 00:00:00 and 2013-07-02 00:00:00

# Parse the 'datetime' column into pandas datetime values (overwrites the column in place).
taxi_data['datetime'] = pd.to_datetime(taxi_data['datetime'])

# Keep only the rows whose timestamp falls inside the window; both bounds are inclusive.
filtered_taxi_data = taxi_data[
    taxi_data['datetime'].between('2013-07-01 00:00:00', '2013-07-02 00:00:00')
]

# Display the filtered table as the cell output.
filtered_taxi_data


In [None]:
# Build one trajectory per TRIP_ID from the filtered points: time comes from 'datetime',
# x/y from 'longitude'/'latitude', and the coordinates are declared as CRS 4326.
traj_collection = mpd.TrajectoryCollection(filtered_taxi_data, "TRIP_ID", t="datetime", y="latitude", x="longitude", crs=4326)
# Print the collection's summary.
print(traj_collection)

## Extracting O-D Matrix

In [None]:
from sklearn.cluster import DBSCAN
from geopy.distance import great_circle
from shapely.geometry import Point, LineString, Polygon, MultiPoint
import numpy as np
import math

In [None]:
def make_od_line(row, od_clusters):
    """Build the straight line joining the two cluster nodes of an OD pair.

    Parameters
    ----------
    row : mapping-like
        Record whose ``'od'`` entry is a sequence of cluster labels. The first
        label is used as the origin and the last one as the destination.
    od_clusters : DataFrame-like
        Table looked up by cluster label via ``.loc``; each row exposes a
        ``geometry`` attribute.

    Returns
    -------
    LineString
        Line from the origin node geometry to the destination node geometry.
    """
    od_pair = row['od']
    origin_geom = od_clusters.loc[od_pair[0]].geometry
    destination_geom = od_clusters.loc[od_pair[-1]].geometry
    return LineString([origin_geom, destination_geom])


def get_centermost_point(cluster):
    """Return the member of ``cluster`` that lies nearest to the cluster centroid.

    Parameters
    ----------
    cluster : sequence of coordinate pairs
        Points of one cluster. Each pair is treated as (lat, lon), matching
        the column order used when the clusters are built.

    Returns
    -------
    Point
        The selected member as a Shapely point, with the pair swapped to
        (lon, lat) order.
    """
    center = MultiPoint(cluster).centroid
    center_coords = (center.x, center.y)

    def distance_to_center(candidate):
        # Great-circle distance in meters between a member and the centroid.
        return great_circle(candidate, center_coords).m

    nearest = tuple(min(cluster, key=distance_to_center))
    # Swap (lat, lon) -> (lon, lat) for the Shapely point.
    return Point(nearest[1], nearest[0])


def dbscan_cluster_ods(od_gdf, eps):
    """Cluster OD points with DBSCAN on their latitude/longitude.

    Parameters
    ----------
    od_gdf : DataFrame-like
        Table with ``'lat'`` and ``'lon'`` columns in degrees.
    eps : float
        DBSCAN neighbourhood radius. The coordinates are converted to radians
        and compared with the haversine metric, so ``eps`` is an angle in
        radians as well.

    Returns
    -------
    tuple
        ``(cluster_labels, clusters)``: the per-point label array, and a
        Series holding, for each label ``0 .. k-1``, the array of (lat, lon)
        rows assigned to that label.
    """
    coords = od_gdf[['lat', 'lon']].to_numpy()

    # min_samples=1 lets a single isolated point form its own cluster.
    model = DBSCAN(
        eps=eps,
        min_samples=1,
        algorithm='ball_tree',
        metric='haversine',
    )
    model.fit(np.radians(coords))

    cluster_labels = model.labels_
    n_clusters = len(set(cluster_labels))
    members = [coords[cluster_labels == label] for label in range(n_clusters)]
    return cluster_labels, pd.Series(members)


def extract_od_gdf(trips):
    """Collect the start and end points of every trip into one table.

    Parameters
    ----------
    trips : trajectory collection
        Iterable of trips (each with an ``id``) that also provides
        ``get_start_locations()`` and ``get_end_locations()``.

    Returns
    -------
    GeoDataFrame
        Origins followed by destinations, re-indexed from 0, with added
        columns ``'type'`` (``'0'`` for origins, ``'1'`` for destinations),
        ``'traj_id'``, ``'lat'`` and ``'lon'``.
    """
    trip_ids = [trip.id for trip in trips]

    def labelled(points, kind):
        # Drop the original index and tag each row with its role and trip id.
        points = points.reset_index(drop=True)
        points['type'] = kind
        points['traj_id'] = trip_ids
        return points

    origins = labelled(trips.get_start_locations(), '0')
    destinations = labelled(trips.get_end_locations(), '1')

    od = pd.concat([origins, destinations], ignore_index=True)
    # Expose the point coordinates as plain columns for clustering.
    od['lat'] = od.geometry.y
    od['lon'] = od.geometry.x
    return od

def extract_od_clusters(od_gdf, eps):
    """Cluster the OD points and summarise each cluster as one node.

    Parameters
    ----------
    od_gdf : DataFrame-like
        OD point table with ``'lat'``/``'lon'`` columns. It is modified in
        place: a ``'cluster'`` column with the DBSCAN label is added.
    eps : float
        Neighbourhood radius forwarded to ``dbscan_cluster_ods``.

    Returns
    -------
    DataFrame
        One row per cluster label with ``'n'`` (number of points),
        ``'symbol_size'`` (``sqrt(n) * 10``) and ``'geometry'`` (the
        centermost point), sorted by ``'n'`` in descending order.
    """
    labels, members = dbscan_cluster_ods(od_gdf, eps)
    # Written onto the caller's table on purpose: extract_od_matrix reads it afterwards.
    od_gdf['cluster'] = labels

    counts = pd.DataFrame(od_gdf).groupby(['cluster']).size()
    nodes = counts.to_frame(name='n')
    nodes['symbol_size'] = (nodes['n'] ** (1/2)) * 10
    # `members` is indexed 0..k-1, which lines up with the cluster labels in `nodes`.
    nodes['geometry'] = members.map(get_centermost_point)

    non_empty = nodes[nodes['n'] > 0]
    return non_empty.sort_values(by='n', ascending=False)

def extract_od_matrix(traj_collection, eps, directed=True):
    """Build an origin-destination matrix, with line geometries, from trips.

    Parameters
    ----------
    traj_collection : trajectory collection
        Trips whose start and end points are extracted and clustered.
    eps : float
        Clustering radius forwarded to ``extract_od_clusters``.
    directed : bool, default True
        If True, (origin, destination) order is kept. If False, the cluster
        labels of each trip are sorted so A->B and B->A share one entry.

    Returns
    -------
    tuple
        ``(od_matrix, matrix_nodes)``: the OD table with ``'n'`` (trip
        count), ``'od'`` (tuple of cluster labels) and ``'geometry'`` (line
        between the cluster nodes), plus the cluster node table itself.
    """
    od_gdf = extract_od_gdf(traj_collection)
    matrix_nodes = extract_od_clusters(od_gdf, eps)

    # Sorting by 'type' puts each trip's origin ('0') before its destination ('1').
    ordered = pd.DataFrame(od_gdf).sort_values(['type'])
    # unique() keeps first-seen order, so a trip that starts and ends in the
    # same cluster yields a single label.
    trip_clusters = (
        ordered.groupby(['traj_id'])['cluster']
        .unique()
        .to_frame(name='clusters')
    )

    od_key = trip_clusters['clusters']
    if not directed:
        od_key = od_key.apply(sorted)
    od_key = od_key.apply(tuple)

    od_matrix = trip_clusters.groupby(od_key).count().rename({'clusters': 'n'}, axis=1)
    od_matrix['od'] = od_matrix.index
    od_matrix['geometry'] = od_matrix.apply(
        lambda record: make_od_line(row=record, od_clusters=matrix_nodes), axis=1
    )
    return od_matrix, matrix_nodes

In [None]:
# Define the Earth's radius in kilometers; dividing a distance in km by it gives an angle in radians.
KMS_PER_RADIAN = 6371.0088
# Define EPSILON as 0.03 kilometers (30 meters) converted to radians.
EPSILON = 0.03 / KMS_PER_RADIAN
# The clustering radius actually used is EPSILON*5, i.e. 0.15 km (150 meters); flows are undirected.
od_matrix, matrix_nodes = extract_od_matrix(traj_collection, EPSILON*5, directed=False)
print("The number of stops point after clustering is {} ".format(len(matrix_nodes)))

In [None]:
# Preview the five largest clusters (matrix_nodes is sorted by 'n' in descending order).
matrix_nodes.head(5)

In [None]:
# Wrap the cluster nodes in a GeoDataFrame and declare their CRS as 4326.
matrix_nodes_geodataframe = gpd.GeoDataFrame(matrix_nodes, geometry='geometry')
matrix_nodes_geodataframe.crs = 4326

# Base map: every taxi trajectory drawn as a thin blue line.
m = traj_collection.explore(name = "Taxi trips", style_kwds={"weight": 1}, color="blue")
# Overlay the cluster nodes on the same map, colored by 'symbol_size';
# the marker radius is sqrt(symbol_size) * 2.
matrix_nodes_geodataframe.explore(
    m = m,
    column="symbol_size",
    style_kwds={
        "style_function": lambda x: {"radius": math.sqrt(x["properties"]["symbol_size"])*2},
    },
    name="Stop points",
)

# Add an alternative basemap and a control to toggle layers.
folium.TileLayer("CartoDB positron").add_to(m)
folium.LayerControl().add_to(m)

# Display the map as the cell output.
m

In [None]:
# Wrap the OD matrix (one line per OD pair) in a GeoDataFrame and declare its CRS as 4326.
od_matrix_geodataframe = gpd.GeoDataFrame(od_matrix, geometry='geometry')
od_matrix_geodataframe.crs = 4326

# Base map: OD flow lines in blue, with line weight n**0.6 so busier flows are thicker.
m = od_matrix_geodataframe.explore(
    name = "Taxi flows",
    style_kwds={
        "style_function": lambda x: {"weight": (x["properties"]["n"])**0.6}
    },
    color="blue")


# Overlay the cluster nodes; the marker radius is sqrt(symbol_size) * 2.
matrix_nodes_geodataframe.explore(
    m = m,
    column="symbol_size",
    style_kwds={
        "style_function": lambda x: {"radius": math.sqrt(x["properties"]["symbol_size"])*2},
    },
    name="Stop points",
)

# Add an alternative basemap and a control to toggle layers.
folium.TileLayer("CartoDB positron").add_to(m)
folium.LayerControl().add_to(m)

# Display the map as the cell output.
m

## Advanced Visualization using Kepler GL

https://github.com/keplergl/kepler.gl/blob/master/bindings/kepler.gl-jupyter/README.md

In [None]:
from keplergl import KeplerGl

Prepare data for Kepler GL

In [None]:
# Rebuild the origin/destination point table for Kepler (a fresh table, without the 'cluster' column).
od_geodataframe = extract_od_gdf(traj_collection)
od_geodataframe.head(3)

In [None]:
# Replace the OD-pair index with a plain integer index; the pair itself stays in the 'od' column.
od_matrix_geodataframe.reset_index(drop=True, inplace=True)
od_matrix_geodataframe.head(3)

In [None]:
def split_coords(line):
    """Split a two-point line into separate source/target coordinate values.

    Parameters
    ----------
    line : object with a ``coords`` sequence
        The first two entries of ``coords`` are read as (lon, lat) pairs for
        the source and the target respectively.

    Returns
    -------
    pandas.Series
        ``[source_lat, source_lon, target_lat, target_lon]``.
    """
    vertices = line.coords
    (source_lon, source_lat), (target_lon, target_lat) = vertices[0], vertices[1]
    return pd.Series([source_lat, source_lon, target_lat, target_lon])

# Apply the split_coords function to the geometry column and store the four
# returned values as the source/target latitude and longitude columns.
od_matrix_geodataframe[['source_lat', 'source_lon', 'target_lat', 'target_lon']] = od_matrix_geodataframe['geometry'].apply(split_coords)


In [None]:
# Preview the OD matrix with the new source/target coordinate columns.
od_matrix_geodataframe.head(3)

In [None]:
# Cast these columns to strings before handing the tables to Kepler.
# NOTE(review): presumably done so the datetime values and tuple OD keys serialize cleanly — confirm.
od_geodataframe["datetime"] = od_geodataframe["datetime"].astype(str)
od_matrix_geodataframe["n"] = od_matrix_geodataframe["n"].astype(str)
od_matrix_geodataframe["od"] = od_matrix_geodataframe["od"].astype(str)

In [None]:


# Create KeplerGl instance
# NOTE(review): the name `map` shadows Python's builtin map() for the rest of the notebook.
map = KeplerGl()

# add data to Kepler
# "Taxi Data" is the full taxi_data table, not the one-day filtered_taxi_data subset.
map.add_data(data=od_geodataframe, name="OD points")
map.add_data(data=od_matrix_geodataframe, name="OD matrix")
map.add_data(data=taxi_data, name="Taxi Data")



In [None]:
# Set the map height to 700 and show it, centered on the loaded data.
map.height = 700
map.show(center_map=True)

In [None]:
# Export the current Kepler map to the file 'map.html'.
map.save_to_html(file_name='map.html')

Config

In [None]:
config = {'version': 'v1', 'config': {'visState': {'filters': [{'dataId': ['Taxi Data'], 'id': 'zm8wdp8wo', 'name': ['datetime'], 'type': 'timeRange', 'value': [1372702510454.332, 1372704384454.3318], 'plotType': {'interval': '1-hour', 'defaultTimeFormat': 'L  H A', 'type': 'histogram', 'aggregation': 'sum'}, 'animationWindow': 'free', 'yAxis': None, 'view': 'enlarged', 'speed': 0.1, 'enabled': True}], 'layers': [ {"id": "egzhjkq", "type": "hexagon", "config": { "dataId": "Taxi Data", "label": "Taxi Hexbins", "color": [137,218,193], "highlightColor": [252,242,26,255], "columns": { "lat": "latitude", "lng": "longitude" }, "isVisible": True, "visConfig": { "opacity": 0.8, "worldUnitSize": 0.3, "resolution": 8, "colorRange": { "name": "Global Warming", "type": "sequential", "category": "Uber", "colors": [ "#4C0035", "#880030", "#B72F15", "#D6610A", "#EF9100", "#FFC300" ] }, "coverage": 1, "sizeRange": [0,500], "percentile": [0,100], "elevationPercentile": [0,100], "elevationScale": 7, "enableElevationZoomFactor": True, "fixedHeight": False, "colorAggregation": "count", "sizeAggregation": "average", "enable3d": True }, "hidden": False, "textLabel": [ { "field": None, "color": [255,255,255], "size": 18, "offset": [0,0], "anchor": "start", "alignment": "center", "outlineWidth": 0, "outlineColor": [255,0,0,255], "background": False, "backgroundColor": [0,0,200,255 ] } ] }, "visualChannels": { "colorField": None, "colorScale": "quantile", "sizeField": None, "sizeScale": "linear"}}, {'id': 'qle7gl', 'type': 'point', 'config': {'dataId': 'Taxi Data', 'columnMode': 'points', 'label': 'Taxi GPS Points', 'color': [221, 178, 124], 'highlightColor': [252, 242, 26, 255], 'columns': {'lat': 'latitude', 'lng': 'longitude'}, 'isVisible': False, 'visConfig': {'radius': 10, 'fixedRadius': False, 'opacity': 0.8, 'outline': False, 'thickness': 2, 'strokeColor': None, 'colorRange': {'colors': ['#12939A', '#DDB27C', '#88572C', '#FF991F', '#F15C17', '#223F9A', '#DA70BF', '#125C77', 
'#4DC19C', '#776E57', '#17B8BE', '#F6D18A', '#B7885E', '#FFCB99', '#F89570', '#829AE3', '#E79FD5', '#1E96BE', '#89DAC1', '#B3AD9E'], 'name': 'Uber Viz Qualitative', 'type': 'qualitative', 'category': 'Uber'}, 'strokeColorRange': {'name': 'Global Warming', 'type': 'sequential', 'category': 'Uber', 'colors': ['#4C0035', '#880030', '#B72F15', '#D6610A', '#EF9100', '#FFC300']}, 'radiusRange': [0, 50], 'filled': True, 'billboard': False, 'allowHover': True, 'showNeighborOnHover': False, 'showHighlightColor': True}, 'hidden': False, 'textLabel': [{'field': None, 'color': [255, 255, 255], 'size': 18, 'offset': [0, 0], 'anchor': 'start', 'alignment': 'center', 'outlineWidth': 0, 'outlineColor': [255, 0, 0, 255], 'background': False, 'backgroundColor': [0, 0, 200, 255]}]}, 'visualChannels': {'colorField': {'name': 'TAXI_ID', 'type': 'integer'}, 'colorScale': 'quantile', 'strokeColorField': None, 'strokeColorScale': 'quantile', 'sizeField': None, 'sizeScale': 'linear'}}, {'id': 'qsbthxi', 'type': 'cluster', 'config': {'dataId': 'OD points', 'label': 'OD points', 'color': [179, 173, 158], 'highlightColor': [252, 242, 26, 255], 'columns': {'lat': 'lat', 'lng': 'lon'}, 'isVisible': False, 'visConfig': {'opacity': 0.88, 'clusterRadius': 55.7, 'colorRange': {'colors': ['#223F9A', '#483C9A', '#633797', '#793192', '#8D298C', '#9E2184', '#AD177A', '#BA0E6F', '#C40A63', '#CC1157', '#D7244D', '#E53A46', '#F04F3D', '#FA6434', '#FF7929', '#FF8E1C', '#FFA308', '#FFB900', '#FFCE00', '#FAE300'], 'name': 'UberPool', 'type': 'diverging', 'category': 'Uber'}, 'radiusRange': ['10', 50], 'colorAggregation': 'average'}, 'hidden': False, 'textLabel': [{'field': None, 'color': [255, 255, 255], 'size': 18, 'offset': [0, 0], 'anchor': 'start', 'alignment': 'center', 'outlineWidth': 0, 'outlineColor': [255, 0, 0, 255], 'background': False, 'backgroundColor': [0, 0, 200, 255]}]}, 'visualChannels': {'colorField': {'name': '0', 'type': 'integer'}, 'colorScale': 'quantize'}}, {'id': 'lq2u2ni', 'type': 
'arc', 'config': {'dataId': 'OD matrix', 'columnMode': 'points', 'label': 'OD matrix Arc', 'color': [146, 38, 198], 'highlightColor': [252, 242, 26, 255], 'columns': {'lat0': 'source_lat', 'lng0': 'source_lon', 'lat1': 'target_lat', 'lng1': 'target_lon'}, 'isVisible': False, 'visConfig': {'opacity': 0.76, 'thickness': 2, 'colorRange': {'colors': ['#223F9A', '#483C9A', '#633797', '#793192', '#8D298C', '#9E2184', '#AD177A', '#BA0E6F', '#C40A63', '#CC1157', '#D7244D', '#E53A46', '#F04F3D', '#FA6434', '#FF7929', '#FF8E1C', '#FFA308', '#FFB900', '#FFCE00', '#FAE300'], 'name': 'UberPool', 'type': 'diverging', 'category': 'Uber'}, 'sizeRange': [0.2, 5.5], 'targetColor': None}, 'hidden': False, 'textLabel': [{'field': None, 'color': [255, 255, 255], 'size': 18, 'offset': [0, 0], 'anchor': 'start', 'alignment': 'center', 'outlineWidth': 0, 'outlineColor': [255, 0, 0, 255], 'background': False, 'backgroundColor': [0, 0, 200, 255]}]}, 'visualChannels': {'colorField': {'name': 'n', 'type': 'integer'}, 'colorScale': 'quantize', 'sizeField': {'name': 'n', 'type': 'integer'}, 'sizeScale': 'linear'}}], 'effects': [], 'interactionConfig': {'tooltip': {'fieldsToShow': {'dausla': [{'name': '0', 'format': None}, {'name': 'n', 'format': None}, {'name': 'od', 'format': None}], 'jm4pas': [{'name': '0', 'format': None}, {'name': 'datetime', 'format': None}, {'name': 'TRIP_ID', 'format': None}, {'name': 'TAXI_ID', 'format': None}, {'name': 'type', 'format': None}], 'tohy2r': [{'name': '0', 'format': None}, {'name': 'TRIP_ID', 'format': None}, {'name': 'TAXI_ID', 'format': None}, {'name': 'datetime', 'format': None}]}, 'compareMode': False, 'compareType': 'absolute', 'enabled': True}, 'brush': {'size': 0.5, 'enabled': False}, 'geocoder': {'enabled': False}, 'coordinate': {'enabled': False}}, 'layerBlending': 'normal', 'overlayBlending': 'normal', 'splitMaps': [], 'animationConfig': {'currentTime': None, 'speed': 1}, 'editor': {'features': [], 'visible': True}}, 'mapState': {'bearing': 0, 
'dragRotate': False, 'latitude': 41.16292801347798, 'longitude': -8.660418838014959, 'pitch': 0, 'zoom': 11.480589169169521, 'isSplit': False, 'isViewportSynced': True, 'isZoomLocked': False, 'splitMapViewports': []}, 'mapStyle': {'styleType': 'dark-matter', 'topLayerGroups': {}, 'visibleLayerGroups': {'label': True, 'road': True, 'border': False, 'building': True, 'water': True, 'land': True, '3d building': True}, 'threeDBuildingColor': [15.035172933000911, 15.035172933000911, 15.035172933000911], 'backgroundColor': [0, 0, 0], 'mapStyles': {}}, 'uiState': {'mapControls': {'mapLegend': {'active': False}}}}}

In [None]:
# Create a second KeplerGl instance that will use the `config` dict defined above
map2 = KeplerGl()

# add data to Kepler
# The dataset names must stay "OD points", "OD matrix" and "Taxi Data": the layers in `config`
# refer to them through their 'dataId' values.
map2.add_data(data=od_geodataframe, name="OD points")
map2.add_data(data=od_matrix_geodataframe, name="OD matrix")
map2.add_data(data=taxi_data, name="Taxi Data")


# Apply the saved configuration, set the map height and show the map centered on the data.
map2.config = config
map2.height = 700
map2.show(center_map=True)