# Limitations and challenges in mobility data: noise for multiple devices

In [None]:
import random
import uuid
from copy import deepcopy

import geopandas as gpd
import h3
import pandas as pd
from dwells_geometries import gdf_geometries
from IPython.display import Image
from kepler_configs import config_agg_noiseless, config_agg_noisy
from keplergl import KeplerGl
from noisyfier import Noisyfier, Sink
from shapely.geometry import Polygon
from simulate_data import create_ideal_L_shape_trajectory__sydney

# Set to True to render the interactive kepler map (or altair charts) when running the notebook locally.
# If False, static screenshots from the images/ folder are displayed instead.
INTERACTIVE_OUTPUT = True

## Create noiseless trajectories

In [None]:
# Dedicated RNG with a fixed seed so the generated entity ids are the same on every run
rd = random.Random(42)

# Seven short ids: the first 8 hex characters of version-4 UUIDs built from the seeded RNG
entity_ids = [uuid.UUID(int=rd.getrandbits(128), version=4).hex[:8] for _ in range(7)]

# One ideal (noise-free) L-shaped trajectory per entity
list_noiseless_trajectories = [
    create_ideal_L_shape_trajectory__sydney(entity_id=entity_id) for entity_id in entity_ids
]

# Stack all trajectories into one frame, ordered chronologically with a fresh 0..n-1 index
df_noiseless_trajectories = pd.concat(list_noiseless_trajectories)
df_noiseless_trajectories = df_noiseless_trajectories.sort_values("timestamp").reset_index(drop=True)

## Aggregate noiseless trajectories

In [None]:
# Bucket every noiseless ping by H3 cell (resolution 12) and by timestamp rounded to 5 minutes.
# Work on a copy so the raw trajectories stay untouched.
df_noiseless_trajectories_agg = df_noiseless_trajectories.copy()

# Compute the cell and the time bucket once per ping, instead of formatting a composite
# string through a row-wise DataFrame.apply and splitting it apart again afterwards.
noiseless_h3_cells = [
    h3.geo_to_h3(lat, lng, resolution=12)
    for lat, lng in zip(
        df_noiseless_trajectories_agg["latitude"],
        df_noiseless_trajectories_agg["longitude"],
    )
]
# Kept as strings: the "min" column holds the textual form of the rounded timestamp
noiseless_time_buckets = [str(ts.round("5min")) for ts in df_noiseless_trajectories_agg["timestamp"]]

# Same three columns, in the same order: composite key first, then its two components
df_noiseless_trajectories_agg["H3_min"] = [
    f"{cell}_{bucket}" for cell, bucket in zip(noiseless_h3_cells, noiseless_time_buckets)
]
df_noiseless_trajectories_agg["H3"] = noiseless_h3_cells
df_noiseless_trajectories_agg["min"] = noiseless_time_buckets

### Create heatmap

In [None]:
# Number of noiseless pings per H3 cell: one row per cell, count stored in "num_pings_per_hex"
df_h3_heatmap_before_noise = (
    df_noiseless_trajectories_agg.groupby("H3").size().reset_index(name="num_pings_per_hex")
)


def polygonise(hex_id: str) -> Polygon:
    """Return the boundary of the H3 cell ``hex_id`` as a shapely ``Polygon``.

    The boundary is requested with ``geo_json=True``; per the h3-py documentation this
    yields (lng, lat) pairs, i.e. the x/y order shapely expects for EPSG:4326 data.
    """
    return Polygon(h3.h3_to_geo_boundary(hex_id, geo_json=True))


# Attach one hexagon polygon per row so the counts can be drawn on a map
gdf_heatmap_before_noise = gpd.GeoDataFrame(
    df_h3_heatmap_before_noise.copy(),
    geometry=[polygonise(hex_id) for hex_id in df_h3_heatmap_before_noise["H3"]],
    crs="EPSG:4326",
)

# The cell is now described by its geometry, so the H3 id column is dropped before plotting
gdf_heatmap_before_noise = gdf_heatmap_before_noise.drop(columns=["H3"]).reset_index(drop=True)

# Layers handed to kepler: layer name -> (Geo)DataFrame
kepler_data = {
    "Dwell locations": gdf_geometries,
    "Signal before noise": gdf_heatmap_before_noise,
}

In [None]:
if not INTERACTIVE_OUTPUT:
    # Static fallback: show the pre-rendered screenshot of the noiseless aggregates
    display(Image("images/noise_agg_0_perfect_aggregates.png"))
else:
    # Interactive kepler map; the widget receives a deep copy of the layers rather than kepler_data itself
    map_1 = KeplerGl(config=config_agg_noiseless, height=1100, data=deepcopy(kepler_data))
    display(map_1)

## Add noise to trajectories

In [None]:
# Build the noisy dataset: wrap the noiseless pings in a Noisyfier, call one noise method after
# another, then read the result from `df_pings` at the end of the cell.
# NOTE(review): Noisyfier and Sink are project-local (noisyfier module); the parameter meanings
# described below are read off the argument names only — confirm against that module.
noisifyier = Noisyfier(df_noiseless_trajectories)

# Gaussian spatial noise with a fixed spread, limited to the "moving" and "entering" keywords.
# NOTE(review): spread=7 is presumably in metres — confirm.
noisifyier.spatial_stationary_spread(
    spread=7,
    keywords=(
        "moving",
        "entering",
    ),
    noise_model="gaussian",
)


# Gumbel timestamp noise; judging by the argument names the spread ranges from 1 s to 20 s
# and varies periodically over a 0-60 s window. No keywords are passed to this call.
noisifyier.temporal_varying_spread(
    lower_bound_sec=0,
    upper_bound_sec=60,
    lower_bound_spread_sec=1,
    upper_bound_spread_sec=20,
    noise_model="gumbel",  # can be "uniform", "gaussian", "gumbel"
    variational_model="periodic",  # can be "linear" or "periodic"
)

# Gaussian spatial noise for the "dwell" keyword; judging by the argument names the spread
# ranges from 1 m to 30 m and varies periodically over a 0-300 s (5 min) window.
noisifyier.spatial_varying_spread(
    lower_bound_sec=0,
    upper_bound_sec=5 * 60,
    lower_bound_spread_m=1,
    upper_bound_spread_m=30,
    keywords=("dwell",),
    noise_model="gaussian",
    variational_model="periodic",
)

# Same method for the "moving" and "entering" keywords, with a uniform noise model,
# a smaller spread range (1-8 m by the argument names) and a 0-180 s window.
noisifyier.spatial_varying_spread(
    lower_bound_sec=0,
    upper_bound_sec=180,
    lower_bound_spread_m=1,
    upper_bound_spread_m=8,
    keywords=(
        "moving",
        "entering",
    ),
    noise_model="uniform",
    variational_model="periodic",
)

# Three missing-point passes with different frequency / reliability model / keywords.
# NOTE(review): the empty keywords tuple presumably means "no keyword filter" — confirm.
noisifyier.missing_points(
    frequency=0.6,
    reliability_model="uniform",
    keywords=(),
)

# Second pass: "moving" keyword only, "bathtub" reliability model
noisifyier.missing_points(
    frequency=0.4,
    reliability_model="bathtub",
    keywords=("moving",),
)

# Third pass: "dwell" keyword only, highest frequency (0.95), "triangular_increasing" reliability model
noisifyier.missing_points(frequency=0.95, reliability_model="triangular_increasing", keywords=("dwell",))

# Gridding with precision=3 at frequency 0.05.
# NOTE(review): precision is presumably the number of coordinate decimals kept — confirm.
noisifyier.gridding(
    precision=3,
    frequency=0.05,
    reliability_model="uniform",
)

# Erratic points for the "moving" keyword at frequency 0.03, with a 50 m buffer width
noisifyier.erratic_points(
    frequency=0.03,
    reliability_model="uniform",
    keywords=("moving",),
    buffer_width_meters=50,
    total_sampling_for_erratic_points=1000,
)

# A single sink location. NOTE(review): the positional arguments look like
# (longitude, latitude, frequency, reliability model) — confirm against Sink's signature.
sink_rugby_club = Sink(151.176491, -33.889456, 0.03, "triangular_increasing")

noisifyier.sink_locations(
    list_sinks=[
        sink_rugby_club,
    ],
)

# Pings after all the noise calls above
df_noisy_trajectories = noisifyier.df_pings

### Aggregate noisy trajectories

In [None]:
# Bucket every noisy ping by H3 cell (resolution 12) and by timestamp rounded to 5 minutes.
# Work on a copy so the noisy pings themselves stay untouched.
df_noisy_trajectories_agg = df_noisy_trajectories.copy()

# Compute the cell and the time bucket once per ping, instead of formatting a composite
# string through a row-wise DataFrame.apply and splitting it apart again afterwards.
noisy_h3_cells = [
    h3.geo_to_h3(lat, lng, resolution=12)
    for lat, lng in zip(
        df_noisy_trajectories_agg["latitude"],
        df_noisy_trajectories_agg["longitude"],
    )
]
# Kept as strings: the "min" column holds the textual form of the rounded timestamp
noisy_time_buckets = [str(ts.round("5min")) for ts in df_noisy_trajectories_agg["timestamp"]]

# Same three columns, in the same order: composite key first, then its two components
df_noisy_trajectories_agg["H3_min"] = [
    f"{cell}_{bucket}" for cell, bucket in zip(noisy_h3_cells, noisy_time_buckets)
]
df_noisy_trajectories_agg["H3"] = noisy_h3_cells
df_noisy_trajectories_agg["min"] = noisy_time_buckets

In [None]:
# Preview the first rows of the noisy pings (the frame read from the Noisyfier, not the aggregated copy)
df_noisy_trajectories.head()

### Create heatmap

In [None]:
# Number of noisy pings per H3 cell: one row per cell, count stored in "num_pings_per_hex"
df_h3_heatmap_with_noise = df_noisy_trajectories_agg.groupby("H3").size().reset_index(name="num_pings_per_hex")

# Attach one hexagon polygon per row so the counts can be drawn on a map
gdf_heatmap_with_noise = gpd.GeoDataFrame(
    df_h3_heatmap_with_noise.copy(),
    geometry=[polygonise(hex_id) for hex_id in df_h3_heatmap_with_noise["H3"]],
    crs="EPSG:4326",
)

# The cell is now described by its geometry, so the H3 id column is dropped before plotting
gdf_heatmap_with_noise = gdf_heatmap_with_noise.drop(columns=["H3"]).reset_index(drop=True)

# Layers handed to kepler: layer name -> (Geo)DataFrame
kepler_data = {
    "Dwell locations": gdf_geometries,
    "Signal with noise": gdf_heatmap_with_noise,
}

In [None]:
if not INTERACTIVE_OUTPUT:
    # Static fallback: show the pre-rendered screenshot of the noisy aggregates
    display(Image("images/noise_agg_1_noisy_aggregates.png"))
else:
    # Interactive kepler map; the widget receives a deep copy of the layers rather than kepler_data itself
    map_1 = KeplerGl(config=config_agg_noisy, height=1100, data=deepcopy(kepler_data))
    display(map_1)