# Spatiotemporal Analysis

**Note: DFI Queries Will Not Work**

**The Data Flow Index server used for this workshop is no longer running.  The workshop materials are left up _as is_ but queries will not run.  If you would like to trial the Data Flow Index please reach out to General System at [https://www.generalsystem.com/contact-us](https://www.generalsystem.com/contact-us).**

In [None]:
import functools
import json
from collections import Counter
from datetime import datetime, timedelta
from getpass import getpass
from typing import List, Optional, Tuple

import h3.api.numpy_int as h3
import pandas as pd
from dfi import Client
from shapely.geometry import Polygon
from tqdm.notebook import tqdm

In [None]:
# Connection settings for the Data Flow Index API.
url = "https://api.prod.generalsystem.com"
namespace = "gs"
instance = "sdsc-2-2088"  # sdsc-1-5148

# Prompt for the access token so it is never stored in the notebook.
token = getpass("Enter your API access token: ")

# Shared client used by every query below; its progress bar starts enabled.
dfi = Client(token, instance, namespace, url, progress_bar=True)

In [None]:
def unpack_payload(df: pd.DataFrame) -> pd.DataFrame:
    """Expand selected fields of the JSON ``payload`` column into their own columns.

    Rows whose ``payload`` is not a ``str`` are dropped. Each remaining payload
    is parsed as JSON once, and the ``route``, ``transportation_mode``,
    ``start_location_id`` and ``end_location_id`` entries are copied into
    columns of the same name.

    Args:
        df: Frame with a ``payload`` column holding JSON strings.

    Returns:
        A new frame (the input is left untouched) containing only the rows
        with a string payload, plus the four unpacked columns.

    Raises:
        json.JSONDecodeError: If a string payload is not valid JSON.
        KeyError: If a parsed payload lacks one of the four expected keys.
    """
    # filter out any problem payloads; ``astype(bool)`` keeps the mask boolean
    # even when the frame is empty
    has_text_payload = df["payload"].map(lambda value: isinstance(value, str)).astype(bool)

    # explicit copy so the column assignments below never write into a slice of ``df``
    unpacked = df.loc[has_text_payload].copy()

    # parse every payload exactly once instead of once per extracted field
    parsed = unpacked["payload"].map(json.loads)
    for field in ("route", "transportation_mode", "start_location_id", "end_location_id"):
        # ``key=field`` binds the current field name to the lambda
        unpacked[field] = parsed.map(lambda record, key=field: record[key])

    return unpacked

In [None]:
def hex_period_colocations(
    uid: str, resolution: int, period: int, time_interval: Optional[Tuple[datetime, datetime]] = None
) -> Counter:
    """Count entities that share a hexagon and time period with ``uid`` while it dwells.

    The records of ``uid`` are reduced to its "dwelling" pings, binned into
    unique (H3 hexagon, time period) pairs, and the DFI is queried once per
    bin for the entities found inside that hexagon during that period.

    Args:
        uid: Entity whose records define the space-time bins.
        resolution: H3 resolution passed to ``h3.geo_to_h3``.
        period: Length of a time bin in minutes.
        time_interval: Optional ``(start, end)`` bound forwarded to the record query.

    Returns:
        A ``Counter`` accumulating whatever ``dfi.get.entities`` returns for
        each bin (presumably entity ids, so a count is the number of bins an
        entity appeared in, and ``uid`` itself is likely included - confirm
        against the client). Empty when ``uid`` has no records or no dwelling
        records.
    """
    collocated_entities = Counter()

    uid_df = dfi.get.records(entities=[uid], time_interval=time_interval, add_payload_as_json=True)

    # no data cannot be collocated with no data; return an empty Counter (not a
    # list) so callers can always combine results with Counter operators
    if len(uid_df) == 0:
        return collocated_entities

    # filter for only dwelling pings to identify co-dwelling instances
    uid_df = unpack_payload(uid_df)
    uid_df = uid_df[uid_df.transportation_mode == "dwelling"]
    if len(uid_df) == 0:
        return collocated_entities

    hex_period_df = (
        uid_df.assign(
            hex_id=lambda df: [h3.geo_to_h3(lat, lon, resolution) for lat, lon in zip(df.latitude, df.longitude)],
            # floor (not round) so every ping falls inside [period_start, period_end]
            period_start=lambda df: df.timestamp.dt.floor(f"{period}min"),
        )
        .drop_duplicates(subset=["hex_id", "period_start"])
        .assign(
            period_end=lambda df: df.period_start + timedelta(minutes=period),
            geometry=lambda df: df.hex_id.map(functools.partial(h3.h3_to_geo_boundary, geo_json=True)).map(Polygon),
        )
        .sort_values(by="period_start", ascending=True)
    )

    # turn off internal progress bar to not pollute STDOUT
    progress_bar = dfi.conn.progress_bar
    dfi.conn.progress_bar = False
    try:
        # for each hex-period bin query for entities and accumulate their counts
        bins = zip(hex_period_df.geometry, hex_period_df.period_start, hex_period_df.period_end)
        for geometry, period_start, period_end in tqdm(bins, total=len(hex_period_df), desc="Space-Time Bin"):
            vertices = list(geometry.exterior.coords)
            entities = dfi.get.entities(polygon=vertices, time_interval=(period_start, period_end))
            collocated_entities.update(entities)
    finally:
        # reset progress bar to previous state even if a query fails
        dfi.conn.progress_bar = progress_bar

    return collocated_entities

In [None]:
# Entity under investigation.
uid = "65b753d2-b523-467f-9c39-bc0fd6e2393b"

# Time window of records to analyse.
start_time = datetime(2022, 1, 1)
end_time = datetime(2022, 2, 1, 1)
time_interval = (start_time, end_time)

# Space-time bin size: H3 resolution and bin length in minutes.
resolution = 8
period = 15

colocated = hex_period_colocations(
    uid=uid,
    resolution=resolution,
    period=period,
    time_interval=time_interval,
)

# Co-located at a Blank Street Coffee (BSC) Shop

In [None]:
def hex_period_colocations_at_bsc(
    uid: str,
    resolution: int,
    period: int,
    osm_ids: List[str],
    time_interval: Optional[Tuple[datetime, datetime]] = None,
) -> Counter:
    """Count entities that share a hexagon and time period with ``uid`` while it dwells at given locations.

    The records of ``uid`` are reduced to its "dwelling" pings whose
    ``start_location_id`` is one of ``osm_ids``, binned into unique
    (H3 hexagon, time period) pairs, and the DFI is queried once per bin for
    the entities found inside that hexagon during that period.

    Args:
        uid: Entity whose records define the space-time bins.
        resolution: H3 resolution passed to ``h3.geo_to_h3``.
        period: Length of a time bin in minutes.
        osm_ids: Location ids a dwelling ping's ``start_location_id`` must match.
        time_interval: Optional ``(start, end)`` bound forwarded to the record query.

    Returns:
        A ``Counter`` accumulating whatever ``dfi.get.entities`` returns for
        each bin (presumably entity ids, so a count is the number of bins an
        entity appeared in, and ``uid`` itself is likely included - confirm
        against the client). Empty when ``uid`` has no records or no matching
        dwelling records.
    """
    collocated_entities = Counter()

    uid_df = dfi.get.records(entities=[uid], time_interval=time_interval, add_payload_as_json=True)

    # no data cannot be collocated with no data; return an empty Counter (not a
    # list) so callers can always combine results with Counter operators
    if len(uid_df) == 0:
        return collocated_entities

    # filter for only dwelling pings to identify co-dwelling instances at bsc shops
    uid_df = unpack_payload(uid_df)
    uid_df = uid_df[(uid_df.transportation_mode == "dwelling") & (uid_df.start_location_id.isin(osm_ids))]
    if len(uid_df) == 0:
        return collocated_entities

    hex_period_df = (
        uid_df.assign(
            hex_id=lambda df: [h3.geo_to_h3(lat, lon, resolution) for lat, lon in zip(df.latitude, df.longitude)],
            # floor (not round) so every ping falls inside [period_start, period_end]
            period_start=lambda df: df.timestamp.dt.floor(f"{period}min"),
        )
        .drop_duplicates(subset=["hex_id", "period_start"])
        .assign(
            period_end=lambda df: df.period_start + timedelta(minutes=period),
            geometry=lambda df: df.hex_id.map(functools.partial(h3.h3_to_geo_boundary, geo_json=True)).map(Polygon),
        )
        .sort_values(by="period_start", ascending=True)
    )

    # turn off internal progress bar to not pollute STDOUT
    progress_bar = dfi.conn.progress_bar
    dfi.conn.progress_bar = False
    try:
        # for each hex-period bin query for entities and accumulate their counts
        bins = zip(hex_period_df.geometry, hex_period_df.period_start, hex_period_df.period_end)
        for geometry, period_start, period_end in tqdm(bins, total=len(hex_period_df), desc="Space-Time Bin"):
            vertices = list(geometry.exterior.coords)
            entities = dfi.get.entities(polygon=vertices, time_interval=(period_start, period_end))
            collocated_entities.update(entities)
    finally:
        # reset progress bar to previous state even if a query fails
        dfi.conn.progress_bar = progress_bar

    return collocated_entities

In [None]:
# Load only the osm_id column of the BSC shop table and flatten it to a plain list.
bsc_ids = (
    pd.read_parquet(
        "https://d3ftlhu7xfb8rb.cloudfront.net/blank_street_coffee_callsigns.geoparquet",
        columns=["osm_id"],
    )["osm_id"]
    .values.tolist()
)

In [None]:
# Same entity, window and bin size as before, restricted to dwelling at BSC shops.
colocated_at_bsc = hex_period_colocations_at_bsc(
    uid=uid,
    resolution=resolution,
    period=period,
    osm_ids=bsc_ids,
    time_interval=time_interval,
)

## Devices that are Co-located at BSC Shops and Elsewhere


In [None]:
# Counter intersection: keeps only keys present in both results, each with the
# smaller of its two counts. NOTE: requires both operands to be Counter objects.
colocated & colocated_at_bsc