In [None]:
# plotting imports
import contextily as cx
import numpy as np
import seaborn as sns
from matplotlib import pyplot as plt
from matplotlib.patches import Patch

# dataset import
from srai.datasets import PoliceDepartmentIncidentsDataset

In [None]:
# Create the dataset object that every later cell in this notebook calls into.
police_department_incidents = PoliceDepartmentIncidentsDataset()

In [None]:
# Peek at the types held by the train/test attributes before `load()` is called.
(
    type(police_department_incidents.train_gdf),
    type(police_department_incidents.test_gdf),
)

Default config

In [None]:
# Load the dataset with version=8 and list the keys of the returned mapping
# (the "train" and "test" entries are read a few cells further down).
# NOTE(review): version=8 presumably selects the pre-made split at H3
# resolution 8 — confirm against the srai dataset documentation.
ds = police_department_incidents.load(version=8)
ds.keys()

In [None]:
# Same type check as before, now that `load(version=8)` has run.
(
    type(police_department_incidents.train_gdf),
    type(police_department_incidents.test_gdf),
)

In [None]:
# Report the `resolution` attribute stored on the dataset object.
print("Aggregation H3 resolution:", police_department_incidents.resolution)

In [None]:
# Report the `target` attribute; the plotting cell below uses it as a column key.
print("Prediction target:", police_department_incidents.target)

In [None]:
# Pull the two splits out of the loaded mapping in one pass.
gdf_train, gdf_test = (ds[split_key] for split_key in ("train", "test"))

In [None]:
# Preview the first rows of the training split.
gdf_train.head()

Getting the H3 cells with target values

In [None]:
# `get_h3_with_labels()` is unpacked into three values; only the first and the
# last are used in this notebook. The middle one is bound to a named throwaway
# instead of `_`, because in IPython/Jupyter `_` is the "last output" variable
# and assigning to it stops the kernel from updating it.
# NOTE(review): the middle element is presumably the validation split — confirm.
train_h3, _unused_h3, test_h3 = police_department_incidents.get_h3_with_labels()

In [None]:
# Preview the first rows of the training frame returned by `get_h3_with_labels()`.
train_h3.head()

In [None]:
# Preview the first rows of the test frame returned by `get_h3_with_labels()`.
test_h3.head()

In [None]:
# Figure with two stacked panels: a map of the train/test H3 cells on top and
# the distribution of the target column for both splits underneath.
target_col = police_department_incidents.target

# (legend label, colour, frame) for each split, in drawing order.
split_styles = (
    ("train", "orange", train_h3),
    ("test", "royalblue", test_h3),
)

fig, axes = plt.subplots(
    nrows=2,
    ncols=1,
    sharex=False,
    sharey=False,
    figsize=(12, 16),
    height_ratios=[5, 1],
)
map_ax, dist_ax = axes

# --- top panel: cells on a basemap ------------------------------------------
for split_label, split_color, split_gdf in split_styles:
    # Per-cell opacity derived from the target: (value + 0.4) ** 2, capped at 1.
    cell_alpha = np.minimum(np.power(split_gdf[target_col] + 0.4, 2), 1)
    split_gdf.plot(
        ax=map_ax,
        color=split_color,
        markersize=0.1,
        label=split_label,
        alpha=cell_alpha,
    )

cx.add_basemap(
    map_ax,
    crs=4326,
    zoom=13,
    source=cx.providers.CartoDB.PositronNoLabels,
)
map_ax.set_title("SFPD incidents aggregated to H3 cells")
# The legend is assembled by hand from colour patches, one per split.
map_ax.legend(
    handles=[Patch(facecolor=split_color) for _label, split_color, _gdf in split_styles],
    labels=["Train", "Test"],
)
map_ax.set_axis_off()

# --- bottom panel: target distribution per split ----------------------------
for split_label, split_color, split_gdf in split_styles:
    sns.kdeplot(
        x=split_gdf[target_col],
        ax=dist_ax,
        color=split_color,
        label=split_label,
        fill=False,
        cut=0,
    )
dist_ax.set_title("SFPD incidents - target distribution")
dist_ax.legend()

fig.tight_layout()

plt.show()

Loading the raw, full data

In [None]:
# Load again, this time with version="all", and list the keys of the result.
# Note that this rebinds `ds`, so the mapping from the earlier `load(version=8)`
# call is no longer reachable under that name.
ds = police_department_incidents.load(version="all")
ds.keys()

In [None]:
# Check what the train/test attributes hold after loading version="all".
(
    type(police_department_incidents.train_gdf),
    type(police_department_incidents.test_gdf),
)

In [None]:
# Preview the first rows of the "train" entry from the version="all" load.
ds["train"].head()

Create your own train-test split: spatial splitting with bucket stratification

In [None]:
# Settings for the custom split, gathered in one place so they are easy to tweak.
# NOTE(review): `n_bins` is presumably the number of stratification buckets and
# `resolution` the H3 resolution used for splitting — confirm against srai docs.
split_kwargs = dict(
    test_size=0.2,
    resolution=8,
    n_bins=10,
    random_state=42,
)
train, test = police_department_incidents.train_test_split(**split_kwargs)

In [None]:
# Check what the train/test attributes hold after the custom `train_test_split()`.
(
    type(police_department_incidents.train_gdf),
    type(police_department_incidents.test_gdf),
)

In [None]:
# Display the dataset's `resolution` attribute after the custom split
# (the split above was requested with resolution=8).
police_department_incidents.resolution

In [None]:
# Preview the first rows of the custom training split.
train.head()

In [None]:
# Preview the first rows of the custom test split.
test.head()