In [None]:
# plotting imports
import contextily as cx
import numpy as np
import seaborn as sns
from matplotlib import pyplot as plt
from matplotlib.patches import Patch

# dataset import
from srai.datasets import PhiladelphiaCrimeDataset

In [None]:
# Create the dataset object. The cells below call `load()`,
# `get_h3_with_labels()` and `train_test_split()` on this same instance.
philadelphia_crime = PhiladelphiaCrimeDataset()

In [None]:
# Check what the train/test attributes hold before `load()` has been called.
type(philadelphia_crime.train_gdf), type(philadelphia_crime.test_gdf)

Load the data with the `.load()` method. When no version is passed, the default version `'res_8'` is used.

In [None]:
# Call `load()` without arguments and keep the result in `ds`.
# Later cells index `ds` by split name ("train" / "test").
ds = philadelphia_crime.load()
ds.keys()

In [None]:
# Re-check the train/test attribute types now that `load()` has run.
type(philadelphia_crime.train_gdf), type(philadelphia_crime.test_gdf)

In [None]:
# Resolution stored on the dataset object after loading
# (presumably the H3 resolution of the loaded version; confirm in the srai docs).
resolution = philadelphia_crime.resolution
resolution

In [None]:
# Pull the two splits out of the loaded result by name.
gdf_train = ds["train"]
gdf_test = ds["test"]

In [None]:
# Preview the first rows of the training split.
gdf_train.head()

Get the H3 cells together with their target values.

In [None]:
# `get_h3_with_labels()` returns three values; only the first and the last are
# used in this notebook. The middle one (presumably the validation split;
# confirm in the srai docs) is bound to a named throwaway rather than `_`:
# in an IPython kernel `_` is the "last output" shortcut, and rebinding it
# makes IPython stop updating `_`/`__`/`___` for the rest of the session.
train_h3, _val_h3, test_h3 = philadelphia_crime.get_h3_with_labels()

In [None]:
# One row per split: (frame, legend/plot label, colour). Both panels iterate
# over this table so the train/test styling is defined in a single place.
target_col = philadelphia_crime.target
split_styles = (
    (train_h3, "train", "orange"),
    (test_h3, "test", "royalblue"),
)

# Two stacked panels: a tall map on top, a short distribution plot below.
fig, axes = plt.subplots(
    nrows=2, ncols=1, sharex=False, sharey=False, figsize=(12, 16), height_ratios=[4, 1]
)
map_ax, dist_ax = axes

# --- Top panel: H3 cells on a basemap ---------------------------------------
for split_gdf, split_label, split_color in split_styles:
    # Per-cell opacity: (target + 0.4) squared, capped at 1.
    cell_alpha = np.minimum(np.power(split_gdf[target_col] + 0.4, 2), 1)
    split_gdf.plot(
        color=split_color,
        markersize=0.1,
        ax=map_ax,
        label=split_label,
        alpha=cell_alpha,
    )

cx.add_basemap(map_ax, source=cx.providers.CartoDB.PositronNoLabels, crs=4326, zoom=12)
map_ax.set_title("Philadelphia crime data aggregated to H3 cells")
# The legend is built from explicit colour patches rather than from the
# plotted artists.
map_ax.legend(
    handles=[Patch(facecolor=split_color) for _gdf, _label, split_color in split_styles],
    labels=["Train", "Test"],
)
map_ax.set_axis_off()

# --- Bottom panel: target distribution per split ----------------------------
for split_gdf, split_label, split_color in split_styles:
    sns.kdeplot(
        x=split_gdf[target_col],
        label=split_label,
        color=split_color,
        ax=dist_ax,
        fill=False,
        cut=0,
    )
dist_ax.set_title("Philadelphia crime data - target distribution")
dist_ax.legend()

fig.tight_layout()

plt.show()

Load the data for the year 2013.

In [None]:
# Load version "2013". Note that this rebinds `ds`, replacing the result of
# the earlier argument-less `load()` call.
ds = philadelphia_crime.load(version="2013")
ds.keys()

In [None]:
# Re-check the train/test attribute types after loading version "2013".
type(philadelphia_crime.train_gdf), type(philadelphia_crime.test_gdf)

In [None]:
# Preview the first rows of the training split of the "2013" version.
ds["train"].head()

Create your own train-test split. This example uses bucket regression; spatial regression works similarly.

In [None]:
# Name of the target column; the plotting cell uses it to select the target
# values from the H3 frames.
philadelphia_crime.target

In [None]:
# Build a custom split on the dataset object.
# NOTE(review): parameter meanings are not visible in this notebook.
# Presumably test_size is the test fraction, resolution the H3 resolution,
# n_bins the number of target buckets and random_state the seed; confirm
# against the srai `train_test_split` documentation.
train, test = philadelphia_crime.train_test_split(
    test_size=0.2, resolution=8, n_bins=10, random_state=42
)

In [None]:
# Re-check the train/test attribute types after the custom split.
type(philadelphia_crime.train_gdf), type(philadelphia_crime.test_gdf)

In [None]:
# Resolution stored on the dataset object after the custom split
# (resolution=8 was passed to `train_test_split` above).
philadelphia_crime.resolution

In [None]:
# Preview the first rows of the custom training split.
train.head()

In [None]:
# Preview the first rows of the custom test split.
test.head()