# UK Police API ingestion (baseline)

Goal: pull street-level crime records around a case-study point (lat/lon) for a given month and the months leading up to it (five months in total here).
This creates the raw empirical basis for later zoning and Stackelberg patrol optimisation.


In [1]:
%run _bootstrap.py

In [2]:
import os, sys
from pathlib import Path

# Locate the repository root: start at the current working directory and walk
# upwards until a directory containing src/ is found.
# If Jupyter was launched from the repo root, the loop body never runs.
repo_root = Path.cwd().resolve()
while not (repo_root / "src").is_dir() and repo_root != repo_root.parent:
    repo_root = repo_root.parent

# The loop also stops at the filesystem root; fail loudly there instead of
# silently changing directory to a place the project cannot be imported from.
if not (repo_root / "src").is_dir():
    raise FileNotFoundError(
        f"Could not find a 'src' directory in {Path.cwd()} or any of its parents; "
        "start Jupyter from inside the repository."
    )

os.chdir(repo_root)
# Put the repo root first on sys.path without accumulating a duplicate entry
# each time this cell is re-run.
if str(repo_root) in sys.path:
    sys.path.remove(str(repo_root))
sys.path.insert(0, str(repo_root))

print("Repo root:", repo_root)
print("sys.path[0]:", sys.path[0])


Repo root: /Users/obypa/Desktop/security-games-uk-patrols
sys.path[0]: /Users/obypa/Desktop/security-games-uk-patrols


In [3]:
# Smoke test: import the project's API helper from the src package. The print
# is only reached when the import succeeded.
from src.uk_police_api import get_crimes_all_categories
print("IMPORT_OK")

IMPORT_OK


In [4]:
import inspect
# Debug aid: show the imported function object and the first 300 characters
# of its source code.
print(get_crimes_all_categories)
print(inspect.getsource(get_crimes_all_categories)[:300])

<function get_crimes_all_categories at 0x104b359e0>
def get_crimes_all_categories(
    latitude: float,
    longitude: float,
    date: str,
    session: Optional[PoliceAPISession] = None,
) -> pd.DataFrame:
    """
    Pull street-level crimes around a point for a given month.

    Parameters
    ----------
    latitude, longitude : float
    date :


In [5]:
import pandas as pd

# Case-study point and the month the query window ends on.
LAT, LNG = 51.5080, -0.1281  # Trafalgar Square-ish
DATE = "2025-11"             # YYYY-MM; most recent month to query (pick one that returned data for you)

In [6]:
def previous_months(date_str: str, n: int = 5):
    """Return `n` consecutive month labels ending with the month of `date_str`.

    The end month itself is included, so ``previous_months("2025-11", n=5)``
    gives ``['2025-07', '2025-08', '2025-09', '2025-10', '2025-11']``.

    Parameters
    ----------
    date_str : str
        Date string parsed with ``pd.to_datetime`` (e.g. ``"2025-11"``).
    n : int, default 5
        Number of months to return.

    Returns
    -------
    list of str
        ``YYYY-MM`` labels in chronological order.
    """
    last_month = pd.to_datetime(date_str)
    labels = []
    # "MS" = month-start frequency: one timestamp per calendar month.
    for month_start in pd.date_range(end=last_month, periods=n, freq="MS"):
        labels.append(month_start.strftime("%Y-%m"))
    return labels

# Five-month window ending at DATE; the bare name on the last line displays the list.
months_to_query = previous_months(DATE, n=5)
months_to_query

['2025-07', '2025-08', '2025-09', '2025-10', '2025-11']

In [7]:
# Fetch each month in the window separately and collect the per-month frames.
dfs = []

for m in months_to_query:
    print(f"Fetching data for {m}...")
    df_m = get_crimes_all_categories(latitude=LAT, longitude=LNG, date=m)
    df_m["query_month"] = m  # provenance: record which monthly query produced each row
    dfs.append(df_m)

# Stack the monthly frames into one table with a fresh 0..N-1 index.
df_all = pd.concat(dfs, ignore_index=True)

print("Total rows:", len(df_all))
display(df_all.head())


Fetching data for 2025-07...
Fetching data for 2025-08...
Fetching data for 2025-09...
Fetching data for 2025-10...
Fetching data for 2025-11...
Total rows: 27310


Unnamed: 0,crime_category,persistent_id,month,latitude,longitude,street_id,street_name,location_type,outcome_category,outcome_date,query_month
0,anti-social-behaviour,,2025-07,51.512635,-0.143204,1676181,On or near St George Street,Force,,,2025-07
1,anti-social-behaviour,,2025-07,51.501162,-0.116252,1680084,On or near Addington Street,Force,,,2025-07
2,anti-social-behaviour,,2025-07,51.516568,-0.121381,1679291,On or near Newton Street,Force,,,2025-07
3,anti-social-behaviour,,2025-07,51.503196,-0.127032,1678183,On or near Downing Street,Force,,,2025-07
4,anti-social-behaviour,,2025-07,51.518405,-0.119302,1679695,On or near Procter Street,Force,,,2025-07


In [8]:
# Row count per queried month, ordered chronologically by the month label.
df_all["query_month"].value_counts().sort_index()

query_month
2025-07    5568
2025-08    5114
2025-09    4987
2025-10    5428
2025-11    6213
Name: count, dtype: int64

In [9]:
# Ten most frequent crime categories across the whole window.
df_all["crime_category"].value_counts().head(10)

crime_category
theft-from-the-person    7880
other-theft              5842
violent-crime            3371
anti-social-behaviour    3096
shoplifting              2379
public-order             1108
robbery                  1015
drugs                     670
burglary                  540
criminal-damage-arson     532
Name: count, dtype: int64

In [10]:
# Summary statistics of the returned coordinates (quick check of the spatial extent).
df_all[["latitude", "longitude"]].describe()

Unnamed: 0,latitude,longitude
count,27310.0,27310.0
mean,51.510702,-0.130477
std,0.00601,0.009069
min,51.493937,-0.151287
25%,51.507725,-0.136864
50%,51.511997,-0.131248
75%,51.514903,-0.125259
max,51.522284,-0.105571


In [11]:
def fetch_months(lat: float, lng: float, end_month: str, n_months: int = 5) -> pd.DataFrame:
    """Fetch crimes around a point for several consecutive months.

    Parameters
    ----------
    lat, lng : float
        Passed to ``get_crimes_all_categories`` as ``latitude`` / ``longitude``.
    end_month : str
        Last month of the window, passed to ``previous_months``.
    n_months : int, default 5
        Number of months to fetch, counting ``end_month``. Must be >= 1.

    Returns
    -------
    pd.DataFrame
        The per-month results concatenated with a fresh index, plus a
        ``query_month`` column recording which monthly query produced each row.

    Raises
    ------
    ValueError
        If ``n_months`` is less than 1.
    """
    # Fail fast with a clear message; otherwise a zero-month window only
    # surfaces later as pandas' "No objects to concatenate" error.
    if n_months < 1:
        raise ValueError(f"n_months must be at least 1, got {n_months}")

    # .assign returns a new frame, so the object handed back by the API helper
    # is not modified in place.
    frames = [
        get_crimes_all_categories(latitude=lat, longitude=lng, date=month).assign(query_month=month)
        for month in previous_months(end_month, n=n_months)
    ]
    return pd.concat(frames, ignore_index=True)

# Same five-month query as the explicit loop earlier in the notebook, now via
# the helper. NOTE: this calls the API helper again for every month and
# rebinds df_all.
df_all = fetch_months(LAT, LNG, DATE, n_months=5)
print("Total rows:", len(df_all))
df_all.head()


Total rows: 27310


Unnamed: 0,crime_category,persistent_id,month,latitude,longitude,street_id,street_name,location_type,outcome_category,outcome_date,query_month
0,anti-social-behaviour,,2025-07,51.512635,-0.143204,1676181,On or near St George Street,Force,,,2025-07
1,anti-social-behaviour,,2025-07,51.501162,-0.116252,1680084,On or near Addington Street,Force,,,2025-07
2,anti-social-behaviour,,2025-07,51.516568,-0.121381,1679291,On or near Newton Street,Force,,,2025-07
3,anti-social-behaviour,,2025-07,51.503196,-0.127032,1678183,On or near Downing Street,Force,,,2025-07
4,anti-social-behaviour,,2025-07,51.518405,-0.119302,1679695,On or near Procter Street,Force,,,2025-07


In [12]:
# Minimal, analysis-ready aggregation: counts by month + category.
# Result has one row per (query_month, crime_category) pair present in the
# data, sorted by month ascending and, within each month, by count descending.
monthly_counts = (
    df_all.groupby(["query_month", "crime_category"], observed=True)
    .size()
    .reset_index(name="n_crimes")
    .sort_values(["query_month", "n_crimes"], ascending=[True, False])
)

display(monthly_counts.head(20))
print("Rows in summary:", len(monthly_counts))


Unnamed: 0,query_month,crime_category,n_crimes
11,2025-07,theft-from-the-person,1648
6,2025-07,other-theft,1246
0,2025-07,anti-social-behaviour,680
13,2025-07,violent-crime,662
10,2025-07,shoplifting,448
9,2025-07,robbery,218
8,2025-07,public-order,207
3,2025-07,criminal-damage-arson,102
4,2025-07,drugs,90
2,2025-07,burglary,84


Rows in summary: 70


In [13]:
# Persist the month-by-category summary as CSV (without the index column).
out_path = "data/processed/monthly_crime_counts_5m.csv"
# Create the output folder first: to_csv does not create missing directories,
# so a fresh checkout without data/processed/ would otherwise fail here.
Path(out_path).parent.mkdir(parents=True, exist_ok=True)
monthly_counts.to_csv(out_path, index=False)
print("Wrote:", out_path)

Wrote: data/processed/monthly_crime_counts_5m.csv


In [14]:
# Save a small sample of the summary (at most the first 200 rows).
sample_path = "data/sample/monthly_crime_counts_5m_sample.csv"
# Create the output folder first: to_csv does not create missing directories.
Path(sample_path).parent.mkdir(parents=True, exist_ok=True)
monthly_counts.head(200).to_csv(sample_path, index=False)

print("Saved sample to:", sample_path)


Saved sample to: data/sample/monthly_crime_counts_5m_sample.csv


In [15]:
import os
# Confirm that the sample file exists on disk.
os.path.exists(sample_path)

True

In [16]:
# Save a reproducible random sample of the raw rows (seeded with random_state=42).
raw_sample_path = "data/sample/crime_rows_5m_sample_500.csv"
# Create the output folder first: to_csv does not create missing directories.
Path(raw_sample_path).parent.mkdir(parents=True, exist_ok=True)
# Cap the sample size at the number of available rows: DataFrame.sample raises
# ValueError when asked for more rows than exist (sampling without replacement).
df_all.sample(n=min(500, len(df_all)), random_state=42).to_csv(raw_sample_path, index=False)
print("Saved:", raw_sample_path)


Saved: data/sample/crime_rows_5m_sample_500.csv
