# Crime Data

## Import libraries

In [1]:
# import libraries
import warnings
from datetime import datetime, timedelta
from pathlib import Path

import geopandas as gpd
import pandas as pd
import requests
from shapely.geometry import shape
warnings.filterwarnings('ignore')

## Prepare Data

### Import

In [3]:
# Street-level "all crime" endpoint of the Police API
endpoint = "https://data.police.uk/api/crimes-street/all-crime"

# Approximate bounding box around Merseyside County and its cities, expressed
# as a closed ring of [lat, lng] vertices (the first vertex is repeated last).
# It is only a starting shape: it gets divided into smaller sub-polygons later.
large_polygon = [
    list(corner)
    for corner in (
        (53.6967, -3.2603),  # Northwest corner (near Southport)
        (53.3700, -3.2603),  # Southwest corner (near Wirral)
        (53.3700, -2.5500),  # Southeast corner (near Warrington)
        (53.6967, -2.5500),  # Northeast corner (near Wigan)
        (53.6967, -3.2603),  # Back at the Northwest corner to close the ring
    )
]

# Function to divide the large polygon into sub-polygons
def create_sub_polygons(large_polygon, divisions=2):
    """Tile the bounding box of ``large_polygon`` with a square grid of cells.

    Only the latitude/longitude extremes of ``large_polygon`` are used, so the
    input is effectively treated as an axis-aligned rectangle.

    Parameters
    ----------
    large_polygon : iterable of [lat, lng] pairs
        Vertices whose bounding box should be subdivided.
    divisions : int, default 2
        Number of cells along each axis; must be at least 1.

    Returns
    -------
    list
        ``divisions ** 2`` closed rings (five ``[lat, lng]`` vertices each,
        the first repeated as the last). Cells are ordered by increasing
        latitude, and by increasing longitude within each latitude band.

    Raises
    ------
    ValueError
        If ``divisions`` is smaller than 1 or ``large_polygon`` has no points.
    """
    if divisions < 1:
        raise ValueError(
            f"divisions must be a positive integer, got {divisions!r}")

    # Materialise once so one-shot iterables (generators) can be scanned twice.
    points = list(large_polygon)
    if not points:
        raise ValueError(
            "large_polygon must contain at least one [lat, lng] point")

    lats = [pt[0] for pt in points]
    lngs = [pt[1] for pt in points]
    lat_min, lat_max = min(lats), max(lats)
    lng_min, lng_max = min(lngs), max(lngs)

    lat_step = (lat_max - lat_min) / divisions
    lng_step = (lng_max - lng_min) / divisions

    # Grid lines are computed once and shared by neighbouring cells. The outer
    # line is pinned to the true maximum so floating-point rounding in
    # ``min + n * step`` cannot leave the grid a hair short of (or beyond) the
    # bounding box.
    lat_edges = [lat_min + k * lat_step for k in range(divisions)] + [lat_max]
    lng_edges = [lng_min + k * lng_step for k in range(divisions)] + [lng_max]

    sub_polygons = []
    for i in range(divisions):
        south, north = lat_edges[i], lat_edges[i + 1]
        for j in range(divisions):
            west, east = lng_edges[j], lng_edges[j + 1]
            sub_polygons.append([
                [south, west],
                [south, east],
                [north, east],
                [north, west],
                [south, west],  # repeat the first vertex to close the ring
            ])

    return sub_polygons


# Divide the large polygon into smaller sub-polygons
sub_polygons = create_sub_polygons(
    large_polygon, divisions=3)  # Adjust divisions as needed

# Date range: from the month of start_date up to and including the current month.
# NOTE(review): this comment used to say "June 2021" while the code starts in
# July 2021 - confirm which month is intended; the code value is left unchanged.
start_date = datetime(2021, 7, 1)
end_date = datetime.now()

# Build the list of "YYYY-MM" strings with plain year/month arithmetic so the
# sequence stays correct whatever day of the month start_date falls on.
month_strings = []
year, month = start_date.year, start_date.month
while (year, month) <= (end_date.year, end_date.month):
    month_strings.append(f"{year:04d}-{month:02d}")
    year, month = (year + 1, 1) if month == 12 else (year, month + 1)

# Seconds to wait for the API before giving up on a single request
REQUEST_TIMEOUT_SECONDS = 30

# Initialize an empty list to store the crime data
all_crimes = []

# (month, polygon string, HTTP status or None for a network error) for every
# request that returned no data, so gaps in the dataset can be inspected/retried
failed_requests = []

# NOTE(review): neighbouring sub-polygons share edges; if the API includes
# points lying exactly on an edge, such records could appear twice - consider
# de-duplicating on the record 'id' downstream.

# Loop through each sub-polygon and month in the date range to collect data.
# A single Session reuses the HTTP connection across the many requests.
with requests.Session() as session:
    for sub_polygon in sub_polygons:
        polygon_str = ":".join([f"{lat},{lng}" for lat, lng in sub_polygon])

        for date_str in month_strings:
            # Make the API call using the sub-polygon
            api_url = f"{endpoint}?date={date_str}&poly={polygon_str}"

            try:
                response = session.get(api_url, timeout=REQUEST_TIMEOUT_SECONDS)
            except requests.RequestException as exc:
                # A network error should not discard everything collected so far
                print(f"Request error for {date_str} in sub-polygon: {exc}")
                failed_requests.append((date_str, polygon_str, None))
                continue

            # Check if the request was successful
            if response.status_code == 200:
                crimes = response.json()
                all_crimes.extend(crimes)
                print(
                    f"Retrieved {len(crimes)} crime records for {date_str} in sub-polygon")
            elif response.status_code == 503:
                print(
                    f"Request exceeded limit for {date_str} in sub-polygon. Consider further subdivision or reducing the date range.")
                failed_requests.append(
                    (date_str, polygon_str, response.status_code))
            else:
                print(
                    f"Failed to retrieve data for {date_str}: {response.status_code}")
                failed_requests.append(
                    (date_str, polygon_str, response.status_code))

if failed_requests:
    print(
        f"{len(failed_requests)} request(s) returned no data; see failed_requests for the affected month/sub-polygon pairs.")

# Convert the collected data to a DataFrame
crimes_df = pd.DataFrame(all_crimes)

Failed to retrieve data for 2013-01: 404
Failed to retrieve data for 2013-02: 404
Failed to retrieve data for 2013-03: 404
Failed to retrieve data for 2013-04: 404
Failed to retrieve data for 2013-05: 404
Failed to retrieve data for 2013-06: 404
Failed to retrieve data for 2013-07: 404
Failed to retrieve data for 2013-08: 404
Failed to retrieve data for 2013-09: 404
Failed to retrieve data for 2013-10: 404
Failed to retrieve data for 2013-11: 404
Failed to retrieve data for 2013-12: 404
Failed to retrieve data for 2014-01: 404
Failed to retrieve data for 2014-02: 404
Failed to retrieve data for 2014-03: 404
Failed to retrieve data for 2014-04: 404
Failed to retrieve data for 2014-05: 404
Failed to retrieve data for 2014-06: 404
Failed to retrieve data for 2014-07: 404
Failed to retrieve data for 2014-08: 404
Failed to retrieve data for 2014-09: 404
Failed to retrieve data for 2014-10: 404
Failed to retrieve data for 2014-11: 404
Failed to retrieve data for 2014-12: 404
Failed to retrie

KeyboardInterrupt: 

In [None]:
# Assuming crimes_df is already populated and contains the necessary data


def _flatten_crime_records(raw_df):
    """Return a new frame with the nested record fields expanded to columns.

    Reads the dictionaries stored in the 'location' and 'outcome_status'
    columns of ``raw_df`` and appends 'latitude', 'longitude', 'street_id',
    'street_name', 'outcome_category' and 'outcome_date' (in that order), then
    drops the two source columns. ``raw_df`` itself is not modified.
    """
    location = raw_df['location']
    outcome = raw_df['outcome_status']

    def outcome_field(key):
        # 'outcome_status' may be missing for a record; keep None in that case
        return outcome.apply(lambda x: x[key] if pd.notnull(x) else None)

    return (
        raw_df
        .assign(
            # Extract latitude and longitude from the 'location' dictionary
            latitude=location.apply(lambda x: x['latitude']),
            longitude=location.apply(lambda x: x['longitude']),
            # 'id' and 'name' of the 'street' dictionary nested in 'location'
            street_id=location.apply(lambda x: x['street']['id']),
            street_name=location.apply(lambda x: x['street']['name']),
            # 'category' and 'date' from the 'outcome_status' dictionary
            outcome_category=outcome_field('category'),
            outcome_date=outcome_field('date'),
        )
        # The original nested columns are no longer needed once flattened
        .drop(columns=['location', 'outcome_status'])
    )


# Only flatten while the nested column is still present, so re-running this
# cell after it has already succeeded is a no-op instead of a KeyError.
if 'location' in crimes_df.columns:
    crimes_df = _flatten_crime_records(crimes_df)

In [119]:
# Preview the first rows of crimes_df as a quick sanity check
crimes_df.head()

Unnamed: 0,category,location_type,context,persistent_id,id,location_subtype,month,latitude,longitude,street_id,street_name,outcome_category,outcome_date
0,anti-social-behaviour,Force,,,93582823,,2021-06,53.386978,-3.082421,908033,On or near Coniston Avenue,,
1,anti-social-behaviour,Force,,,93576584,,2021-06,53.392841,-3.031395,910714,On or near Hilton Close,,
2,anti-social-behaviour,Force,,,93576686,,2021-06,53.372197,-3.185789,907754,On or near Supermarket,,
3,anti-social-behaviour,Force,,,93587118,,2021-06,53.420545,-3.068271,915238,On or near Gainsborough Road,,
4,anti-social-behaviour,Force,,,93579417,,2021-06,53.421259,-3.033031,915107,On or near Trafalgar Road,,


In [124]:
# Keep only the fields describing when, what and where each crime occurred
relevant_columns = ['month', 'category', 'latitude', 'longitude']
crime_relevant_df = crimes_df[relevant_columns]

In [127]:
# Preview the first rows of the reduced month/category/latitude/longitude table
crime_relevant_df.head()

Unnamed: 0,month,category,latitude,longitude
0,2021-06,anti-social-behaviour,53.386978,-3.082421
1,2021-06,anti-social-behaviour,53.392841,-3.031395
2,2021-06,anti-social-behaviour,53.372197,-3.185789
3,2021-06,anti-social-behaviour,53.420545,-3.068271
4,2021-06,anti-social-behaviour,53.421259,-3.033031


In [128]:
# Save the DataFrame to a CSV file.
# to_csv does not create missing folders, so make sure the target directory
# exists first; otherwise the export fails on a fresh checkout.
output_path = Path('clean_data/clean_crime_data.csv')
output_path.parent.mkdir(parents=True, exist_ok=True)
crime_relevant_df.to_csv(output_path, index=False)

- TODO: calculate crime density
- TODO: determine the best radius to use for the crime density calculation