In [2]:
# import sqlite3
# import pandas as pd
# import geopandas as gpd
# from shapely.geometry import Point
# from tqdm import tqdm


# def get_borough(lon, lat, boroughs_gdf, boroughs_sindex):
#     point = Point(lon, lat)
#     possible_matches_index = list(boroughs_sindex.intersection(point.bounds))
#     possible_matches = boroughs_gdf.iloc[possible_matches_index]
#     for _, borough in possible_matches.iterrows():
#         if borough['geometry'].contains(point):
#             return borough['name']  # Assuming 'name' is the column with the borough names
#     return None

# # Load the borough boundaries from the GeoPackage and reproject them to WGS84 (EPSG:4326) so they match the crime longitude/latitude
# boroughs_gdf = gpd.read_file('../data/London_Boroughs.gpkg')
# boroughs_gdf = boroughs_gdf.to_crs(epsg=4326)
# boroughs_sindex = boroughs_gdf.sindex

# worst_boroughs = [
#     'Hackney', 'Lewisham', 'Haringey', 'Islington', 'Lambeth'
# ]

# # Connect to the SQLite database
# conn = sqlite3.connect('../data/crime_data.db')
# query = 'SELECT * FROM `metropolitan-normal` WHERE `Crime type` = "Violence and sexual offences"'
# df = pd.read_sql_query(query, conn)

# # Create a list to store the filtered rows
# filtered_rows = []

# # Iterate over the dataframe rows with a progress bar
# for _, row in tqdm(df.iterrows(), total=df.shape[0]):
#     lon = row['Longitude']
#     lat = row['Latitude']
#     borough = get_borough(lon, lat, boroughs_gdf, boroughs_sindex)
#     if borough in worst_boroughs:
#         row['Borough'] = borough
#         filtered_rows.append(row)

# # Create a new DataFrame from the filtered rows
# filtered_df = pd.DataFrame(filtered_rows)

# # Save the filtered DataFrame to a CSV file
# filtered_df.to_csv('crime_hotspot_start.csv', index=False)

# # Close the database connection
# conn.close()

In [3]:
import pandas as pd
from sklearn.cluster import KMeans
import folium
import geopandas as gpd
from folium.plugins import MarkerCluster

# Load the pre-filtered crime records; the commented-out preprocessing cell above
# is what writes 'crime_hotspot_start.csv'.
df_crime = pd.read_csv('crime_hotspot_start.csv')  # relative path: resolved against the notebook's working directory
# Bare expression so the notebook renders the frame as this cell's output.
df_crime

Unnamed: 0,Crime ID,Month,Reported by,Falls within,Longitude,Latitude,Location,LSOA code,LSOA name,Crime type,Last outcome category,Context,Borough
0,4eba897e44f97e216101eb13ee2192876e6a5adbd02816...,2014-06,Metropolitan Police Service,Metropolitan Police Service,-0.078592,51.419842,On or near Westow Hill,E01000746,Bromley 005E,Violence and sexual offences,Under investigation,,Lambeth
1,04b0e52fa42c7c3663458c301bab4fbd621d6007952f9c...,2014-06,Metropolitan Police Service,Metropolitan Police Service,-0.071003,51.570542,On or near Portland Avenue,E01001808,Hackney 001A,Violence and sexual offences,Investigation complete; no suspect identified,,Hackney
2,5a55191d39d812335b9141937593a2db74618ff81bb7a0...,2014-06,Metropolitan Police Service,Metropolitan Police Service,-0.071649,51.572666,On or near Broadway Mews,E01001808,Hackney 001A,Violence and sexual offences,Investigation complete; no suspect identified,,Hackney
3,04b0e52fa42c7c3663458c301bab4fbd621d6007952f9c...,2014-06,Metropolitan Police Service,Metropolitan Police Service,-0.071003,51.570542,On or near Portland Avenue,E01001808,Hackney 001A,Violence and sexual offences,Under investigation,,Hackney
4,f88595b06b784cd3f3dd4eb11acb903d24e2d8480fc4e8...,2014-06,Metropolitan Police Service,Metropolitan Police Service,-0.071170,51.571713,On or near Leweston Place,E01001808,Hackney 001A,Violence and sexual offences,Investigation complete; no suspect identified,,Hackney
...,...,...,...,...,...,...,...,...,...,...,...,...,...
418997,89de4ea9a95e52bd255fc2fc2bb3fb54f1647500e6d8a8...,2018-12,Metropolitan Police Service,Metropolitan Police Service,-0.021031,51.478120,On or near Creekside,E01032579,Lewisham 039E,Violence and sexual offences,Status update unavailable,,Lewisham
418998,7f6422118759a6a80e0ba9bad9d0303db58955786c5142...,2018-12,Metropolitan Police Service,Metropolitan Police Service,-0.024564,51.476003,On or near New Butt Lane,E01032579,Lewisham 039E,Violence and sexual offences,Investigation complete; no suspect identified,,Lewisham
418999,858a21a0fd41865359d696ca62dba67db323b2017f57bb...,2018-12,Metropolitan Police Service,Metropolitan Police Service,-0.024409,51.476270,On or near Reginald Road,E01032579,Lewisham 039E,Violence and sexual offences,Investigation complete; no suspect identified,,Lewisham
419000,a809d7f934f903ffe4f37b19c8c3123589beece7379e64...,2018-12,Metropolitan Police Service,Metropolitan Police Service,-0.028789,51.542323,On or near Cadogan Close,E01004223,Tower Hamlets 001B,Violence and sexual offences,Investigation complete; no suspect identified,,Hackney


In [4]:
def cluster_crime_data(df, number_of_clusters):
    """Assign a per-borough K-Means cluster label to every crime record.

    A separate KMeans model is fitted on the ('Longitude', 'Latitude') pairs of
    each borough, so a label is only meaningful within its own borough
    (cluster 0 in one borough is unrelated to cluster 0 in another).

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain 'Borough', 'Longitude' and 'Latitude' columns.
    number_of_clusters : int
        Number of clusters fitted for each borough.

    Returns
    -------
    pandas.DataFrame
        The rows of `df` regrouped by borough (boroughs in order of first
        appearance), re-indexed 0..n-1, with an added 'cluster' column.
        `df` itself is not modified. For an empty `df` the result is an empty
        frame that keeps the input columns plus 'cluster'.
    """
    # Collect the per-borough frames and concatenate once at the end: calling
    # pd.concat inside the loop re-copies everything accumulated so far on
    # every iteration.
    per_borough = []
    for borough in df['Borough'].unique():
        borough_data = df[df['Borough'] == borough].copy()  # copy so `df` stays untouched
        kmeans = KMeans(n_clusters=number_of_clusters, random_state=42)  # fixed seed for reproducible labels
        borough_data['cluster'] = kmeans.fit_predict(borough_data[['Longitude', 'Latitude']])
        per_borough.append(borough_data)

    if not per_borough:
        # pd.concat raises on an empty list; keep the input schema so that
        # downstream code can still rely on the 'cluster' column being present.
        return df.iloc[0:0].assign(cluster=pd.Series(dtype='int32')).reset_index(drop=True)

    return pd.concat(per_borough, ignore_index=True)

In [5]:
# How many K-Means clusters to fit inside each borough; tune as needed.
number_of_clusters = 5
df_clustered = cluster_crime_data(df=df_crime, number_of_clusters=number_of_clusters)

In [6]:
# Boroughs kept by the preprocessing step that produced crime_hotspot_start.csv.
# Defined here because that step is commented out, so the name would otherwise
# be undefined (NameError) after Restart Kernel -> Run All.
worst_boroughs = ['Hackney', 'Lewisham', 'Haringey', 'Islington', 'Lambeth']

gdf_boroughs = gpd.read_file("../data/London_Boroughs.gpkg")
# Keep only the polygons of the boroughs under study.
# NOTE(review): assumes 'name' is the column holding borough names in this GeoPackage — confirm.
gdf_boroughs_filtered = gdf_boroughs[gdf_boroughs['name'].isin(worst_boroughs)]


def visualize_clusters(df, gdf_boroughs):
    """Draw crime records and borough outlines on an interactive folium map.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain 'Borough', 'Latitude', 'Longitude' and 'cluster' columns.
        Rows with a missing latitude or longitude are skipped.
    gdf_boroughs : geopandas.GeoDataFrame
        Borough polygons, added to the map as a GeoJson overlay.

    Returns
    -------
    folium.Map
        Map centred on London with the borough overlay and one MarkerCluster
        layer per borough; every marker's popup shows its borough and cluster.
    """
    # Create a base map centred around London
    m = folium.Map(location=[51.5074, -0.1278], zoom_start=10)

    # Add borough boundaries to the map
    folium.GeoJson(gdf_boroughs).add_to(m)

    # A marker cannot be placed at a NaN coordinate; drop such rows up front
    # instead of letting a single bad record abort the whole map.
    plottable = df.dropna(subset=['Latitude', 'Longitude'])

    for borough in plottable['Borough'].unique():
        borough_data = plottable[plottable['Borough'] == borough]
        marker_cluster = MarkerCluster().add_to(m)
        # Iterate over the three needed columns directly: much cheaper than
        # iterrows(), which builds a Series per row and upcasts integer cluster
        # labels to float when all columns are numeric.
        for lat, lon, cluster in zip(
            borough_data['Latitude'], borough_data['Longitude'], borough_data['cluster']
        ):
            folium.Marker(
                location=[lat, lon],
                popup=f"Borough: {borough}<br>Cluster: {cluster}"
            ).add_to(marker_cluster)

    return m

In [7]:
map_clusters = visualize_clusters(df_clustered, gdf_boroughs_filtered)
map_clusters.save('crime_hotspots_map.html')