# Merging the Crime and Demographic Data Together

In [2]:
import pandas as pd
import geopandas as gpd
from shapely import wkt
import ast

# Locations of the two input CSVs: the LSOA dataset and the crime data
lsoa_path = 'final_lsoa_census_data.csv'
crime_data_path = 'barnet_crimes.csv'

# Read each CSV into its own DataFrame (LSOA first, then crime)
lsoa_data = pd.read_csv(lsoa_path)
crime_data = pd.read_csv(crime_data_path)

# Extract latitude and longitude from the location field
def extract_lat_lon(location_str):
    """Parse a location string into a ``[latitude, longitude]`` Series.

    ``location_str`` is expected to be the text of a dict literal carrying
    ``latitude`` and ``longitude`` entries. JSON-style ``null`` tokens are
    rewritten to ``None`` so that ``ast.literal_eval`` can parse the text.

    Returns:
        A two-element ``pd.Series``: the coordinates as floats, or
        ``[None, None]`` when the cell is not a string (e.g. NaN), cannot
        be parsed, is not a dict, or has a missing/null/non-numeric
        coordinate.
    """
    try:
        location_dict = ast.literal_eval(location_str.replace('null', 'None'))
        latitude = float(location_dict.get('latitude', None))
        longitude = float(location_dict.get('longitude', None))
    except (ValueError, SyntaxError, TypeError, AttributeError):
        # ValueError / SyntaxError: unparseable text or non-numeric coordinate.
        # TypeError: float(None) when a coordinate is absent or null.
        # AttributeError: the cell is not a string (NaN) or the parsed
        # literal is not a dict.
        return pd.Series([None, None])
    return pd.Series([latitude, longitude])

# Run the parser over every 'location' cell; each call yields a two-element
# Series, so the result is a two-column frame (latitude first, longitude second).
lat_lon_columns = crime_data['location'].apply(extract_lat_lon)
crime_data[['latitude', 'longitude']] = lat_lon_columns

# Ensure geometry is correctly parsed
def parse_geometry(geometry_str):
    """Convert a WKT string into a geometry object, or ``None`` on failure.

    Non-string cells (such as NaN from an empty CSV field) yield ``None``
    without attempting to parse. Any error raised by ``wkt.loads`` for a
    malformed string also yields ``None``, so callers can drop bad rows
    with ``dropna``.
    """
    if not isinstance(geometry_str, str):
        return None
    try:
        return wkt.loads(geometry_str)
    except Exception:
        # Deliberately best-effort: the concrete parse-error class differs
        # between shapely versions. Unlike a bare ``except``, this still
        # lets KeyboardInterrupt and SystemExit propagate.
        return None

# Replace the text in the 'geometry' column with parsed geometry objects;
# cells that fail to parse become None.
lsoa_data['geometry'] = lsoa_data['geometry'].apply(parse_geometry)

# Keep only the rows whose geometry parsed successfully
has_geometry = lsoa_data['geometry'].notna()
valid_lsoa_data = lsoa_data[has_geometry]

# LSOA GeoDataFrame built on the parsed 'geometry' column
# NOTE(review): no CRS is assigned to either GeoDataFrame here, so the join
# presumes both share one coordinate system - confirm against the data.
gdf_lsoa = gpd.GeoDataFrame(valid_lsoa_data, geometry='geometry')

# Crime GeoDataFrame: one point per record, x = longitude, y = latitude
crime_points = gpd.points_from_xy(x=crime_data['longitude'], y=crime_data['latitude'])
gdf_crime = gpd.GeoDataFrame(crime_data, geometry=crime_points)

# Inner spatial join: keep each crime point that lies within an LSOA geometry
joined_gdf = gpd.sjoin(
    gdf_crime,
    gdf_lsoa,
    how="inner",
    predicate='within',
)

# Tally the matched crimes per LSOA row ('index_right' holds the LSOA index)
crime_counts = joined_gdf.groupby('index_right').size()

# Attach the tallies to the LSOA frame; LSOAs with no matched crime get 0
gdf_lsoa['total_crime'] = gdf_lsoa.index.map(crime_counts).fillna(0)

# Read the HMO data
hmo_data_path = 'HMO_Locations.csv'
hmo_data = pd.read_csv(hmo_data_path)

# HMO GeoDataFrame: one point per row, x = 'Longitude', y = 'Latitude'
hmo_points = gpd.points_from_xy(x=hmo_data['Longitude'], y=hmo_data['Latitude'])
gdf_hmo = gpd.GeoDataFrame(hmo_data, geometry=hmo_points)

# Inner spatial join: keep each HMO point that lies within an LSOA geometry
joined_hmo_gdf = gpd.sjoin(
    gdf_hmo,
    gdf_lsoa,
    how="inner",
    predicate='within',
)

# Tally the matched HMOs per LSOA row ('index_right' holds the LSOA index)
hmo_counts = joined_hmo_gdf.groupby('index_right').size()

# Attach the tallies to the LSOA frame; LSOAs with no matched HMO get 0
gdf_lsoa['hmo_count'] = gdf_lsoa.index.map(hmo_counts).fillna(0)

# Write the enriched LSOA GeoDataFrame out as GeoJSON
output_file_path = 'lsoa_with_crime_and_hmo_counts.geojson'
gdf_lsoa.to_file(output_file_path, driver='GeoJSON')


