In [10]:
import pandas as pd
from shapely import wkt
import geopandas as gpd
import os
from io import StringIO

In [14]:
# Load the CSV file
df = pd.read_csv('MPS LSOA Level Crime.csv')
# Display the first few rows of the raw DataFrame (before any columns are added/dropped)
print(df.head())

# Monthly count columns have all-digit headers beginning with "20" (e.g. "202302").
month_columns = [col for col in df.columns if col.isdigit() and col.startswith("20")]

# Per-row sum across every monthly column.
# NOTE(review): this column was previously written as 'otal' (typo). Any consumer
# outside this cell that reads 'otal' must be switched to 'Total'.
df['Total'] = df[month_columns].sum(axis=1)

# These columns are not carried into the exported file.
df = df.drop(columns=['Major Category', 'Minor Category','Refreshed Date'])

# Persist the trimmed table for the geometry-merge step.
df.to_csv("PhoneTheftData.csv", index=False)


   LSOA Code                  LSOA Name    Borough Major Category  \
0  E01000006  Barking and Dagenham 016A  E09000002          THEFT   
1  E01000007  Barking and Dagenham 015A  E09000002          THEFT   
2  E01000008  Barking and Dagenham 015B  E09000002          THEFT   
3  E01000009  Barking and Dagenham 016B  E09000002          THEFT   
4  E01000011  Barking and Dagenham 016C  E09000002          THEFT   

  Minor Category  202302  202303  202304  202305  202306  ...  202405  202406  \
0    PHONE THEFT       0       1       0       0       0  ...       0       0   
1    PHONE THEFT      10       3       2       5       8  ...      11       9   
2    PHONE THEFT       2       0       0       1       0  ...       0       0   
3    PHONE THEFT       0       1       1       3       3  ...       1       4   
4    PHONE THEFT       0       0       0       0       1  ...       0       0   

   202407  202408  202409  202410  202411  202412  202501  Refreshed Date  
0       0       0     

In [15]:
# Attach boundary polygons from the shapefiles in `shape_dir` to the per-LSOA
# table in `csv_file`, then write the combined table to `output_csv`.
csv_file = 'PhoneTheftData.csv'
shape_dir = 'LB_shp/'
output_csv = 'PhoneTheftDataWithGeometry.csv'

# Columns (lower-cased) a shapefile must provide to be used.
required_columns = ['lsoa21cd', 'lsoa21nm', 'lad22cd', 'lad22nm', 'geometry']
# Simplification tolerance, in degrees because geometries are reprojected to EPSG:4326.
simplify_tolerance = 0.00001

PhoneTheftDataTotal = pd.read_csv(csv_file)
PhoneTheftDataTotal['LSOA Code'] = PhoneTheftDataTotal['LSOA Code'].astype(str)

all_data = []
# sorted(): os.listdir order is arbitrary; sorting keeps runs reproducible.
for shp_file in sorted(os.listdir(shape_dir)):
    if not shp_file.endswith('.shp'):
        continue
    try:
        shp_path = os.path.join(shape_dir, shp_file)
        gdf = gpd.read_file(shp_path)
        gdf.columns = gdf.columns.str.lower()
        gdf = gdf.to_crs(epsg=4326)

        missing_columns = sorted(set(required_columns) - set(gdf.columns))
        if missing_columns:
            # Previously such files were dropped silently; report them instead.
            print(f"Skipping {shp_file}: missing columns {missing_columns}")
            continue

        gdf['lsoa21cd'] = gdf['lsoa21cd'].astype(str)
        all_data.append(gdf[required_columns])
    except Exception as e:
        # Best-effort: one unreadable shapefile should not abort the whole run.
        print(f"Error processing {shp_file}: {e}")

if not all_data:
    # pd.concat([]) would raise an opaque "No objects to concatenate" ValueError.
    raise ValueError(
        f"No usable shapefiles found in {shape_dir!r}; "
        f"each must contain the columns {required_columns}"
    )

all_data_gdf = pd.concat(all_data, ignore_index=True)

all_data_gdf = all_data_gdf.rename(columns={
    'lsoa21cd': 'LSOA Code',
    'lsoa21nm': 'LSOA Name',
    'lad22cd': 'Borough',
    'lad22nm': 'Borough Name'
})

# Both tables carry 'LSOA Name' and 'Borough'; merging them as-is yields
# 'LSOA Name_x'/'LSOA Name_y' and 'Borough_x'/'Borough_y'. Keep the CSV's own
# copies and take only the new columns (plus the join key) from the boundaries.
overlapping_columns = [
    col for col in all_data_gdf.columns
    if col != 'LSOA Code' and col in PhoneTheftDataTotal.columns
]
# An LSOA present in more than one shapefile would otherwise duplicate rows
# in the left merge; keep the first occurrence.
boundaries = (
    all_data_gdf
    .drop(columns=overlapping_columns)
    .drop_duplicates(subset='LSOA Code')
)

merged_df = pd.merge(PhoneTheftDataTotal, boundaries, on='LSOA Code', how='left')

# Rows without a matching boundary hold a missing value, which may surface as
# None or as float NaN. NaN is truthy and has no .simplify, so check for the
# method explicitly. Empty geometries map to None, as before.
merged_df['geometry'] = merged_df['geometry'].apply(
    lambda g: g.simplify(simplify_tolerance)
    if hasattr(g, 'simplify') and not g.is_empty
    else None
)

merged_df.to_csv(output_csv, index=False)

print(f"Data with full monthly data and geometry saved to: {output_csv}")
print("Geometry column preview:")
print(merged_df['geometry'].head())
print("Number of missing geometries:", merged_df['geometry'].isnull().sum())

Data with full monthly data and geometry saved to: PhoneTheftDataWithGeometry.csv
Geometry column preview:
0    POLYGON ((0.0898350857932359 51.53982417332992...
1    POLYGON ((0.0776356122208071 51.54280048347972...
2    POLYGON ((0.068891671808709 51.54171902918915,...
3    POLYGON ((0.0837975002438877 51.54203365900032...
4    POLYGON ((0.0839140165805131 51.54292735147401...
Name: geometry, dtype: object
Number of missing geometries: 0
