In [None]:
import pandas as pd
import geopandas as gp
import matplotlib

In [None]:
# Read the LSOA (2001) -> ward (2005) lookup table and display it.
glud_mapping_to_lsoa_df = pd.read_csv(
    "data/mapping_files/Lower_Layer_Super_Output_Area_(2001)_to_Ward_(2005)_Lookup_in_England_and_Wales.csv"
)
glud_mapping_to_lsoa_df

In [None]:
# Read the 2001 LSOA geometries into a GeoDataFrame.
lsoa_2001_gdf = gp.read_file(
    "data/reference_geometries/lsoa_2001.geojson"
)

In [None]:
# Preview the first five LSOA rows.
lsoa_2001_gdf.head(n=5)

In [None]:
# Attach the 2005 ward code and name to each 2001 LSOA. The inner join keeps
# only rows whose LSOA01CD appears in both the geometries and the lookup.
lsoa_2001_w_ward_2005_names_gdf = lsoa_2001_gdf.merge(
    glud_mapping_to_lsoa_df.loc[:, ["LSOA01CD", "WD05CD", "WD05NM"]],
    how="inner",
    on="LSOA01CD",
)


In [None]:
# Dissolve the LSOA geometries that share a ward code/name, giving one row
# (and one combined geometry) per 2005 ward.
ward_2005_gdf = (
    lsoa_2001_w_ward_2005_names_gdf[["WD05CD", "WD05NM", "geometry"]]
    .dissolve(by=["WD05CD", "WD05NM"])
    .reset_index()
)

In [None]:
# Persist the dissolved ward geometries as a staged GeoJSON file.
ward_2005_gdf.to_file(
    "data/staged_files/s1_ward_2005_geometry_file_for_GLUD_analysis.geojson",
    driver="GeoJSON",
)

In [None]:
# This cell replaces ward_2005_gdf's geometry with centroids, so running it a
# second time would copy centroid points (not polygons) into ward_2005_gdf_poly.
# Fail loudly instead of silently corrupting the polygon frame.
if (ward_2005_gdf.geom_type == "Point").any():
    raise RuntimeError(
        "ward_2005_gdf already holds centroid points; re-run the dissolve cell before this one"
    )

# Keep a polygon copy of the wards before the geometry is swapped for centroids.
ward_2005_gdf_poly = ward_2005_gdf.copy()
# Collapse every City of London ward code (prefix "00AA") onto the single code "00AA".
ward_2005_gdf_poly.loc[ward_2005_gdf_poly["WD05CD"].str.startswith("00AA"), "WD05CD"] = "00AA"

# Take the centroids in a projected CRS (EPSG:27700, the one this notebook already
# uses for buffering and areas) and convert them back to the frame's own CRS.
# Centroids computed directly in a geographic CRS are inaccurate and make
# geopandas emit a warning.
# Assumes ward_2005_gdf has a CRS set (read from the GeoJSON) - TODO confirm.
ward_2005_gdf.geometry = (
    ward_2005_gdf.geometry.to_crs(27700).centroid.to_crs(ward_2005_gdf.crs)
)


# Making the actual table for the regression 

In [None]:
# Load the London local-authority boundaries, reproject them to WGS84 (EPSG:4326)
# and tag every row with the same key so they can be dissolved together.
london_boundary_gdf = (
    gp.read_file("data/reference_geometries/london_lad.geojson")
    .to_crs(4326)
    .assign(city="london")
)
# Merge all boundaries into a single "london" geometry, then move to EPSG:27700
# so the buffer distance below is expressed in that CRS's units.
london_boundary_gdf = (
    london_boundary_gdf[["city", "geometry"]]
    .dissolve(by="city")
    .reset_index()
    .to_crs(27700)
)
# Grow the outline by 1000 units, then return to WGS84.
london_boundary_gdf.geometry = london_boundary_gdf.geometry.buffer(distance=1000)
london_boundary_gdf = london_boundary_gdf.to_crs(4326)

In [None]:
# Read the 2011 MSOA geometries, keeping only the MSOA code (renamed to
# "geography code") and the geometry.
msoa_gdf = (
    gp.read_file("data/reference_geometries/msoa_2011_geometries.geojson")
    .rename(columns={"MSOA11CD": "geography code"})
    [["geography code", "geometry"]]
)
msoa_gdf

In [None]:

# Keep only the MSOAs lying within the (buffered) London boundary, discard the
# join bookkeeping column and renumber the rows from zero.
# NOTE: this rebinds msoa_gdf, so the cell is meant to be run once after the
# MSOA load cell.
msoa_gdf = (
    msoa_gdf
    .sjoin(london_boundary_gdf, predicate="within")
    .drop(columns="index_right")
    .reset_index(drop=True)
)
# Area of each MSOA, measured after reprojecting to EPSG:27700.
msoa_gdf["msoa_area"] = msoa_gdf.to_crs(epsg=27700).area
msoa_gdf

In [None]:
# Interactive map of the last of the first two MSOA rows (a quick visual sanity check).
msoa_gdf.iloc[:2].iloc[-1:].explore()

In [None]:
# Assign each ward to the MSOA that contains its point geometry (ward_2005_gdf
# holds ward centroids at this stage), then drop the join bookkeeping column.
ward_2005_gdf_mapped_to_msoas = (
    ward_2005_gdf
    .sjoin(msoa_gdf, predicate="within")
    .drop(columns="index_right")
)
ward_2005_gdf_mapped_to_msoas

In [None]:
# City of London wards are adjusted manually: every ward whose code starts
# with "00AA" is given the single code "00AA".
is_city_of_london_ward = ward_2005_gdf_mapped_to_msoas["WD05CD"].str.startswith("00AA")
ward_2005_gdf_mapped_to_msoas.loc[is_city_of_london_ward, "WD05CD"] = "00AA"

In [None]:
# Read the ward-level GLUD land-use table and rename its area-code column to
# WD05CD so it can be joined to the ward frames.
glud_data_df = (
    pd.read_csv("data/glud/land-use-glud-ward.csv")
    .rename(columns={"Area Code": "WD05CD"})
)
glud_data_df

In [None]:
# Note: the City of London is missing for some reason.

In [None]:
# Join the GLUD figures onto the ward -> MSOA mapping by ward code and drop
# the ward-level identifiers and helper columns that are no longer needed.
glud_data_gdf = (
    ward_2005_gdf_mapped_to_msoas
    .merge(glud_data_df, on="WD05CD")
    .drop(
        columns=[
            "WD05CD",
            "WD05NM",
            "city",
            "msoa_area",
            "Local Authority Name",
            "Area name",
        ]
    )
)
# Normalise the column names: lower case, "%" spelled out as "pct", spaces as underscores.
glud_data_gdf.columns = [
    column_name.lower().replace("%", "pct").replace(" ", "_")
    for column_name in glud_data_gdf.columns
]
glud_data_gdf = glud_data_gdf.drop(
    columns=["pct_total_area_of_all_land_types", "quality_of_fit_indicator"]
)
glud_data_gdf

In [None]:
# Keep a single row per MSOA: the first one encountered for each geography_code.
# NOTE(review): when several wards map to the same MSOA only the first ward's
# GLUD values survive here - confirm this is intended rather than aggregating.
glud_data_gdf = glud_data_gdf.drop_duplicates(subset="geography_code", keep="first")
glud_data_gdf.explore()

In [None]:
# Build one polygon per MSOA: look up each ward polygon's MSOA code via the
# ward -> MSOA mapping, then dissolve the ward polygons that share an MSOA.
poly_msoas_gdf = (
    ward_2005_gdf_poly
    .merge(
        ward_2005_gdf_mapped_to_msoas.drop(columns="geometry"),
        on=["WD05CD", "WD05NM"],
    )
    [["geography code", "geometry"]]
    .dissolve(by="geography code")
    .reset_index()
    .rename(columns={"geography code": "geography_code"})
)

In [None]:
# Combine the MSOA polygons with the GLUD attributes; the GLUD frame's own
# geometry is dropped so the polygon geometry is the one kept.
output_glud_data_gdf = poly_msoas_gdf.merge(
    glud_data_gdf.drop(columns="geometry"),
    on="geography_code",
)
output_glud_data_gdf

In [None]:
# Interactive map of the output polygons coloured by the greenspace area column.
output_glud_data_gdf.explore(column="area_of_greenspace")

# Write this to disk

In [None]:
%%time
output_glud_data_gdf["geometry"] = output_glud_data_gdf["geometry"].to_wkt()
output_glud_data_gdf.to_csv("data/staged_files/s1_glud_land_cover_data_msoa_level.csv", index=False)