In [None]:
import pandas as pd
import geopandas as gp
import matplotlib

In [None]:
# Build one London outline from the local-authority polygons:
# tag every row with the same key, dissolve on it, then buffer the result.
lad_path = "data/reference_geometries/london_lad.geojson"
london_boundary_gdf = gp.read_file(lad_path).to_crs(4326)  # WGS84
london_boundary_gdf["city"] = "london"
london_boundary_gdf = (
    london_boundary_gdf[["city", "geometry"]]
    .dissolve("city")   # all rows share one key -> a single merged geometry
    .reset_index()
    .to_crs(27700)      # buffer in EPSG:27700 so the distance is in that CRS's units
)
london_boundary_gdf.geometry = london_boundary_gdf.geometry.buffer(1000)
# back to WGS84 for the spatial join that follows
london_boundary_gdf = london_boundary_gdf.to_crs(4326)

In [None]:
# Read the MSOA 2011 polygons and keep only the identifier and the geometry.
# MSOA11CD is renamed to "geography code", the key used by the rest of the notebook.
msoa_path = "data/reference_geometries/msoa_2011_geometries.geojson"
msoa_raw_gdf = gp.read_file(msoa_path)
msoa_gdf = msoa_raw_gdf.rename(columns={"MSOA11CD": "geography code"})[
    ["geography code", "geometry"]
]
msoa_gdf

In [None]:
# Keep only the MSOAs that lie entirely within the buffered London outline.
# Only the outline's geometry is passed to the join, so no boundary attribute
# columns are copied onto msoa_gdf and re-running this cell does not pile up
# suffixed duplicates of them.
london_outline_gdf = london_boundary_gdf[["geometry"]]
# A spatial join is only meaningful when both layers share a CRS; align if they differ.
if msoa_gdf.crs is not None and london_outline_gdf.crs != msoa_gdf.crs:
    london_outline_gdf = london_outline_gdf.to_crs(msoa_gdf.crs)

msoa_gdf = (
    msoa_gdf.sjoin(london_outline_gdf, predicate="within")
    .drop(columns="index_right")
    .reset_index(drop=True)
)
# Total area of each MSOA, measured in EPSG:27700; used later as the denominator
# of every land-use ratio.
msoa_gdf["msoa_area"] = msoa_gdf.to_crs(27700).geometry.area
msoa_gdf

In [None]:
# Fraction of each MSOA's area covered by the "Continuous urban fabric" clc_12 layer.
clc_layer_path = "data/staged_files/clc_12/Continuous urban fabric/greater_london_Continuous urban fabric_clc_12.geojson"
# Overlay the layer on the London MSOAs; each resulting piece keeps the
# "geography code" and "msoa_area" of the MSOA it falls in.
gdf = (
    gp.read_file(clc_layer_path)
    .drop(columns="index_right")
    .overlay(msoa_gdf)
)
gdf["unique_idx"] = "uniq_id_00" + gdf.index.astype(str)  # not referenced again in this notebook
gdf["continuous_land_area"] = gdf.to_crs(27700).geometry.area  # area measured in EPSG:27700
gdf["pct_continuous_urban_land_clc_12"] = gdf["continuous_land_area"] / gdf["msoa_area"]

# Sum the per-piece ratios into one value per MSOA.
# The result stays indexed by "geography code" (no reset_index here).
continuous_urban_land_df = (
    gdf[["geography code", "pct_continuous_urban_land_clc_12"]]
    .groupby("geography code")
    .sum()
)

continuous_urban_land_df

In [None]:
# Fraction of each MSOA's area covered by the "Discontinuous urban fabric" clc_12 layer.
clc_layer_path = "data/staged_files/clc_12/Discontinuous urban fabric/greater_london_Discontinuous urban fabric_clc_12.geojson"
# Overlay the layer on the London MSOAs; each resulting piece keeps the
# "geography code" and "msoa_area" of the MSOA it falls in.
gdf = (
    gp.read_file(clc_layer_path)
    .drop(columns="index_right")
    .overlay(msoa_gdf)
)
gdf["unique_idx"] = "uniq_id_00" + gdf.index.astype(str)  # not referenced again in this notebook
gdf["discontinuous_land_area"] = gdf.to_crs(27700).geometry.area  # area measured in EPSG:27700
# NOTE(review): the column is named "pct_continuous_urban_land_clc_12" although it
# holds the discontinuous share. The cell that combines the two urban-fabric frames
# sums them by this shared column name, so the name is left unchanged here.
gdf["pct_continuous_urban_land_clc_12"] = gdf["discontinuous_land_area"] / gdf["msoa_area"]

# Sum the per-piece ratios into one value per MSOA.
# The result stays indexed by "geography code" (no reset_index here).
discontinuous_urban_land_df = (
    gdf[["geography code", "pct_continuous_urban_land_clc_12"]]
    .groupby("geography code")
    .sum()
)
discontinuous_urban_land_df

In [None]:
# Combine continuous and discontinuous urban fabric into one urban share per MSOA.
# Both inputs are indexed by "geography code" and use the same value column, so
# stacking them and summing per code adds the two shares together.
stacked_urban_df = pd.concat(
    [continuous_urban_land_df, discontinuous_urban_land_df]
).reset_index()
urban_land_df = (
    stacked_urban_df
    .groupby("geography code")
    .sum()
    .reset_index()
)
urban_land_df

In [None]:
# Fraction of each MSOA's area covered by the "Green urban areas" clc_12 layer.
clc_layer_path = "data/staged_files/clc_12/Green urban areas/greater_london_Green urban areas_clc_12.geojson"
# Overlay the layer on the London MSOAs; each resulting piece keeps the
# "geography code" and "msoa_area" of the MSOA it falls in.
gdf = (
    gp.read_file(clc_layer_path)
    .drop(columns="index_right")
    .overlay(msoa_gdf)
)
gdf["unique_idx"] = "uniq_id_00" + gdf.index.astype(str)  # not referenced again in this notebook
gdf["green_urban_land_area"] = gdf.to_crs(27700).geometry.area  # area measured in EPSG:27700
gdf["pct_green_urban_land_area_clc_12"] = gdf["green_urban_land_area"] / gdf["msoa_area"]

# Sum the per-piece ratios into one row per MSOA, with the code as a regular column.
green_urban_land_df = (
    gdf[["geography code", "pct_green_urban_land_area_clc_12"]]
    .groupby("geography code")
    .sum()
    .reset_index()
)
green_urban_land_df

In [None]:
# Fraction of each MSOA's area covered by the "Industrial or commercial units" clc_12 layer.
clc_layer_path = "data/staged_files/clc_12/Industrial or commercial units/greater_london_Industrial or commercial units_clc_12.geojson"
# Overlay the layer on the London MSOAs; each resulting piece keeps the
# "geography code" and "msoa_area" of the MSOA it falls in.
gdf = (
    gp.read_file(clc_layer_path)
    .drop(columns="index_right")
    .overlay(msoa_gdf)
)
gdf["unique_idx"] = "uniq_id_00" + gdf.index.astype(str)  # not referenced again in this notebook
gdf["industrial_commerical_land_area"] = gdf.to_crs(27700).geometry.area  # area measured in EPSG:27700
gdf["pct_industrial_commerical_land_area_clc_12"] = (
    gdf["industrial_commerical_land_area"] / gdf["msoa_area"]
)

# Sum the per-piece ratios into one row per MSOA, with the code as a regular column.
industrial_commerical_land_area_df = (
    gdf[["geography code", "pct_industrial_commerical_land_area_clc_12"]]
    .groupby("geography code")
    .sum()
    .reset_index()
)
industrial_commerical_land_area_df

In [None]:
# Fraction of each MSOA's area covered by the
# "Road and rail networks and associated land" clc_12 layer.
clc_layer_path = "data/staged_files/clc_12/Road and rail networks and associated land/greater_london_Road and rail networks and associated land_clc_12.geojson"
# Overlay the layer on the London MSOAs; each resulting piece keeps the
# "geography code" and "msoa_area" of the MSOA it falls in.
gdf = (
    gp.read_file(clc_layer_path)
    .drop(columns="index_right")
    .overlay(msoa_gdf)
)
gdf["unique_idx"] = "uniq_id_00" + gdf.index.astype(str)  # not referenced again in this notebook
gdf["road_and_rail_land_area"] = gdf.to_crs(27700).geometry.area  # area measured in EPSG:27700
gdf["pct_road_and_rail_land_area_clc_12"] = gdf["road_and_rail_land_area"] / gdf["msoa_area"]

# Sum the per-piece ratios into one row per MSOA, with the code as a regular column.
road_and_rail_land_area_df = (
    gdf[["geography code", "pct_road_and_rail_land_area_clc_12"]]
    .groupby("geography code")
    .sum()
    .reset_index()
)
road_and_rail_land_area_df.head()

In [None]:
# Fraction of each MSOA's area covered by the "Sport and leisure facilities" clc_12 layer.
clc_layer_path = "data/staged_files/clc_12/Sport and leisure facilities/greater_london_Sport and leisure facilities_clc_12.geojson"
# Overlay the layer on the London MSOAs; each resulting piece keeps the
# "geography code" and "msoa_area" of the MSOA it falls in.
gdf = (
    gp.read_file(clc_layer_path)
    .drop(columns="index_right")
    .overlay(msoa_gdf)
)
gdf["unique_idx"] = "uniq_id_00" + gdf.index.astype(str)  # not referenced again in this notebook
gdf["sports_facilities_land_area"] = gdf.to_crs(27700).geometry.area  # area measured in EPSG:27700
gdf["pct_sports_facilities_land_area_clc_12"] = (
    gdf["sports_facilities_land_area"] / gdf["msoa_area"]
)

# Sum the per-piece ratios into one row per MSOA, with the code as a regular column.
sports_facilities_land_area_df = (
    gdf[["geography code", "pct_sports_facilities_land_area_clc_12"]]
    .groupby("geography code")
    .sum()
    .reset_index()
)
sports_facilities_land_area_df

In [None]:
# Attach every land-use share to the MSOA geometries, one column per land-use class.
# The join key is stated explicitly so a column name accidentally shared between
# two tables can never become part of the key.
land_use_tables = [
    urban_land_df,
    green_urban_land_df,
    industrial_commerical_land_area_df,
    road_and_rail_land_area_df,
    sports_facilities_land_area_df,
]
output_gdf = msoa_gdf[["geography code", "geometry"]]
for land_use_df in land_use_tables:
    output_gdf = output_gdf.merge(land_use_df, on="geography code", how="outer")

# An MSOA that has no pieces of a given class is absent from that table and
# comes out of the merge as NaN; it means a share of 0. Only the land-use columns
# are filled: a scalar 0 is not a valid fill value for the geometry column.
land_use_columns = [
    col for col in output_gdf.columns if col not in ("geography code", "geometry")
]
output_gdf = output_gdf.fillna({col: 0 for col in land_use_columns})
output_gdf

# Write to disk 

In [None]:
%%time
output_gdf.drop(columns="geometry").to_csv("data/staged_files/s1_clc_land_use_area_pct.csv", index=False)

In [None]:
# Interactive map of the MSOAs, coloured by the sport and leisure share.
output_gdf.explore(column="pct_sports_facilities_land_area_clc_12")