## Clip datasets to each Parish council

In [None]:
import pandas as pd
import geopandas as gpd
import numpy as np
from pathlib import Path
import json

In [7]:
# Get the current working directory of the running process; the per-dataset
# CSV outputs are later written beneath this folder.
cwd = Path.cwd()

In [5]:
# Load the parish council boundary polygons and reproject them to
# EPSG:27700 (British National Grid) in a single step
gdf = gpd.read_file(
    "G:/OS_OpenData/OS_BoundaryLine/OS_BL_Parish_Wilts.shp"
).to_crs("EPSG:27700")

# Expose the boundary file's HECTARES attribute under the name AreaPC
gdf["AreaPC"] = gdf["HECTARES"]

In [None]:
# Load the dataset configuration: a JSON object keyed by dataset name, where
# each entry describes one file to read in and how to summarise it.
# JSON is UTF-8 by specification, so the encoding is stated explicitly rather
# than relying on the platform default (which is not UTF-8 on every system).
with open('file_info.json', encoding="utf-8") as f:
    file_info_shp = json.load(f)

# Show the loaded configuration in the cell output (the file is already closed)
print(file_info_shp)



In [7]:
   
   
### To do
## List
# LNR (local nature reserve)
# NNR (national nature reserve)
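# RPG (Registered Parks and Gardens)
# SAM (Scheduled Ancient Monuments)
# WHS (World Heritage Sites)
# Forestry Commission sites
# Wiltshire Council sites
# MOD SPTA (Salisbury Plain Training Area)
# Living churchyard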

## Files
#     # "Traditional Orchards":{
#     #     "file_name":"G:\NE_OpenData\PHI\NE_Priority_Habitat_Inventory_v3-11-22_Wilts.shp",
#     #     "source":"NE",
#     #     "group":"Priority Habitat",
#     #                                         "group_field":False,
#     #                                         "unit":"HECTARES",
#     #                                         "unit_type":"AREA",
#     #                                         "area_col_name":"AREA"},  



#      "NE_Woodpasture_Parkland_Councils":{"group":"Priority Habitat",
#                                             "group_field":False,
#                                             "unit":"HECTARES",
#                                             "unit_type":"AREA",
#                                             "area_col_name":"AREA"},
#     # "NFI_AT":{"group":"TBC",
#     #                                         "group_field":"IFT_IOA",
#     #                                         "unit":"HECTARES",
#     #                                         "unit_type":"AREA",
#     #                                         "area_col_name":"Area_ha"},
#     "Protected_Landscapes_Councils":{"group":"Protected Landscape",
#                                             "group_field":False,
#                                             "unit":"HECTARES",
#                                             "unit_type":"AREA",
#                                             "area_col_name":"AREA"},  
#      "RCN_Councils":{"group":"Access and Routeways",
#                                             "group_field":False,
#                                             "unit":"Metres",
#                                             "unit_type":"LENGTH",
#                                             "area_col_name":"LENGTH"},       
# #     "RIGS_Councils":{"group":"Geology",
# #                                             "group_field":False,
# #                                             "unit":"Metres",
# #                                             "unit_type":"LENGTH",
# #                                             "area_col_name":"LENGTH"},
  
# 
#     # "Hedgerows_Councils":{"group":"Priority Habitat",
    #                                         "group_field":False,
    #                                         "unit":"Metres",
    #                                         "unit_type":"LENGTH",
    #                                         "area_col_name":"LENGTH"},
    # "HE_WHS_Count_Councils":{"group":"TBC",
    #                                         "group_field":False,
    #                                         "unit":"COUNT",
    #                                         "unit_type":"COUNT",
    #                                         "area_col_name":"COUNT",},   
    #  # "HLC_Councils":{"group":"TBC",
    #  #                                        "group_field":False,
    #  #                                        "unit":"HECTARES",
    #  #                                        "unit_type":"AREA",
    #  #                                        "area_col_name":"AREA",},
    # "LCA_Councils":{"group":"Landscape Character",
    #                                         "group_field":"Wc_lct",
    #                                         "unit":"HECTARES",
    #                                         "unit_type":"AREA",
    #                                         "area_col_name":"AREA",},       
    # "MOD_Land_Councils":{"group":"Other Land",
    #                                         "group_field":False,
    #                                         "unit":"HECTARES",
    #                                         "unit_type":"AREA",
    #                                         "area_col_name":"AREA"},
    # "NCN_Councils":{"group":"Access and Routeways",
    #                                         "group_field":False,
    #                                         "unit":"Metres",
    #                                         "unit_type":"LENGTH",
    #                                         "area_col_name":"LENGTH"}, 

In [None]:
# Clip every configured dataset to each parish council (PC) boundary and
# summarise it per PC as a feature count, a total length or a total area.
# One CSV per dataset is written to <cwd>/data/clippedData/<dataset>_v1.csv
# and all results are also gathered in df_all_shp.

# Column order shared by the per-dataset CSVs and the combined dataframe
output_columns = ['NAME', 'value', 'groupColumnValue', 'groupColumnName', 'mapGroup',
                  "unitName", "unit", "datasetName", "source"]
supported_unit_types = ("count", "length", "area")

# Create empty dataframe to fill (replaced by the combined results at the end)
df_all_shp = pd.DataFrame(columns=output_columns)

# Make sure the output folder exists before any (slow) clipping is done
output_dir = Path(cwd) / "data" / "clippedData"
output_dir.mkdir(parents=True, exist_ok=True)

# Results are collected in lists and concatenated once, instead of growing a
# dataframe with pd.concat on every iteration (which copies all rows each time)
all_frames = []

# Loop through each dataset listed in dictionary
for file, info in file_info_shp.items():
    unit_type = info["unit_type"].lower()
    if unit_type not in supported_unit_types:
        # Fail early with a clear message; otherwise no 'value' column would be
        # created and the column selection below would raise an obscure KeyError
        raise ValueError(
            f"Dataset {file!r} has unsupported unit_type {info['unit_type']!r}; "
            f"expected one of {supported_unit_types}"
        )
    group_field = info["group_field"]

    gdf_data = gpd.read_file(info["file_name"]).to_crs("EPSG:27700")
    print(file)

    # Per-PC summaries for this dataset
    local_frames = []

    # Get stats for each PC
    for pc in gdf['NAME'].unique():
        # Filter parish council dataframe to relevant PC
        gdf_local = gdf[gdf.NAME == pc]
        # Clip data to PC
        gdf_clipped = gdf_data.clip(gdf_local)

        # If dataset is a count, just need to add all occurrences
        if unit_type == "count":
            # NOTE(review): only the count column is kept here, so a count
            # dataset that also sets group_field will fail below - confirm
            # that count datasets never use group_field.
            count_col = info["count_col_name"]
            gdf_dissolved = gdf_clipped[[count_col, "geometry"]].dissolve(aggfunc="count")
            gdf_dissolved['value'] = gdf_dissolved[count_col]
        else:
            # If dataset is split by groups, need to summarise for each group
            if group_field:
                gdf_dissolved = (
                    gdf_clipped[[group_field, "geometry"]]
                    .dissolve(by=group_field, aggfunc="sum")
                    .reset_index()
                )
            else:
                # If no groupings, can just summarise all features together
                gdf_dissolved = gdf_clipped[["geometry"]].dissolve(aggfunc="sum").reset_index()

            # Update to readable units by rounding. EPSG:27700 is in metres, so
            # lengths are metres and areas (square metres) / 10000 are hectares.
            if unit_type == "length":
                gdf_dissolved['value'] = (gdf_dissolved.geometry.length).round(2)
            else:  # unit_type == "area"
                gdf_dissolved['value'] = (gdf_dissolved.geometry.area/10000).round(2)

        # Update names of columns
        gdf_dissolved['NAME'] = pc
        if group_field:
            gdf_dissolved['groupColumnValue'] = gdf_dissolved[group_field]
            # Optional mapping from raw group codes to display labels; codes
            # missing from the mapping become NaN
            value_labels = info.get("groupColumnValue")
            if value_labels:
                gdf_dissolved['groupColumnValue'] = gdf_dissolved['groupColumnValue'].map(value_labels)
        else:
            gdf_dissolved['groupColumnValue'] = "Total"

        gdf_dissolved['groupColumnName'] = info["groupColumnName"]
        gdf_dissolved['mapGroup'] = info["group"]
        gdf_dissolved['unitName'] = info["unit_type"]
        gdf_dissolved['unit'] = info["unit"]
        gdf_dissolved['datasetName'] = file
        gdf_dissolved['source'] = info["source"]

        # Drop the geometry and keep only the reporting columns; PCs with no
        # rows contribute nothing, so they are not carried into the concat
        pc_summary = gdf_dissolved[output_columns]
        if not pc_summary.empty:
            local_frames.append(pc_summary)

    if local_frames:
        df_local_shp = pd.concat(local_frames)
    else:
        df_local_shp = pd.DataFrame(columns=output_columns)

    # Output as individual CSVs so each dataset can be saved separately
    df_local_shp.to_csv(output_dir / f"{file}_v1.csv", index=False)
    if not df_local_shp.empty:
        all_frames.append(df_local_shp)

if all_frames:
    df_all_shp = pd.concat(all_frames)
    

Landscape Character Areas
Ancient Tree Inventory
