In [1]:
# Enable IPython's autoreload extension in mode 2, so that imported modules
# (e.g. the project scripts imported below) are reloaded before code is run.
%load_ext autoreload
%autoreload 2

In [16]:
import os
import sys

import geopandas as gpd
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import shapely as shp
from tqdm import tqdm

sys.path.append('../scripts/')  # make the project's helper modules importable
import loadpaths
import land_cover_analysis as lca
import land_cover_visualisation as lcv
# Project path look-up; indexed by key further down (e.g. 'pd_outline',
# 'landscape_character_grid_path').
path_dict = loadpaths.loadpaths()


## Merge main predictions in sections:

In [10]:
# Column that holds the section code of each tile, and the folder containing
# the per-tile shapefiles of the main predictions.
col_sections = '100_grid'
path_individual_tiles = '/media/data-hdd/gis_pd/predictions/all_tiles_pd_notdissolved/predictions_LCU_2023-04-24-1259_notdissolved_padding44_FGH-override/'

# Tile outlines of the tight landscape-character grid; 1617 rows are expected
# and the section column must be present.
tile_outlines_tight = lca.load_pols('../content/landscape_character_grid/Landscape_Character_Grid_tight.shp')
assert len(tile_outlines_tight) == 1617 
assert col_sections in tile_outlines_tight.columns

# Distinct section codes; the merge loop iterates over these.
arr_sections = tile_outlines_tight[col_sections].unique()

In [17]:
# Merge the per-tile shapefiles of the main predictions into one file per section.
#
# The folder name used in the output filename is the same for every section,
# so derive it once outside the loop. normpath strips any trailing '/' (also
# repeated ones) before basename picks the last path component.
curr_dir_name = os.path.basename(os.path.normpath(path_individual_tiles))

for section in tqdm(arr_sections):
    # PLAN_NO ids of all tiles that belong to the current section.
    tiles_in_section = list(tile_outlines_tight[tile_outlines_tight[col_sections] == section]['PLAN_NO'])
    filename_merger = f'merged_{curr_dir_name}_section-{section}.shp'

    lca.merge_individual_shp_files(dir_indiv_tile_shp=path_individual_tiles, 
                                    delete_individual_shp_files=False,
                                    list_tile_ids_to_merge=tiles_in_section,
                                    filename=filename_merger)

  0%|          | 0/26 [00:00<?, ?it/s]

100%|██████████| 26/26 [00:00<00:00, 409.82it/s]


## Merge detailed predictions in sections:
Differences from the main merge above:
- we need to merge 3 different shp files (C, D and E)
- we need to exclude no-class predictions from each
- we need to include the FGH layer from the main predictions
- open question: create new individual tiles on the fly?

In [None]:
# Section column of the tile grid.
col_sections = '100_grid'

# Parent folders of the per-tile predictions, one per classifier.
# NOTE(review): the c/d/e paths are identical and equal to the shared parent
# folder -- they look like placeholders; confirm the intended folders.
path_individual_tiles_main = '/media/data-hdd/gis_pd/predictions/all_tiles_pd_notdissolved/predictions_LCU_2023-04-24-1259_notdissolved_padding44_FGH-override/'
path_individual_tiles_c = '/media/data-hdd/gis_pd/predictions/all_tiles_pd_notdissolved/'
path_individual_tiles_d = '/media/data-hdd/gis_pd/predictions/all_tiles_pd_notdissolved/'
path_individual_tiles_e = '/media/data-hdd/gis_pd/predictions/all_tiles_pd_notdissolved/'

# Tile outlines of the tight grid; 1617 rows are expected and the section
# column must be present.
tile_outlines_tight = lca.load_pols('../content/landscape_character_grid/Landscape_Character_Grid_tight.shp')
assert len(tile_outlines_tight) == 1617 
assert col_sections in tile_outlines_tight.columns

# Distinct section codes, used by the per-section merge.
arr_sections = tile_outlines_tight[col_sections].unique()

def create_list_all_subdirs(parent_dir):
    """Return the paths of the immediate sub-directories of `parent_dir`.

    Parameters
    ----------
    parent_dir : str
        Directory whose entries are listed.

    Returns
    -------
    list of str
        `parent_dir` joined with the name of every entry that is a directory,
        in the order reported by `os.listdir`. Regular files are skipped.
    """
    subdir_paths = []
    for entry_name in os.listdir(parent_dir):
        candidate = os.path.join(parent_dir, entry_name)
        if os.path.isdir(candidate):
            subdir_paths.append(candidate)
    return subdir_paths

# Per classifier: the list of sub-directories found in its prediction folder.
list_tiles_dict = {
    'main': create_list_all_subdirs(path_individual_tiles_main),
    'c': create_list_all_subdirs(path_individual_tiles_c),
    'd': create_list_all_subdirs(path_individual_tiles_d),
    'e': create_list_all_subdirs(path_individual_tiles_e),
}

# Every classifier is expected to provide exactly 1617 sub-directories.
for k, v in list_tiles_dict.items():
    assert len(v) == 1617, f'len(v) = {len(v)} for k = {k}'

In [None]:
## Step 1: merge different predictions for the same tile into one file:
# For every tile, load the prediction of each classifier, keep only the
# relevant polygons (F/G/H classes from 'main', everything except the no-class
# label '0' from the detailed classifiers) and write the combined polygons to
# one shapefile per tile.
# NOTE(review): the output folder equals the shared parent folder of the
# predictions -- presumably a placeholder; confirm before running.
path_folder_complete_tiles = '/media/data-hdd/gis_pd/predictions/all_tiles_pd_notdissolved/' 
list_tiles = list(tile_outlines_tight['PLAN_NO'])
col_label = 'lc_label'
fgh_classes = ['F', 'G', 'H']  # first letter of the labels kept from the main predictions

for tile in list_tiles:
    pols_dict = {}
    for name_classifier, list_tiles_pred in list_tiles_dict.items():
        # Match the tile id against the folder name only, so that a parent
        # directory name can never produce a (false) match.
        curr_tile = [x for x in list_tiles_pred if tile in os.path.basename(x)]
        assert len(curr_tile) == 1, f'len(curr_tile) = {len(curr_tile)}'
        pols_tile = lca.load_pols(curr_tile[0])

        # Build the row mask on the loaded polygons (not on the dict that
        # collects them) and test membership element-wise.
        if name_classifier == 'main':  # only keep FGH layer 
            keep_rows = pols_tile[col_label].str[0].isin(fgh_classes)
        else:  # remove no class
            keep_rows = pols_tile[col_label] != '0'
        pols_dict[name_classifier] = pols_tile[keep_rows]

    # geopandas has no concat(); pandas.concat on GeoDataFrames is used instead.
    pols_merged = pd.concat([pols_dict['main'], pols_dict['c'], pols_dict['d'], pols_dict['e']],
                            ignore_index=True)

    # Sanity check: the merged polygons should cover 1e6 area units (within 10)
    # -- presumably 1 km^2 with a CRS in metres; verify.
    total_area = pols_merged.area.sum()
    assert total_area > 0, f'total_area = {total_area}'
    assert np.isclose(total_area, 1e6, atol=1e1), f'total_area = {total_area}'

    pols_merged.to_file(os.path.join(path_folder_complete_tiles, f'{tile}.shp'))

In [19]:
## Step 2: merge all tiles into one file per section:
# NOTE(review): both folders equal the shared parent folder of the
# predictions -- presumably placeholders; confirm before running.
path_folder_complete_tiles = '/media/data-hdd/gis_pd/predictions/all_tiles_pd_notdissolved/'
path_folder_sections = '/media/data-hdd/gis_pd/predictions/all_tiles_pd_notdissolved/'  # not used by the loop below

# The folder name used in the output filename is the same for every section,
# so derive it once outside the loop. normpath strips any trailing '/' (also
# repeated ones) before basename picks the last path component.
curr_dir_name = os.path.basename(os.path.normpath(path_folder_complete_tiles))

for section in tqdm(arr_sections):
    # PLAN_NO ids of all tiles that belong to the current section.
    tiles_in_section = list(tile_outlines_tight[tile_outlines_tight[col_sections] == section]['PLAN_NO'])
    filename_merger = f'merged_{curr_dir_name}_section-{section}.shp'

    lca.merge_individual_shp_files(dir_indiv_tile_shp=path_folder_complete_tiles, 
                                    delete_individual_shp_files=False,
                                    list_tile_ids_to_merge=tiles_in_section,
                                    filename=filename_merger)
    

## Create 100km^2 grids for different mergers:

In [23]:
# Geometry of the first row of the outline file.
outline_pd = lca.load_pols(path_dict['pd_outline']).iloc[0]['geometry']

# Keep only the grid tiles that intersect the outline, with a fresh 0..n-1 index.
all_tiles = lca.load_pols(path_dict['landscape_character_grid_path'])
all_tiles = all_tiles.loc[all_tiles['geometry'].intersects(outline_pd)].reset_index(drop=True)
assert len(all_tiles) == 1617

In [24]:
# (all_tiles['COUNTY'].unique() would list the county codes themselves.)
# Number of selected tiles per value of the 'COUNTY' column:
n_tiles_per_county = all_tiles.groupby('COUNTY').size()
n_tiles_per_county

COUNTY
CHESHIR    108
DERBYSH    993
GT MAN      32
SOUTHYO    194
STAFFOR    229
WESTYOR     61
dtype: int64

In [25]:
# Section code of a tile: characters 0-2 of PLAN_NO followed by character 4
# (the resulting codes look like 'SE00' or 'SK17').
all_tiles['100_grid'] = all_tiles['PLAN_NO'].map(lambda plan_no: plan_no[:3] + plan_no[4])

In [26]:
# Display the distinct section codes found in the '100_grid' column.
all_tiles['100_grid'].unique()

array(['SE00', 'SE01', 'SE10', 'SE20', 'SJ96', 'SJ97', 'SJ98', 'SK05',
       'SK06', 'SK07', 'SK08', 'SK09', 'SK04', 'SK14', 'SK15', 'SK16',
       'SK17', 'SK18', 'SK19', 'SK25', 'SK26', 'SK27', 'SK28', 'SK29',
       'SK36', 'SK37'], dtype=object)

In [27]:
# Save the tile grid (including the '100_grid' column). Use the same
# repo-relative path that the merge cells load the tight grid from, rather
# than a user-specific absolute path, so the notebook runs on other machines.
all_tiles.to_file('../content/landscape_character_grid/Landscape_Character_Grid_tight.shp')