In [1]:
## Create plot with sunburst diagram of classes in the dataset
## Enable IPython's autoreload extension so that edits to imported modules
## are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [2]:
import os, sys
sys.path.append('../scripts/')
import numpy as np
import matplotlib.pyplot as plt
import rasterio, rasterio.plot
import xarray as xr
import rioxarray as rxr
from tqdm import tqdm
import pandas as pd
import time, datetime, json
import geopandas as gpd
import loadpaths
import land_cover_analysis as lca
import land_cover_visualisation as lcv
import land_cover_models as lcm
import create_patches_from_polygons_data
from torch.utils.data import TensorDataset, DataLoader
import plotly.express as px
import plotly.graph_objects as go

## Load the path settings from the local `loadpaths` module
## (presumably found via the '../scripts/' entry appended to sys.path above).
## NOTE(review): path_dict is not referenced again in the cells visible here.
path_dict = loadpaths.loadpaths()

  shapely_geos_version, geos_capi_version_string


In [3]:
## Full name of each main class, keyed by the first character of a class code.
mapping_dict_to_full = {
    ## main land-cover classes
    'C': 'Wood and Forest Land',
    'D': 'Moor and Heath Land',
    'E': 'Agro-Pastoral Land',
    'F': 'Water and Wetland',
    'G': 'Rock and Coastal Land',
    'H': 'Developed Land',
    'I': 'Unclassified Land',
    ## both '0' and 'U' stand for the absence of a class
    '0': 'NO CLASS',
    'U': 'NO CLASS',
}

In [4]:
## Build the label-schema table and drop the 1980s and index columns, which are not used here.
columns_not_needed = ['description_80s', 'code_80s', 'index_2022', 'index_80s']
df_schema = lca.create_df_mapping_labels_2022_to_80s().drop(columns=columns_not_needed)

## The main class is the first character of the 2022 code; look up its full name.
df_schema['main_class_code'] = [code[0] for code in df_schema['code_2022']]
df_schema['main_description'] = [mapping_dict_to_full[letter] for letter in df_schema['main_class_code']]

In [17]:
## Class codes that are skipped when the sunburst data are assembled.
## Earlier, shorter selection kept for reference:
## ['D4a', 'D4b', 'D8', 'F1', 'F3b', 'F3c', 'G2b', 'G3a', 'G3b', 'G3c', 'G3d']
classes_ignore_2 = ['C4b', 'C4c', 'H1c', 'H1d']
classes_ignore = [
    'C3',
    'D4a', 'D4b', 'D7a', 'D7b', 'D8',
    'E1',
    'F1', 'F3b', 'F3c',
    'G2b', 'G3a', 'G3b', 'G3c', 'G3d',
    ## second list is appended at the end
    *classes_ignore_2,
]

In [23]:
## Order the schema by 2022 code and renumber the rows from 0.
df_schema = df_schema.sort_values(by=['code_2022']).reset_index(drop=True)

## Rename code 'C4a' to 'C4'; every other code is left untouched.
df_schema.loc[df_schema['code_2022'] == 'C4a', 'code_2022'] = 'C4'

df_schema.head(50)

Unnamed: 0,description_2022,code_2022,main_class_code,main_description
0,NO CLASS,0,0,NO CLASS
1,Broadleaved High Forest,C1,C,Wood and Forest Land
2,Coniferous High Forest,C2,C,Wood and Forest Land
3,Mixed High Forest,C3,C,Wood and Forest Land
4,Scrub,C4,C,Wood and Forest Land
5,Scrub Pasture,C4b,C,Wood and Forest Land
6,Woodland/Scrub Edge,C4c,C,Wood and Forest Land
7,Clear Felled/New Plantings in Forest Areas,C5,C,Wood and Forest Land
8,Upland Heath,D1a,D,Moor and Heath Land
9,Upland Heath Blanket Bog,D1b,D,Moor and Heath Land


In [24]:
## Flat node lists for the sunburst: one (child, parent, value) triple per segment.
data_plot = {'child': [], 'parents': [], 'values': []}
value_size = 2


def _add_node(child, parent, value):
    """Append one sunburst node (name, parent name, segment value) to data_plot."""
    data_plot['child'].append(child)
    data_plot['parents'].append(parent)
    data_plot['values'].append(value)


for code in df_schema['code_2022']:
    if code == '0' or code in classes_ignore:
        continue

    n_chars = len(code)
    if n_chars == 1:
        ## Main class listed in the schema itself; only 'I' gets its own size.
        _add_node(code, 'LC schema', value_size if code == 'I' else 0)
    elif n_chars == 2:
        ## Two-character code: hangs directly under its main class.
        _add_node(code, code[0], value_size)
    elif n_chars == 3:
        ## Three-character code: hangs under its two-character prefix ...
        prefix = code[:2]
        _add_node(code, prefix, value_size)
        ## ... which is added as a zero-valued node if it is not present yet.
        if prefix not in data_plot['child']:
            _add_node(prefix, code[0], 0)

## Ensure every main class (first character of any node) exists as a zero-valued root-level node.
list_low_level = sorted({child[0] for child in data_plot['child']})
for low_level in list_low_level:
    if low_level not in data_plot['child']:
        _add_node(low_level, 'LC schema', 0)

In [28]:
## One colour per processing stage; the key order is also the legend order.
colour_dict = {
    'Main-class classifier': '#381537',
    'C classifier': '#0e8212',
    'D classifier': '#a33b1a',
    'E classifier': '#465E85',
    'Post-processing': '#333333',
    'OS NGD': '#8b7c1e',
    'Not estimated': '#cccccc',
}

## Explicit code groups. The rules below are checked in order and the first match wins,
## so e.g. 'G' and 'H' are 'Not estimated' even though they also start with G/H.
codes_not_estimated = ['C3', 'D6b', 'D7', 'D7a', 'D7b', 'E1', 'E2', 'F', 'F3',
                       'G', 'G2', 'H', 'H1', 'H2', 'H3', 'I']
## NOTE(review): 'C4a' is renamed to 'C4' earlier in the notebook, so the 'C4a' entry can no
## longer match and 'C4' falls through to the C-classifier colour -- confirm this is intended.
codes_post_processing = ['D2b', 'D2d', 'D6a', 'D6c', 'C4a', 'C4b', 'C4c', 'D1a', 'D1b']

data_plot['colour'] = []
for child in data_plot['child']:
    if child in codes_not_estimated:
        colour_key = 'Not estimated'
    elif child in codes_post_processing:
        colour_key = 'Post-processing'
    elif child[0] in ['G', 'H'] or child == 'F2':
        colour_key = 'OS NGD'
    elif child == 'F3d':
        colour_key = 'E classifier'
    elif child == 'F3a':
        colour_key = 'D classifier'
    elif child in ['C', 'D', 'E']:
        colour_key = 'Main-class classifier'
    elif child[0] in ['C', 'D', 'E']:
        ## Remaining C/D/E sub-classes use the colour of their own classifier.
        colour_key = f'{child[0]} classifier'
    else:
        ## Raise explicitly instead of `assert False`: assert statements are removed when
        ## Python runs with -O, which would silently leave 'colour' shorter than 'child'.
        raise ValueError(f'Colour not defined for {child}')
    data_plot['colour'].append(colour_dict[colour_key])

## Identity mapping so that plotly uses each hex string itself as the segment colour.
color_map_dict = {colour: colour for colour in data_plot['colour']}

## Annotate label names?
See https://stackoverflow.com/questions/70129355/value-annotations-around-plotly-sunburst-diagram for an approach to placing annotations around a Plotly sunburst diagram.

- Or merge with a CNN diagram, from left to right (FLTR):
  - RGB image
  - Box: main classifier (purple) into C/D/E
  - 3 boxes: 3 classifiers (colours) into sub classes
  - Box: post-processing
  - Box: add OS NGD

In [29]:
## Uniform alternative colouring: one light-grey entry per entry in data_plot['colour'].
data_plot['colour_all_grey'] = ['#cccccc'] * len(data_plot['colour'])

In [31]:
save_fig = False  ## set to True to also write the figure to a PDF file

## Sunburst of the class hierarchy, coloured with the per-segment hex colours.
fig = px.sunburst(
    data_plot,
    names='child',
    parents='parents',
    values='values',
    color='colour',
    color_discrete_map=color_map_dict,
)

## sort=False is set so that plotly does not re-order the segments; leaves are fully opaque.
fig.update_traces(
    selector=dict(type='sunburst'),
    sort=False,
    leaf=dict(opacity=1),
)

## Add legend: one scatter trace without data points per colour category.
for legend_label, legend_colour in colour_dict.items():
    legend_marker = dict(color=legend_colour, size=10)
    fig.add_trace(
        go.Scatter(x=[None], y=[None], mode='markers', marker=legend_marker, name=legend_label)
    )

## Remove clutter: white backgrounds and fully transparent tick labels on both axes.
transparent_ticks = dict(color='rgba(0,0,0,0)')
fig.update_layout(
    paper_bgcolor='rgba(255, 255, 255,255)',
    plot_bgcolor='rgba(255, 255, 255, 255)',
)
fig.update_xaxes(tickfont=transparent_ticks, range=[1, 4])
fig.update_yaxes(tickfont=transparent_ticks)
fig.show()

if save_fig:
    ## Author's note: when saved as PDF, all labels inside the sunburst are printed
    ## at 0 degree orientation (right way up).
    fig.write_image('../figures/land_cover_schema_2022_v2.pdf')

In [15]:
# df_schema

In [36]:
## Look up the schema description of every node in a single pass over df_schema, instead of
## filtering the DataFrame and rebuilding the code list once per node.
## setdefault keeps the first description if a code occurs more than once, matching a
## "first matching row" lookup.
code_to_description = {}
for code_2022, description_2022 in zip(df_schema['code_2022'], df_schema['description_2022']):
    code_to_description.setdefault(code_2022, description_2022)

## Nodes without a schema row (e.g. parents added while building data_plot) keep their code as name.
data_plot['full_name'] = [code_to_description.get(x, x) for x in data_plot['child']]

In [45]:
## Hand-written short display names, one per node and in the same order as data_plot['child']
## (they are paired by position further down).
## NOTE(review): the grouping comments follow the 45-entry `parents` output shown at the end of
## the notebook, which may stem from an earlier execution state -- confirm that the order and
## length still match data_plot['child'].
data_plot['full_name_child'] = [
    ## nodes under C
    'Broadleaved', 'Coniferous', 'Scrub', 'Scrub', 'Scrub Past.', 'Scrub Edge', 'Felled/Planted',
    ## nodes under D
    'Heath', 'Heath', 'Heath Bog', 'Grass Moor', 'Grass Moor', 'Peat Grass', 'Bracken',
    'Heath/Grass', 'Mosaics', 'Heath/Bracken', 'Heath/Peat',
    ## nodes under E
    'Improv. Past.', 'Pasture', 'Rough Past.',
    ## nodes under F
    'Water', 'Peat Bog', 'Wetlands', 'Rush Pasture',
    ## nodes under G
    'Rocks', 'Rocks',
    ## nodes under H
    'Urban', 'Built-up', 'Major Transport', 'Minor Transport', 'Urban Greenspace',
    'Quarries', 'Quarries', 'Derelict Land', 'Isolated Farms', 'Rural', 'Isolated Rural',
    ## nodes directly under 'LC schema'
    'Unclassified Land', 'Woods', 'Moors', 'Grass', 'Water', 'Rock', 'Urban',
]

In [48]:
## The short names are matched to the node codes purely by position. zip() stops at the shorter
## list without complaint, so a length mismatch would silently drop or mislabel nodes;
## check it explicitly before pairing.
n_codes = len(data_plot['child'])
n_names = len(data_plot['full_name_child'])
if n_codes != n_names:
    raise ValueError(
        f"data_plot['child'] has {n_codes} entries but data_plot['full_name_child'] has "
        f"{n_names}; update the hand-written names so that both lists line up."
    )

map_code_to_name = dict(zip(data_plot['child'], data_plot['full_name_child']))
## The root node is not a child of anything, so give it a name of its own.
map_code_to_name['LC schema'] = 'LC schema'

## Short display name of every node's parent.
data_plot['full_name_parent'] = [map_code_to_name[x] for x in data_plot['parents']]

In [44]:
## Display the parent assigned to each sunburst node (inspection only; nothing is modified).
data_plot['parents']

['C',
 'C',
 'C4',
 'C',
 'C4',
 'C4',
 'C',
 'D1',
 'D',
 'D1',
 'D2',
 'D',
 'D2',
 'D',
 'D6',
 'D',
 'D6',
 'D6',
 'E2',
 'E',
 'E2',
 'F',
 'F3',
 'F',
 'F3',
 'G2',
 'G',
 'H1',
 'H',
 'H1',
 'H1',
 'H1',
 'H2',
 'H',
 'H2',
 'H3',
 'H',
 'H3',
 'LC schema',
 'LC schema',
 'LC schema',
 'LC schema',
 'LC schema',
 'LC schema',
 'LC schema']