In [1]:
%matplotlib inline
import matplotlib.pyplot as plt
import rasterio
import numpy as np
import dl_toolbox.datasets as datasets
import dl_toolbox.utils as utils
import os 
from pathlib import Path
import matplotlib.backends.backend_pdf
import pandas as pd

In [2]:
if os.uname().nodename == 'WDTIS890Z': 
    data_root = Path('/mnt/d/pfournie/Documents/data')
    home = Path('/home/pfournie')
    save_root = data_root / 'outputs'
elif os.uname().nodename == 'qdtis056z': 
    data_root = Path('/data')
    home = Path('/d/pfournie')
    save_root = data_root / 'outputs'
else:
    data_root = Path('/work/OT/ai4geo/DATA/DATASETS')
    #data_root = Path(os.environ['TMPDIR'])
    home = Path('/home/eh/fournip')
    save_root = Path('/work/OT/ai4usr/fournip') / 'outputs'
digitanie = data_root/'DIGITANIE_v3'

In [11]:
pd.options.display.float_format = '{:,.0f}'.format

def class_distrib_tile(msk_path, nomenc):
    
    num_class = len(nomenc)
    class_names = [l.name for l in nomenc]
    labels_merger = utils.MergeLabels([list(l.values) for l in nomenc])
    labels2rgb = utils.NomencToRgb(nomenc)
    
    with rasterio.open(msk_path) as file:
        labels_raw = file.read(out_dtype=np.float32)
    labels = labels_merger(labels_raw.squeeze())
    labels_rgb = labels2rgb(labels)
    
    counts, bins = np.histogram(labels.flatten(), range(num_class+1))
    
    return counts, bins, labels_rgb, class_names

def post_proc(table):
    
    total = sum(table)
    coeffs_ce = [np.round(max(total)/c,1) for c in total]
    coeffs_bce = [np.round((sum(total) - c)/c,1) for c in total]
    table_full = table + [total, coeffs_ce, coeffs_bce]
    
    return table_full

def class_distrib_city(city, show):
    
    pdf = matplotlib.backends.backend_pdf.PdfPages(save_root/f"{city}.pdf")
    table9 = []
    table24 = []

    # List of small tiles
    img_paths = sorted(
        [path.relative_to(digitanie) for path in Path(digitanie/city).glob('*_[0-9].tif')], 
        key=lambda x: int(x.stem.split('_')[-1])
    )

    msk_9_paths = sorted(
        [path.relative_to(digitanie) for path in Path(digitanie/city/'COS9').glob('*_mask.tif')],
        key=lambda x: int(x.stem.split('_')[1])
    )
    
    msk_24_paths = sorted(
        [path.relative_to(digitanie) for path in Path(digitanie/city/'COS24').glob('*_mask.tif')],
        key=lambda x: int(x.stem.split('_')[1])
    )
    
    for i, (img, msk9, msk24) in enumerate(zip(img_paths, msk_9_paths, msk_24_paths)):
        
        counts24, bins24, labels_rgb24, names24 = class_distrib_tile(
            digitanie/msk24, 
            datasets.DigitanieNomenclatures['24'].value
        )
        table24.append(counts24)
        
        counts9, bins9, labels_rgb9, names9 = class_distrib_tile(
            digitanie/msk9, 
            datasets.DigitanieNomenclatures['main'].value
        )
        table9.append(counts9)
        
        with rasterio.open(digitanie/img, 'r') as file:
            image = file.read(out_dtype=np.float32, indexes=[1,2,3])
        pmin, pmax = np.percentile(image, (0, 99.5), axis=(1,2))
        image = image.transpose(1,2,0)
        image_normalized = np.clip((image - pmin) / (pmax - pmin), 0, 1)
        
        fig = plt.figure(figsize=(18, 15), constrained_layout=True)
        fig.suptitle(f'{city} tuile {i}')
        spec = fig.add_gridspec(nrows=4, ncols=3)
        
        ax00 = fig.add_subplot(spec[:-2, 0])
        ax00.imshow(image_normalized)

        ax01 = fig.add_subplot(spec[:-2, 1])
        ax01.imshow(labels_rgb9)

        ax02 = fig.add_subplot(spec[:-2, 2])
        ax02.imshow(labels_rgb24)
                
        ax1 = fig.add_subplot(spec[-2, :])
        ax1.bar(
            x=bins9[:-1],
            height=counts9
        )
        tick_marks = np.arange(len(names9))
        ax1.set_xticks(tick_marks)
        ax1.set_xticklabels(names9, rotation=45, fontsize=15)
        ax1.set_ylabel("Counts", fontsize=15)
        
        ax2 = fig.add_subplot(spec[-1, :])
        ax2.bar(
            x=bins24[:-1],
            height=counts24
        )
        tick_marks = np.arange(len(names24))
        ax2.set_xticks(tick_marks)
        ax2.set_xticklabels(names24, rotation=45, fontsize=15)
        ax2.set_ylabel("Counts", fontsize=15)
        
        if show:
            plt.show()
            
        pdf.savefig( fig )

    pdf.close()
    
    table9full = post_proc(table9)
    df9 = pd.DataFrame(table9full, columns=names9, dtype=float)
    table24full = post_proc(table24)
    df24 = pd.DataFrame(table24full, columns=names24, dtype=float)
    
    with pd.ExcelWriter(save_root/f'{city}.xlsx') as writer:
        df9.to_excel(writer, sheet_name='6 classes')
        df24.to_excel(writer, sheet_name='24 classes')

In [None]:
for city in [
    'ARCACHON',
    'TOULOUSE',
    'PARIS',
    'NANTES',
    'STRASBOURG',
    'MONTPELLIER',
    'BIARRITZ'
]:
    class_distrib_city(city, show=False)

  coeffs_ce = [np.round(max(total)/c,1) for c in total]
  coeffs_bce = [np.round((sum(total) - c)/c,1) for c in total]
  coeffs_ce = [np.round(max(total)/c,1) for c in total]
  coeffs_bce = [np.round((sum(total) - c)/c,1) for c in total]
  fig = plt.figure(figsize=(18, 15), constrained_layout=True)
  coeffs_ce = [np.round(max(total)/c,1) for c in total]
  coeffs_bce = [np.round((sum(total) - c)/c,1) for c in total]


In [10]:
df=pd.read_excel(save_root/f'ARCACHON.xlsx', sheet_name='6 classes', index_col=0)
df

Unnamed: 0,nodata,other,low vegetation,high vegetation,water,building,road
0,41184,1526805,287226,1928711,378730,407,31241
1,94809,883279,66354,3010269,0,46619,92974
2,327748,57409,71398,1664953,1586494,307092,179210
3,298,576533,1026897,2590576,0,0,0
4,531104,140407,244943,2086232,314643,639207,237768
5,278786,548875,421197,2182824,0,482959,279663
6,3132,430957,332500,3427483,0,232,0
7,693293,348405,208775,1250258,49594,1185734,458245
8,531697,177939,740209,1475628,0,816103,452728
9,600891,269115,618361,1802089,162994,455725,285129
