In [None]:
from collections import defaultdict
import glob
from datetime import datetime

import matplotlib.pyplot as plt # type: ignore
from matplotlib.colors import LogNorm # type: ignore
import numpy as np # type: ignore
import pandas as pd # type: ignore
import seaborn # type: ignore

In [None]:
def collect_sod_stats(idir):
    """Collect SOD statistics from all ``stats_sod*npz`` files in a directory.

    Files are processed in sorted file-name order. A file that contains a
    ``'none'`` key is skipped entirely. For each remaining file the date is
    parsed from the part of the file name after the last underscore
    (expected to look like ``YYYYMMDD.npz``).

    Parameters
    ----------
    idir : str
        Directory that is searched for ``stats_sod*npz`` files.

    Returns
    -------
    dates : list of datetime or None
        One entry per file that was not skipped.
    sod_stats : defaultdict(list) or None
        Maps each overall metric name (``'SOD accuracy'`` etc.) and each
        ``'<label> | <metric>'`` combination to a list of per-file values.
    conf_mat : numpy.ndarray or None
        NaN-ignoring sum of the per-file ``'matrix'`` arrays, transposed.
    sod_labels : list or None
        Labels read from the last file that was not skipped.

    All four values are ``None`` when no usable file was found.
    """
    frmt = '%Y%m%d.npz'
    var_names = ['SOD accuracy', 'SOD precision', 'SOD recall', 'SOD f1-score']
    ice_var_names = ['precision', 'recall', 'fscore']

    dates = []
    sod_stats = defaultdict(list)
    conf_matrs = []
    sod_labels = []

    for ifile in sorted(glob.glob(f'{idir}/stats_sod*npz')):
        # NOTE(review): allow_pickle=True can execute arbitrary code when a
        # file is crafted maliciously; only point this at trusted directories.
        # The context manager closes the underlying archive right after its
        # content has been copied into a plain dict.
        with np.load(ifile, allow_pickle=True) as npz:
            d = dict(npz)
        # Check for the placeholder marker BEFORE touching any other key:
        # such files are not guaranteed to carry 'labels'.
        if 'none' in d:
            continue
        sod_labels = list(d['labels'])
        dates.append(datetime.strptime(ifile.split('_')[-1], frmt))
        for var_name in var_names:
            sod_stats[var_name].append(d[var_name].item())
        for ice_var_name in ice_var_names:
            # Per-label values are paired with this file's own labels.
            for ice_name, ice_value in zip(sod_labels, d[ice_var_name]):
                sod_stats[f'{ice_name} | {ice_var_name}'].append(ice_value)
        conf_matrs.append(d['matrix'])

    if not dates:
        return None, None, None, None

    # Stack along a third axis, sum over files ignoring NaNs, then transpose.
    conf_mat = np.nansum(np.dstack(conf_matrs), axis=2).T
    return dates, sod_stats, conf_mat, sod_labels

def collect_sic_stats(idir):
    """Collect SIC statistics from all ``stats_sic*npz`` files in a directory.

    Files are processed in sorted file-name order. A file that contains a
    ``'none'`` key is skipped. For each remaining file the date is parsed
    from the part of the file name after the last underscore (expected to
    look like ``YYYYMMDD.npz``).

    Parameters
    ----------
    idir : str
        Directory that is searched for ``stats_sic*npz`` files.

    Returns
    -------
    dates : list of datetime
        One entry per file that was not skipped (empty if there is none).
    sic_stats : defaultdict(list)
        Maps ``'SIC <All|Avg> <metric>'`` to a list of per-file scalar
        values, for the metrics Pearson, Bias, RMSE and DRMSE.
    """
    frmt = '%Y%m%d.npz'
    metric_names = ['Pearson', 'Bias', 'RMSE', 'DRMSE']
    var_names = [
        f'SIC {name1} {metric_name}'
        for metric_name in metric_names
        for name1 in ['All', 'Avg']
    ]

    dates = []
    sic_stats = defaultdict(list)
    for ifile in sorted(glob.glob(f'{idir}/stats_sic*npz')):
        # NOTE(review): allow_pickle=True can execute arbitrary code when a
        # file is crafted maliciously; only point this at trusted directories.
        # The context manager closes the underlying archive right after its
        # content has been copied into a plain dict.
        with np.load(ifile, allow_pickle=True) as npz:
            d = dict(npz)
        if 'none' in d:
            continue
        dates.append(datetime.strptime(ifile.split('_')[-1], frmt))
        for var_name in var_names:
            sic_stats[var_name].append(d[var_name].item())
    return dates, sic_stats

def plot_confusion_matrix(idir, conf_mat, sod_labels):
    """Render a confusion matrix as a log-normalised image, save and show it.

    The figure is written to ``<idir>/confusion_matrix.png``. Both axes are
    ticked with ``sod_labels``; the x axis is titled 'Manual ice chart' and
    the y axis 'Auto ice chart'.

    Parameters
    ----------
    idir : str
        Directory in which the PNG file is stored.
    conf_mat : array-like
        Matrix passed to ``imshow``.
    sod_labels : sequence
        Tick labels used on both axes.
    """
    fig, ax = plt.subplots(1, 1, figsize=(7, 7))

    # Log colour scale for the image; the colorbar is attached to the same axes.
    image = ax.imshow(conf_mat, norm=LogNorm())
    plt.colorbar(image, ax=ax, shrink=0.7)

    tick_positions = range(len(sod_labels))
    ax.set_xticks(tick_positions, sod_labels, rotation=90)
    ax.set_yticks(tick_positions, sod_labels)
    ax.set_xlabel('Manual ice chart')
    ax.set_ylabel('Auto ice chart')

    plt.tight_layout()
    out_path = f'{idir}/confusion_matrix.png'
    plt.savefig(out_path, dpi=150, bbox_inches='tight', pad_inches=0.1)
    plt.show()

def monthly_plots(idir, aut_stats, dates):
    """Save and show one per-month box plot for every statistic.

    A data frame is built from ``aut_stats`` indexed by ``dates``; the
    abbreviated month name of each date is used as the grouping variable.
    Each figure is written to ``<idir>/<statistic name>.png``. A failure
    for one statistic is reported and does not stop the remaining plots.

    Parameters
    ----------
    idir : str
        Directory in which the PNG files are stored.
    aut_stats : mapping
        Maps a statistic name to a list of values, one per entry of ``dates``.
    dates : sequence of datetime
        Dates of the values. When empty (or ``None``) nothing is plotted.
    """
    # Without dates the frame would get a non-datetime index and
    # ``.strftime`` below would raise; there is nothing to plot anyway.
    if dates is None or len(dates) == 0:
        print('No data to plot for ', idir)
        return

    df = pd.DataFrame(aut_stats, index=dates)
    df['month'] = df.index.strftime('%b')
    show_names = df.columns.drop(['month'])
    for var_name in show_names:
        fig = None
        try:
            fig, ax = plt.subplots(1, 1, figsize=(5, 5))
            seaborn.boxplot(x='month', y=var_name, data=df, ax=ax)
            fig.savefig(f'{idir}/{var_name}.png', dpi=200, bbox_inches='tight', pad_inches=0.1)
            plt.show()
        except Exception as exc:
            # Best effort: keep going with the other statistics, but do not
            # swallow KeyboardInterrupt/SystemExit and do report the cause.
            print('Failed to show ', var_name, '-', repr(exc))
        finally:
            # Release the figure even when plotting or saving failed.
            if fig is not None:
                plt.close(fig)


In [None]:
# Directories with the statistics files; plots are written into the same
# directories.
idirs  = ['../dmi_nic', '../dmi_dmi', '../dmi_osisaf']
#idirs  = ['../dmi_nic']

for idir in idirs:
    sic_dates, sic_stats = collect_sic_stats(idir)
    # collect_sic_stats returns an empty date list when the directory has no
    # usable SIC files; skip plotting then so the remaining directories are
    # still processed.
    if sic_dates:
        monthly_plots(idir, sic_stats, sic_dates)
    sod_dates, sod_stats, conf_mat, sod_labels = collect_sod_stats(idir)
    # collect_sod_stats signals "no usable SOD files" with None.
    if sod_dates is not None:
        monthly_plots(idir, sod_stats, sod_dates)
        plot_confusion_matrix(idir, conf_mat, sod_labels)