In [None]:
import sys
# Install Jinja2 with pip, invoked through the interpreter that is running this
# kernel (sys.executable), so the package lands in the kernel's own environment.
# NOTE(review): presumably needed by the DataFrame.to_latex call used for the
# statistics table later in this notebook - confirm.
!{sys.executable} -m pip install Jinja2

In [None]:
import pandas as pd
import numpy as np
import os
import glacierml as gl
import matplotlib.pyplot as plt
from tqdm import tqdm
# Do not truncate columns when a DataFrame is displayed.
pd.set_option('display.max_columns', None)
import path_manager as pm

# Unpack the eight project paths in the order pm.set_paths() returns them.
(
    home_path,
    data_path,
    RGI_path,
    glathida_path,
    ref_path,
    coregistration_testing_path,
    arch_test_path,
    LOO_path,
) = pm.set_paths()

In [None]:
# Load the leave-one-out table together with the list of per-model columns.
cols, data = gl.load_LOO_data(home_path, include_refs=True)

# Row-wise mean of the model columns, rounded to 3 decimals, stored as 'est'.
mean_estimate = data[cols].mean(axis=1).round(3).rename('est')
data = pd.concat([data, mean_estimate], axis=1)

# Volumes implied by the estimate and by the 'FMT' reference column.
est_volume = data['est'] * data['Area']
ref_volume = data['FMT'] * data['Area']
volume_gap = est_volume - ref_volume

# Derived columns are appended in this order: Vol, FVol, Vol Diff, Perc Diff.
derived_columns = [
    est_volume.round(6).rename('Vol'),
    ref_volume.round(6).rename('FVol'),
    volume_gap.round(3).rename('Vol Diff'),
    # Difference expressed as a percentage of the estimated volume.
    (volume_gap / est_volume * 100).round(1).rename('Perc Diff'),
]
data = pd.concat([data, *derived_columns], axis=1)

# For the purpose of reporting results in meters: scale both thickness
# columns by 1e3 (done after the volume columns above were computed).
for thickness_col in ('est', 'FMT'):
    data[thickness_col] = data[thickness_col] * 1e3

In [None]:
def stats_builder(data,st1 = 1e-5, st2 = 1e4):
    """Summarise the glaciers whose 'Vol' lies strictly between two bounds.

    Parameters
    ----------
    data : pandas.DataFrame
        Must provide the columns 'Area', 'est', 'Vol', 'Vol Diff' and
        'Perc Diff'.
    st1, st2 : float
        Exclusive lower and upper bounds applied to the 'Vol' column.

    Returns
    -------
    df : pandas.DataFrame
        Object-dtype table indexed by 'mean', 'median', 'min', 'max', 'STD'
        and 'count', with one column per summarised feature. 'Area', 'est'
        and 'Vol Diff' are relabelled for presentation; 'Perc Diff' keeps
        its name.
    data : pandas.DataFrame
        The rows of the input whose 'Vol' falls inside (st1, st2).
    vol : numpy.ndarray
        'Vol' of every input row (unfiltered).
    vol_sub : numpy.ndarray
        'Vol' of the selected rows only.
    """
    # Decimal places used when rounding each feature's statistics. The
    # original code let 'Perc Diff' silently inherit the value set for
    # 'Vol Diff'; it is spelled out here so column order no longer matters.
    precision = {
        'Area': 3,
        'est': 0,
        'Vol Diff': 1,
        'Perc Diff': 1,
    }
    stat_names = ['mean', 'median', 'min', 'max', 'STD', 'count']
    reducers = (np.nanmean, np.nanmedian, np.nanmin, np.nanmax, np.nanstd)

    vol = data['Vol'].to_numpy()
    # One boolean mask drives both the row selection and the volume subset.
    in_range = (vol > st1) & (vol < st2)
    data = data[in_range]
    vol_sub = vol[in_range]

    table = {}
    for feature, places in precision.items():
        values = data[feature]
        if len(values) == 0:
            # np.nanmin / np.nanmax raise on empty input; report NaN instead.
            summary = [np.nan] * len(reducers)
        else:
            summary = [np.round(reduce(values), places) for reduce in reducers]
        # 'count' is the number of selected rows, NaN entries included.
        table[feature] = summary + [len(values)]

    # Build the table in one step instead of chained `df[col].loc[row] = ...`
    # assignment, which does not write through under pandas Copy-on-Write.
    # dtype=object keeps the integer counts alongside the float statistics.
    df = pd.DataFrame(table, index=stat_names, dtype=object)

    df = df.rename(columns = {
        'Area':'Area (km$^2$)',
        # The key was previously 'est0', which matched no column, so the
        # thickness column was never relabelled.
        'est':'Est Thick (m)',
        'Vol Diff':'Vol Diff (km$^3$)'
    })
    # NOTE(review): the columns are object dtype, so this call appears to
    # leave the stored values unchanged; kept for parity with the original.
    df = df.round(decimals = 1)
    return df, data, vol, vol_sub

In [None]:
# Volume window handed to stats_builder; both bounds are exclusive there.
st1, st2 = 1e-5, 1e4
stats, df, vol, vol_sub = stats_builder(data, st1=st1, st2=st2)

# Print the LaTeX source of the table, then leave `stats` as the last
# expression so the notebook also renders it.
print(stats.to_latex(float_format="%.1f"))
stats

In [None]:
# Total of the 'Vol' values over every glacier, divided by 1e3 and rounded.
tot_vol = np.round(np.sum(vol) / 1e3, 3)

# Volume of the selected subset, recomputed from the per-model columns in
# `cols` rather than from df['est'].
# NOTE(review): presumably because 'est' was rescaled by 1e3 for reporting
# in the data-preparation cell - confirm.
subset_volumes = (np.mean(df[cols], axis = 1) * df['Area']).to_numpy()
subset_sum_vol = np.round(np.sum(subset_volumes) / 1e3, 3)

# '\\%' prints a literal backslash followed by '%' (LaTeX-escaped percent).
# The previous '\%' was an invalid escape sequence, which newer Python
# versions flag with a SyntaxWarning; the printed text is unchanged.
print(
    f'{st1} to {st2} * 10$^3$ km$^3$, '
    f'sum subset volume: {np.round(subset_sum_vol,1)}, '
    f'{np.round(len(df) / len(vol)*100,1)}\\% of glacier population, '
    f'{np.round((subset_sum_vol / tot_vol)*100,1)}\\% of global volume, '
)