In [None]:
import pandas as pd
import numpy as np
import os
import glacierml as gl
import matplotlib.pyplot as plt
from tqdm import tqdm
# Show every column when a DataFrame is displayed (no column truncation).
pd.options.display.max_columns = None

In [None]:
# Project root; passed to gl.set_paths to obtain the project paths below.
home_path = '/path/to/project/directory'

# gl.set_paths returns seven values, unpacked here in the order it yields them.
(
    data_path,
    RGI_path,
    glathida_path,
    ref_path,
    coregistration_testing_path,
    arch_test_path,
    LOO_path,
) = gl.set_paths(home_path)

In [None]:
# gl.load_LOO_data returns a pair: `cols`, which later cells use as a column
# selector on the table, and the table itself (`data`).
cols, data = gl.load_LOO_data(home_path, include_refs=True)

In [None]:
# Per-row mean of the columns named in `cols`, stored as the 'est' column.
# This reads from `data` (loaded above): `df` does not exist until the stats
# cell further down has run, so referencing it here fails on a fresh kernel.
# `assign` overwrites 'est' when the cell is re-run instead of appending a
# second, duplicate 'est' column the way pd.concat would.
est = data[cols].mean(axis = 1).rename('est')
data = data.assign(est = est)

In [None]:
def stats_builder(data,st1 = 1e-5, st2 = 1e4):
    """Summarise the glaciers whose estimated volume lies in (st1, st2).

    Volumes are computed as ``thickness / 1e3 * Area`` for both the estimated
    thickness ('est') and the reference thickness ('FMT'). Going by the output
    labels, Area is in km^2 and thickness in m, so volumes are in km^3.

    Parameters
    ----------
    data : pandas.DataFrame
        Must contain the columns 'FMT', 'est' and 'Area'.
        NOTE: the columns 'Vol Diff' and 'Perc Diff' are written onto this
        frame in place (the caller's object is modified).
    st1, st2 : float
        Exclusive lower / upper bounds on the estimated volume used to
        select the subset.

    Returns
    -------
    df : pandas.DataFrame
        Summary table (rows: mean, median, min, max, IQR, STD, count;
        columns: area, estimated thickness, volume difference, percent
        difference) computed on the subset. If the subset is empty, 'count'
        is 0 and every other statistic is left as NaN.
    data : pandas.DataFrame
        The rows of the input whose estimated volume is inside (st1, st2).
    vol : numpy.ndarray
        Estimated volume of every input row (unfiltered).
    vol_sub : numpy.ndarray
        Estimated volume of the selected rows only.
    """
    col_list = ['Area', 'est', 'Vol Diff', 'Perc Diff']
    name = ['mean', 'median', 'min', 'max', 'IQR', 'STD', 'count']

    # Rounding precision per column. 'Perc Diff' previously had no explicit
    # value and silently reused the 3 left over from 'Vol Diff'.
    decimals = {'Area': 3, 'est': 0, 'Vol Diff': 3, 'Perc Diff': 3}

    x = data['FMT'].to_numpy()
    y = data['est'].to_numpy()
    area = data['Area'].to_numpy()

    vol_f = (x / 1e3) * area   # reference volume
    vol = (y / 1e3) * area     # estimated volume

    # Written onto the caller's frame on purpose (existing behaviour).
    data['Vol Diff'] = vol - vol_f
    data['Perc Diff'] = ((y - x) / y) * 100

    # One mask, reused for both the frame and the volume array.
    in_range = (vol > st1) & (vol < st2)
    data = data[in_range]
    vol_sub = vol[in_range]

    df = pd.DataFrame(columns = col_list, index = name)
    for col in col_list:
        values = data[col]
        if len(values) == 0:
            # np.nanmin / np.nanmax raise on empty input; report an empty
            # subset as count 0 and leave the other statistics as NaN.
            df.loc['count', col] = 0
            continue

        p = decimals[col]
        low = np.nanquantile(values, 0.25)
        upp = np.nanquantile(values, 0.75)
        results = [
            np.round(np.nanmean(values), p),
            np.round(np.nanmedian(values), p),
            np.round(np.nanmin(values), p),
            np.round(np.nanmax(values), p),
            np.round(upp - low, p),
            np.round(np.nanstd(values), p),
            len(values),   # counts NaN entries too, unlike the nan-aware stats
        ]
        for n, value in zip(name, results):
            # Single .loc call: chained `df[col].loc[n] = ...` may write to a
            # temporary copy and is a no-op under pandas Copy-on-Write.
            df.loc[n, col] = value

    df = df.rename(columns = {
        'Area':'Area (km$^2$)',
        'est':'Est Thick (m)',   # key was 'est0', which never matched
        'Vol Diff':'Vol Diff (km$^3$)'
    })
    # NOTE(review): the table's columns are object dtype (mixed float / int),
    # for which DataFrame.round appears to be a no-op; kept for parity.
    df = df.round(decimals = 1)
    return df, data, vol, vol_sub

In [None]:
# Volume window (lower, upper) used to select the glacier subset.
st1, st2 = 50, 1e4
stats, df, vol, vol_sub = stats_builder(data, st1 = st1, st2 = st2)

# Print the LaTeX source of the table, then show the table as the cell output.
print(stats.to_latex(float_format = "%.3f"))
stats

In [None]:
# Total estimated volume over every glacier, divided by 1e3 and rounded.
# `vol` is a NumPy array, so np.sum replaces the element-by-element builtin
# sum; NaN values still propagate into the total.
tot_vol = np.round(np.sum(vol) / 1e3, 3)

# Same quantity for the selected subset, recomputed from the mean of `cols`
# and 'Area'. .to_numpy() keeps NaN propagation identical to the builtin sum
# (a pandas reduction would skip NaN by default).
subset_vol = (np.mean(df[cols], axis = 1) / 1e3) * df['Area']
subset_sum_vol = np.round(np.sum(subset_vol.to_numpy()) / 1e3, 3)

pct_population = np.round(len(df) / len(vol) * 100, 3)
pct_volume = np.round((subset_sum_vol / tot_vol) * 100, 3)

# '\\%' prints the same literal backslash-percent (LaTeX-escaped percent sign)
# as before, without relying on the invalid escape sequence '\%'.
print(
    f'{st1} to {st2} * 10$^3$ km$^3$, '
    f'sum subset volume: {subset_sum_vol}, '
    f'{pct_population}\\% of glacier population, '
    f'{pct_volume}\\% of global volume, '
)