## Import dependencies and set environment determinism

In [None]:
import os
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '1'
import tensorflow as tf
#     tf.get_logger().setLevel('ERROR')
tf.compat.v1.logging.set_verbosity(
    0
)
import numpy as np
import random
import pandas as pd


# Random seed used as the default by the seeding helpers below; printed so the
# value in use is recorded in the cell output.
SEED = 378
# SEED = 123
print(SEED)
def set_seeds(seed=SEED):
    """Seed NumPy, TensorFlow and Python's `random` with the same value.

    The seed is also exported through the PYTHONHASHSEED environment variable.
    """
    np.random.seed(seed)
    tf.random.set_seed(seed)
    random.seed(seed)
    os.environ['PYTHONHASHSEED'] = str(seed)

def set_global_determinism(seed=SEED):
    """Seed every RNG and configure TensorFlow for reproducible runs.

    Calls `set_seeds`, sets the TF_DETERMINISTIC_OPS and
    TF_CUDNN_DETERMINISTIC environment flags to '1', and limits both
    TensorFlow thread pools to a single thread.
    """
    set_seeds(seed=seed)

    for flag in ('TF_DETERMINISTIC_OPS', 'TF_CUDNN_DETERMINISTIC'):
        os.environ[flag] = '1'

    threading_cfg = tf.config.threading
    threading_cfg.set_inter_op_parallelism_threads(1)
    threading_cfg.set_intra_op_parallelism_threads(1)

# Apply the seeding and determinism settings once when this cell runs.
set_global_determinism(seed=SEED)

    


In [None]:
import glacierml as gl
from tensorflow import keras
from tensorflow.keras import layers
from tensorflow.keras.layers.experimental import preprocessing
import matplotlib.pyplot as plt
import statsmodels.api as sm
from scipy.stats import norm
from scipy.stats import kstest
from scipy.stats import shapiro 
from scipy.stats import gaussian_kde
from tqdm import tqdm
from scipy import stats
from mpl_toolkits.axes_grid1 import make_axes_locatable
import matplotlib as mpl
import scipy.stats as st
from sklearn import metrics
import absl.logging
absl.logging.set_verbosity(absl.logging.ERROR)
pd.set_option('display.max_columns', None)

In [None]:
# if os.path.isdir(res_dir) == False:

def run_model(model_path, n):
    """Train and save the leave-one-out model for sample `n`.

    Reads the training split from the module-level `trfeat` / `trlabs`
    dictionaries, stores the network in `model[n]` and its fit history in
    `model_history[n]`, and saves the network under `<model_path>/<n>`.
    """
    train_features = trfeat[n]

    # Normalization layer adapted to this fold's training features only.
    normalizer = preprocessing.Normalization(axis=-1)
    normalizer.adapt(np.array(train_features))

    network = gl.build_dnn_model(
        normalizer,
        learning_rate=0.01,
        layer_1=16,
        layer_2=4,
        loss='mae',
    )
    model[n] = network

    model_history[n] = network.fit(
        train_features,
        trlabs[n],
        validation_split=0.2,
        callbacks=[callback],
        verbose=0,
        epochs=500,
    )

    network.save(os.path.join(model_path, str(n)))
                

def find_results(df, model, n):
    """Evaluate every leave-one-out model and tabulate its errors.

    For each index label of `df`, the matching model is scored on its
    held-out sample (read from the module-level `tefeat` / `telabs`
    dictionaries) and on the whole of `df`.

    Parameters
    ----------
    df : pandas.DataFrame
        Full data set; must contain the 'RGIId' and 'Thickness' columns.
    model : dict
        Maps each index label of `df` to a fitted model exposing
        `evaluate` and `predict`.
    n : object
        Unused. Kept only so existing calls keep working; the loop below
        iterates over `df.index` itself.

    Returns
    -------
    pandas.DataFrame
        One row per model with the held-out loss / RMSE / percent error,
        summary statistics of the residuals over all of `df` ('VRes ...'),
        the held-out estimate and the held-out sample's features. Empty when
        `df` has no rows.
    """
    feature_names = [
        'Area', 'Lmax', 'Slope', 'Zmin', 'Zmed',
        'Zmax', 'CenLat', 'CenLon', 'Aspect',
    ]
    # Loop-invariant: the full feature table every model is verified against.
    all_features = df.drop(['RGIId', 'Thickness'], axis=1)

    rows = []
    for idx in tqdm(df.index):
        network = model[idx]
        held_out_features = tefeat[idx]
        held_out_labels = telabs[idx]
        held_out_thickness = held_out_labels.values[0]

        loss = network.evaluate(held_out_features, held_out_labels, verbose=0)
        estimate = network.predict(held_out_features, verbose=0).flatten()

        RMSE = np.sqrt(
            metrics.mean_squared_error(held_out_labels.values, estimate)
        )
        # NOTE(review): this is a signed percent error (no absolute value);
        # confirm that is what downstream code expects.
        percent_error = (
            np.mean((estimate - held_out_thickness) / held_out_thickness) * 100
        )

        # Residuals of this model over the complete data set.
        ver_res = network.predict(all_features, verbose=0).flatten() - df['Thickness']
        vq75 = np.percentile(ver_res, 75)
        vq25 = np.percentile(ver_res, 25)

        row = {
            'i': idx,
            'Model Loss': loss,
            'RMSE': RMSE,
            'Percent Error': percent_error,
            'Test Res': RMSE,
            'VRes Mean': np.mean(ver_res),
            'VRes STD': np.std(ver_res),
            'VRes Max': ver_res.max(),
            'VRes Min': ver_res.min(),
            'VRes IQR': vq75 - vq25,
            'est': estimate,
            'Thickness': held_out_thickness,
        }
        for name in feature_names:
            row[name] = held_out_features[name].values[0]
        rows.append(pd.DataFrame(row, index=[0]))

    if not rows:
        return pd.DataFrame()
    # Single concatenation instead of growing a DataFrame inside the loop.
    return pd.concat(rows)

In [None]:
# Early-stopping callback passed to every `fit` call in `run_model`: it watches
# the validation loss and restores the best weights when training stops.
callback = tf.keras.callbacks.EarlyStopping(
    monitor = 'val_loss',
    min_delta = 0.001,
    patience = 10,
    verbose = 0,
    mode = 'auto',
    baseline = None,
    restore_best_weights = True
)

In [None]:
# Leave-one-out (LOO) driver.
#
# For each data ordering (as loaded, shuffled, or sorted by Thickness / Area in
# either direction) one model per sample is loaded from disk or trained, then
# two artefacts are produced under the ordering's model directory:
#   final_results.pkl - per-model error table from `find_results`
#   rgi_est_raw.pkl   - the RGI table with one prediction column per model
# Both artefacts are cached: an existing pickle is loaded instead of being
# recomputed, so delete the file to force a fresh run.
feat_sort = ['none', 'shuffle', 'Thickness', 'Area']
asc_list = ['none', True, False]
for feat in feat_sort:
    for asc in asc_list:
        # A sort direction only makes sense for the sortable features, and the
        # sortable features always need one; every other pairing is skipped
        # before any data is loaded.
        is_sort_feature = feat in ('Thickness', 'Area')
        has_direction = asc != 'none'
        if is_sort_feature != has_direction:
            continue

        df = gl.coregister_data('4')
        if is_sort_feature:
            a = 'ascending' if asc else 'descending'
            df = df.sort_values(feat, ascending=asc)
        else:
            a = 'none'
            if feat == 'shuffle':
                df = df.sample(frac=1, random_state=0)
        df = df.reset_index(drop=True)

        dft = df.copy()

        # Leave-one-out splits: sample n is the test set of model n.
        loo_features = df.drop(['RGIId', 'Thickness'], axis=1)
        loo_labels = df['Thickness']
        trfeat = {}
        trlabs = {}
        tefeat = {}
        telabs = {}
        for n in df.index:
            mask = df.index.isin([n])
            trfeat[n] = loo_features.loc[~mask]
            tefeat[n] = loo_features.loc[mask]
            trlabs[n] = loo_labels.loc[~mask]
            telabs[n] = loo_labels.loc[mask]

        model = {}
        model_history = {}
        model_path = os.path.join('/data/fast1/glacierml/models/LOO/', feat, a)
        print(model_path)
        res_dir = os.path.join(model_path, 'final_results.pkl')
        rgi_est_pth = os.path.join(model_path, 'rgi_est_raw.pkl')

        # These are pickle *files*, so test them with isfile; isdir is always
        # False for them and silently disabled the cache.
        have_results = os.path.isfile(res_dir)
        have_rgi_est = os.path.isfile(rgi_est_pth)

        # The models are only needed when at least one artefact is missing.
        if not (have_results and have_rgi_est):
            for n in tqdm(df.index):
                model_dir = os.path.join(model_path, str(n))
                if os.path.isdir(model_dir):
                    model[n] = gl.load_dnn_model(model_dir)
                else:
                    set_global_determinism(seed=SEED)
                    run_model(model_path, n)

        if have_results:
            fr = pd.read_pickle(res_dir)
        else:
            fr = find_results(df, model, n)
            # NOTE(review): 'unc' adds an RMSE to a percent error; confirm the
            # two are meant to be combined like this.
            fr['unc'] = fr['RMSE'] + fr['Percent Error']
            fr = fr.set_index('i')
            os.makedirs(model_path, exist_ok=True)
            fr.to_pickle(res_dir)

        if have_rgi_est:
            RGI = pd.read_pickle(rgi_est_pth)
        else:
            RGI = gl.load_RGI()
            rfp = RGI[list(df)[:-1]]
            rgi_features = rfp.drop('RGIId', axis=1)

            # Collect every prediction column first and concatenate once;
            # concatenating inside the loop copies the whole table each time.
            preds = []
            for n in tqdm(model.keys()):
                preds.append(
                    pd.Series(model[n].predict(rgi_features).flatten(), name=n)
                )
            RGI = pd.concat([RGI] + preds, axis=1)
            os.makedirs(model_path, exist_ok=True)
            RGI.to_pickle(rgi_est_pth)


In [None]:
# NOTE(review): `crash` is not defined anywhere in the visible source, so this
# raises NameError; presumably a deliberate stop for "Run All" - confirm.
crash()

In [None]:
# fr = pd.read_pickle(model_path + 'final_results_2.pkl')

In [None]:
# fr.sort_values('Percent Error')

## Looking at LOO first results

In [None]:
# ls = 12
# fig, ax = plt.subplots(1,2,figsize = (10,4))
# x = fr['Thickness']
# y = fr['est']
# ax[0].scatter(
#     x,y, alpha = 0.25
# )
# ax[0].plot(
#     (x.min(),x.max()),
#     (x.min(),x.max()),
#     '-k'
# )
# ax[0].set_ylabel('LOO Estimated Thickness',fontsize = ls)

# y = fr['Percent Error']
# ax[1].scatter(
#     x,y, alpha = 0.25
# )
# ax[1].plot(
#     (x.min(),x.max()),
#     (0,0),
#     '-k'
# )
# ax[1].set_ylabel('LOO Percent Error',fontsize = ls)
# fig.supxlabel('GlaThiDa Thickness', y = 0.05,fontsize = ls)
# ax[0].tick_params(axis='both', labelsize=ls)
# ax[1].tick_params(axis='both', labelsize=ls)
# plt.tight_layout()

In [None]:
# ls = 12
# fig, ax = plt.subplots(1,2,figsize = (10,4))
# x = fr['Thickness']
# y = fr['est']
# ax[0].scatter(
#     x,y, alpha = 0.25
# )
# ax[0].plot(
#     (x.min(),x.max()),
#     (x.min(),x.max()),
#     '-k'
# )
# ax[0].set_ylabel('LOO Estimated Thickness',fontsize = ls)

# y = fr['Percent Error']
# x = fr['Area']
# ax[1].scatter(
#     x,y, alpha = 0.25
# )
# ax[1].plot(
#     (x.min(),x.max()),
#     (0,0),
#     '-k'
# )
# ax[1].set_xscale('log')
# ax[1].set_ylabel('LOO Percent Error',fontsize = ls)
# fig.supxlabel('GlaThiDa Thickness', y = 0.05,fontsize = ls)
# ax[0].tick_params(axis='both', labelsize=ls)
# ax[1].tick_params(axis='both', labelsize=ls)
# plt.tight_layout()

In [None]:
# fig, ax = plt.subplots(1,2,figsize = (10,4))
# x = fr['Area']
# y = fr['est']
# ax[0].scatter(
#     x,y,alpha = 0.25
# )
# ax[0].set_xscale('log')

# ax[0].set_ylabel('LOO Estimated Thickness', fontsize = ls)

# y = fr['Thickness']
# ax[1].scatter(
#     x,y,alpha = 0.25
# )
# ax[1].set_xscale('log')
# ax[1].set_ylabel('GlaThiDa Survey Thickness',fontsize = ls)
# fig.supxlabel('Glacier Area (km$^2$)', y = 0.05,fontsize = ls)
# ax[0].tick_params(axis='both', labelsize=ls)
# ax[1].tick_params(axis='both', labelsize=ls)
# plt.tight_layout()

### Let's see if a linear model does anything different

In [None]:
# lin_model = {}
# lin_model_history = {}
# l1 = 16
# l2 = 4
# normalizer = {}
# loss = 'mae'
# model_path = '/data/fast1/glacierml/models/LOO_linear/'
# for n in tqdm(df.index):
#     isdir = os.path.join(
#         model_path ,str(n)
#     )
#     if os.path.isdir(isdir) == True:
#         lin_model[n] = gl.load_dnn_model(isdir)
#     elif os.path.isdir(isdir) == False:
        

# #         total_inputs = (len(df.columns)) * (len(df) - 1)
# #         dp = int(pr * total_inputs)
# #         tp = dp - (len(df.columns) + (len(df.columns)-1) )
# #         g = (len(df.columns) + (len(df.columns) - 1))
# #         l2[n] = 4
# #         l1[n] = int((dp - 1 - g - 2*l2[n]) / (10 + l2[n]))
#         normalizer = preprocessing.Normalization(axis=-1)
#         normalizer.adapt(np.array(trfeat[n]))

#         lin_model[n] = gl.build_linear_model(
#             normalizer, learning_rate = 0.01, 
#             layer_1 = l1, layer_2 = l2
#         )

#         lin_model_history[n] = model[n].fit(
#             trfeat[n],
#             trlabs[n],
#             validation_split=0.2,
#             callbacks = [callback],
#             verbose=0, 
#             epochs=500
#         )
#         model_filename = isdir
#         lin_model[n].save(model_filename)

In [None]:
# results = {}
# residuals = {}
# final_results = pd.DataFrame()
# # residuals = pd.DataFrame()
# y = {}
# for n in tqdm(df.index):
#     results[n] = model[n].evaluate(tefeat[n], telabs[n],verbose = 0)
#     y[n] = model[n].predict(tefeat[n],verbose = 0).flatten()
# #     residuals[n] = y[n] - telabs[n].values[0]
# #     p_res = residuals[n] / telabs[n].values[0]
#     RMSE = np.sqrt(metrics.mean_squared_error(telabs[n].values, y[n]))
#     MAPerror = np.mean((y[n] - (telabs[n].values[0])) / telabs[n].values[0]) * 100 


# #     tq75 = np.percentile(residuals[n], 75)
# #     tq25 = np.percentile(residuals[n], 25)

# #     TIQR = tq75 - tq25

#     z = model[n].predict(df.drop(['RGIId','Thickness'],axis = 1),verbose = 0).flatten()
#     ver_res = z - df['Thickness']
#     ver_res_mean = np.mean(ver_res)
#     ver_res_std =  np.std(ver_res)
#     vq75 = np.percentile(ver_res,75)
#     vq25 = np.percentile(ver_res,25)
#     VIQR = vq75 - vq25

#     test_res_n = pd.DataFrame(pd.Series(n,name = 'i'))
#     test_res = pd.DataFrame(pd.Series((RMSE), name = 'Test Res'))
# #     test_res_std = pd.DataFrame(pd.Series(np.std(residuals[n].values),name = 'Val STD'))

#     test_res_n['Model Loss'] = results[n]
#     test_res_n['RMSE'] = RMSE
#     test_res_n['Percent Error'] = MAPerror
#     trm = test_res_n.join(test_res)
# #     test_res_mean = test_res_mean.join(test_res_std)
# #     test_res_mean['TRes Max'] = residuals[n].max()
# #     test_res_mean['TRes Min'] = residuals[n].min()
# #     test_res_mean['TRes IQR'] = TIQR
#     trm['VRes Mean'] = ver_res_mean    
#     trm['VRes STD'] = ver_res_std
#     trm['VRes Max'] = ver_res.max()
#     trm['VRes Min'] = ver_res.min()
#     trm['VRes IQR'] = VIQR
#     trm['est'] = y[n]
#     trm['Thickness'] = telabs[n].values[0]
#     trm['Area'] = tefeat[n]['Area'].values[0]
#     trm['Lmax'] = tefeat[n]['Lmax'].values[0]
#     trm['Slope'] = tefeat[n]['Slope'].values[0]
#     trm['Zmin'] = tefeat[n]['Zmin'].values[0]
#     trm['Zmed'] = tefeat[n]['Zmed'].values[0]
#     trm['Zmax'] = tefeat[n]['Zmax'].values[0]
#     trm['CenLat'] = tefeat[n]['CenLat'].values[0]
#     trm['CenLon'] = tefeat[n]['CenLon'].values[0]
#     trm['Aspect'] = tefeat[n]['Aspect'].values[0]
#     final_results = pd.concat([final_results,trm])
# fr_lin = final_results
# fr_lin['unc'] = fr_lin['RMSE'] + fr_lin['Percent Error']
# fr_lin = fr_lin.set_index('i')
# fr_lin.to_pickle(model_path + 'final_results_linear.pkl')

In [None]:
# fr_lin = pd.read_pickle(model_path + 'final_results_linear.pkl')

In [None]:
# fr_lin.sort_values('Percent Error')

In [None]:
# fig, ax = plt.subplots(1,2,figsize = (10,4))
# x = fr_lin['Thickness']
# y = fr_lin['est']
# ax[0].scatter(
#     x,y, alpha = 0.25
# )
# ax[0].plot(
#     (x.min(),x.max()),
#     (x.min(),x.max()),
#     '-k'
# )
# # ax[0].set_xscale('log')
# # ax[0].set_yscale('log')
# ax[0].set_ylabel('LOO Estimated Thickness', fontsize = ls)
# fig.supxlabel('GlaThiDa Thickness', y = 0.05, fontsize = ls)

# y = fr_lin['Percent Error']
# ax[1].scatter(
#     x,y, alpha = 0.25
# )
# ax[1].plot(
#     (x.min(),x.max()),
#     (0,0),
#     '-k'
# )
# ax[1].set_ylabel('LOO Percent Error', fontsize = ls)
# ax[0].tick_params(axis='both', labelsize=ls)
# ax[1].tick_params(axis='both', labelsize=ls)
# plt.tight_layout()

In [None]:
# fr.sort_values('RMSE')

In [None]:
# fig, ax = plt.subplots(1,2,figsize = (10,4))
# x = fr_lin['Area']
# y = fr_lin['est']
# ax[0].scatter(
#     x,y,alpha = 0.25
# )
# ax[0].set_xscale('log')
# ax[0].set_ylabel('LOO Estimated Thickness')

# y = fr_lin['Thickness']
# ax[1].scatter(
#     x,y,alpha = 0.25
# )
# ax[1].set_xscale('log')
# ax[1].set_ylabel('GlaThiDa Survey Thickness')
# fig.supxlabel('Glacier Area (km$^2$)', y = -.05)

In [None]:
# plt.scatter(
#     fr['est'],
#     fr['RMSE'],
#     alpha = 0.25
# )

## Are residuals and percent residuals normally distributed with features?

In [None]:
# for feat in ['Area','Lmax','Slope','Zmin']:
#     plt.scatter(
#         fr[feat],
#         fr['RMSE']
#     )
#     if feat == 'Area' or feat == 'Lmax':
#         plt.xscale('log')
#     plt.xlabel(feat)
#     plt.ylabel('LOO RMSE')
#     plt.show()

# for feat in ['Area','Lmax','Slope','Zmin']:
#     plt.scatter(
#         fr[feat],
#         fr['Percent Error']
#     )
#     if feat == 'Area' or feat == 'Lmax':
#         plt.xscale('log')
#     plt.xlabel(feat)
#     plt.ylabel('LOO % error')
#     plt.show()

In [None]:
# plt.scatter(
#     fr['Area'],
#     fr['RMSE']
# )
# plt.yscale('log')
# plt.xscale('log')

### Use each LOO model to predict RGI

In [None]:

# Labels of the per-model prediction columns (0 .. 340).
# NOTE(review): 341 is hard-coded; presumably the number of LOO models.
cols = list(range(341))

# Merge keys: every column of `df` except the last, plus 'RGIId' when it is not
# already among them (appending it unconditionally can list the key twice).
rgi_list = list(df)[:-1]
if 'RGIId' not in rgi_list:
    rgi_list.append('RGIId')

In [None]:
# String labels '0_' .. '340_', one per integer label below 341.
unc_cols = [f'{label}_' for label in range(341)]

In [None]:
# Attach the per-model RGI predictions to the surveyed glaciers, then build a
# 1.96-standard-error interval around the mean of the 341 model estimates.
df = pd.merge(df, RGI, how = 'inner', on = rgi_list)
loo_estimates = df[range(341)]
X = np.mean(loo_estimates, axis = 1)
se = np.std(loo_estimates, axis = 1) / np.sqrt(341)

ci_half_width = 1.96 * se
df['LCI'] = X - ci_half_width
df['UCI'] = X + ci_half_width

# Interval bounds multiplied by area, each scaled down by 1e3.
area = df['Area']
lb = df['LCI'] / 1e3 * area / 1e3
ub = df['UCI'] / 1e3 * area / 1e3

In [None]:
# Mean LOO estimate against surveyed thickness, with the 95% CI as error bars.
x = df['Thickness']
# y = np.mean(dfci, axis = 1)
# y = df['we']
y = np.mean(df[cols],axis = 1)
plt.errorbar(
    x,y,
    # `yerr` is the distance drawn on EACH side of y, so pass half the CI
    # width; the full width would draw bars twice as long as the interval.
    yerr = (df['UCI'] - df['LCI']) / 2,
        alpha = 0.25,
#     label = 'Estimates $\hat{\mu}(x)$',
    linestyle = 'None',
    marker = '.',
    capsize = 8,
    color = '#1f77b4',
)

# 1:1 reference line.
plt.plot(
    (x.min(),x.max()),
    (x.min(),x.max()),
    '-k'
)

plt.ylabel('Estimated Thickness')
plt.xlabel('GlaThiDa Survey')
plt.title('Leave-One-Out X-val 95% CI')

In [None]:
# Left panel: upper and lower CI bounds against glacier area (log x-axis).
# Right panel: surveyed thickness against glacier area.
fig, ax = plt.subplots(1,2,figsize = (10,4))
x = df['Area']
y1 = df['UCI']
y2 = df['LCI']
ax[0].scatter(
    x,y1,alpha = 0.25
)
ax[0].scatter(
    x,y2,alpha = 0.25
)
ax[0].set_xscale('log')
# NOTE(review): the label says "Upper CI" but both bounds are plotted here.
ax[0].set_ylabel('LOO Thickness Upper CI')

y = df['Thickness']
ax[1].scatter(
    x,y,alpha = 0.25
)
ax[1].set_xscale('log')
ax[1].set_ylabel('GlaThiDa Survey Thickness')
fig.supxlabel('Glacier Area (km$^2$)', y = -.05)

In [None]:
# Upper CI bound against glacier area on a log x-axis.
x = df['Area']
y = df['UCI']
plt.scatter(x,y,alpha = 0.25)
plt.xscale('log')

In [None]:
# Load every Farinotti reference-thickness CSV found in `ref_pth` and join the
# mean thickness ('FMT') onto `df` by RGIId.
ref_pth = '/data/fast1/glacierml/data/reference_thicknesses/'
# Sorted so the concatenation order does not depend on the file system.
farinotti_files = sorted(
    name for name in os.listdir(ref_pth) if 'Farinotti' in name
)
if not farinotti_files:
    raise FileNotFoundError(f'No Farinotti reference files found in {ref_pth}')

# Read from the directory that was listed; a separate relative path only works
# when the notebook happens to run from the parent of that directory.
ref = pd.concat(
    [pd.read_csv(os.path.join(ref_pth, name)) for name in farinotti_files],
    ignore_index = True,
)
ref = ref.rename(columns = {
     'Farinotti Mean Thickness':'FMT',
})
ref = ref[[
     'FMT',
     'RGIId',
]]

df = pd.merge(df, ref, how = 'inner', on = 'RGIId')

In [None]:
# Point estimate per glacier: the midpoint of the upper and lower CI bounds.
df['est'] = df[['UCI','LCI']].mean(axis = 1)

In [None]:
# Point estimate against surveyed thickness, with a 1:1 reference line.
x = df['Thickness']
# y = np.mean(dfci, axis = 1)
# y = df['we']
y = df['est']
plt.scatter(
    x,y,
        alpha = 0.25,
#     label = 'Estimates $\hat{\mu}(x)$',
    marker = '.',
)

plt.plot(
    (x.min(),x.max()),
    (x.min(),x.max()),
    '-k'
)

plt.ylabel('Estimated Thickness')
plt.xlabel('GlaThiDa Survey')
# plt.title('Leave-One-Out X-val 95% CI')

In [None]:
# Pooled-variance two-sample t statistic: LOO estimate vs. surveyed thickness.
obs = df['Thickness']
obs_n = obs.count()
obs_mean = obs.mean()
obs_std = obs.std()
# Standard error of the mean is std / sqrt(n), not mean / std.
obs_se = obs_std / np.sqrt(obs_n)

pred = df['est']
pred_n = pred.count()
pred_mean = pred.mean()
pred_std = pred.std()
pred_se = pred_std / np.sqrt(pred_n)

# Reduces to (obs_std**2 + pred_std**2) / 2 when both samples have equal size.
pooled_var = (
    (obs_n - 1) * obs_std**2 + (pred_n - 1) * pred_std**2
) / (obs_n + pred_n - 2)

# The mean difference must be scaled by its standard error to be a t statistic.
t = (pred_mean - obs_mean) / np.sqrt(pooled_var * (1 / obs_n + 1 / pred_n))

print(t)

In [None]:
# Pooled-variance two-sample t statistic: Farinotti mean thickness vs. survey.
obs = df['Thickness']
obs_n = obs.count()
obs_mean = obs.mean()
obs_std = obs.std()
# Standard error of the mean is std / sqrt(n), not mean / std.
obs_se = obs_std / np.sqrt(obs_n)

pred = df['FMT']
pred_n = pred.count()
pred_mean = pred.mean()
pred_std = pred.std()
pred_se = pred_std / np.sqrt(pred_n)

# Reduces to (obs_std**2 + pred_std**2) / 2 when both samples have equal size.
pooled_var = (
    (obs_n - 1) * obs_std**2 + (pred_n - 1) * pred_std**2
) / (obs_n + pred_n - 2)

# The mean difference must be scaled by its standard error to be a t statistic.
t = (pred_mean - obs_mean) / np.sqrt(pooled_var * (1 / obs_n + 1 / pred_n))

print(t)

In [None]:
# Covariance between the `cols` columns: after the transpose each row passed to
# np.cov is one column of `df[cols]`, and np.cov treats rows as variables.
cov = np.cov(df[cols].T)

In [None]:
# Eigen-decomposition of the covariance matrix.
# NOTE(review): `cov` is symmetric, so np.linalg.eigh may be the better fit
# (real, sorted output); np.linalg.eig returns eigenvalues in no set order.
# cov = np.cov(df[cols])
eigenvalues, eigenvectors = np.linalg.eig(cov)

In [None]:
# Correlation coefficients between the `cols` columns (same layout as `cov`).
corr = np.corrcoef(df[cols].T)

In [None]:
# Heat map of the covariance matrix; the colour scale is clipped to +/-1000.
f = plt.figure(figsize=(13, 12))
plt.matshow(
    cov, fignum=f.number,cmap = 'seismic',vmin=-1000, vmax=1000
)
plt.xticks(fontsize=14)
plt.yticks(fontsize=14)
cb = plt.colorbar()
cb.ax.tick_params(labelsize=14)
# plt.title('Covariance Matrix of Survey Thickness and Model Estimates', fontsize=18)
# plt.xlabel('Model Thickness Estimates',fontsize = 14)
# plt.ylabel('GlaThiDa Survey Thickness',fontsize = 14)

In [None]:
# Partition the entries of `mean_covs` by sign (negative / zero / positive /
# missing) and pick the matching rows of `dft` by position.
neg_covs_ind = mean_covs[mean_covs < 0].index
zer_covs_ind = mean_covs[mean_covs == 0].index
# NaN never compares equal to anything, itself included, so `== np.nan`
# always selected nothing; test for missing values explicitly.
nan_covs_ind = mean_covs[pd.isna(mean_covs)].index
pos_covs_ind = mean_covs[mean_covs > 0].index

negs = dft.iloc[neg_covs_ind]
zero = dft.iloc[zer_covs_ind]
pos = dft.iloc[pos_covs_ind]
nans = dft.iloc[nan_covs_ind]

In [None]:
# One Gaussian KDE curve per row of `df`, fitted to that row's `cols` values
# and evaluated on a shared grid from 0 to 700.
x_eval = np.linspace(0,700,500)
loo_estimates = df[cols]

for i in tqdm(range(len(df))):
    x = loo_estimates.iloc[i]
    kde = st.gaussian_kde(np.asarray(x))
    density = kde(x_eval)
    plt.plot(x_eval, density, '-',alpha = 0.25)
plt.xscale('symlog')

In [None]:
# Wrap both matrices in DataFrames so label-based selection can be used below.
cov = pd.DataFrame(cov)
corr = pd.DataFrame(corr)

In [None]:
# Drop the rows in which every `cols` entry is <= 0: the boolean mask leaves
# NaN wherever the test fails, so dropna keeps only the all-<=0 rows, and those
# labels are then removed. (The corr_d line used to be executed twice.)
cov_d = cov.drop(cov[cov[cols] <= 0].dropna(axis = 0).index)
corr_d = corr.drop(corr[corr[cols] <= 0].dropna(axis = 0).index)

# Renumber the surviving rows from 0.
cov_d = cov_d.reset_index(drop = True)
corr_d = corr_d.reset_index(drop = True)

In [None]:
# Same row filter as applied to the covariance table, applied to `dft`, with
# the surviving rows renumbered from 0.
dftr = dft.drop(cov[cov[cols] <= 0].dropna(axis = 0).index)
dftr = dftr.reset_index().drop('index',axis = 1)

In [None]:

# One KDE curve of the `cols` estimates per row label of `dftr`, visited in
# order of increasing Thickness and coloured by visiting order.
p = plt.get_cmap('seismic')
n = 0
for i in tqdm(
    dftr.sort_values('Thickness',ascending = True).index,
#     dftr.sort_values('Thickness',ascending = True).reset_index().index
):
#     print(dft['Thickness'].loc[i])

    
    # NOTE(review): `i` is a label of the renumbered `dftr` but is looked up in
    # `df`; confirm both frames share row labels.
    x =df[cols].loc[i]

    x_eval = np.linspace(x.min(),x.max(),500)

    kde = st.gaussian_kde(np.array(x))
    # NOTE(review): colours are normalised by len(dft), not len(dftr), so they
    # stop short of the end of the colormap whenever rows were dropped.
    plt.plot(
        x_eval, kde(x_eval), '-',alpha = 0.75,
        c = p(n/(len(dft) - 1))
    )
    n = n + 1
#     c = p(n/(len(pos) - 1))
# plt.ylim(0,0.2)
# plt.yscale('log')
plt.xscale('symlog')
plt.ylabel('Likelihood')
plt.xlabel('Thickness')
plt.title('Leave-One-Out Thickness PDF')
# Stand-alone colour bar in a new axes to the right of the plot.
divider = make_axes_locatable(plt.gca())
ax_cb = divider.new_horizontal(size="5%", pad=0.05)    
cb1 = mpl.colorbar.ColorbarBase(
    ax_cb, cmap=p, orientation='vertical',
    ticklocation = 'auto',ticks = [],
    label = 'Left-Out Thickness'
)
cb1.set_ticks(ticks = (0,1),labels = ['Min','Max'])
# cb1.set_label('Thickness',x = -0.07)
plt.gcf().add_axes(ax_cb)

#     plt.show()


In [None]:

# One KDE curve per row of the filtered covariance table `cov_d`, visited in
# order of increasing Thickness of the matching `dftr` row.
p = plt.get_cmap('seismic')
n = 0
for i in tqdm(
    dftr.sort_values('Thickness',ascending = True).index,
#     dftr.sort_values('Thickness',ascending = True).reset_index().index
):
#     print(dft['Thickness'].loc[i])

    
    x =cov_d.loc[i]

    x_eval = np.linspace(x.min(),x.max(),500)

    kde = st.gaussian_kde(np.array(x))
    # NOTE(review): colours are normalised by len(dft), not len(dftr).
    plt.plot(
        x_eval, kde(x_eval), '-',alpha = 0.25,
        c = p(n/(len(dft) - 1))
    )
    n = n + 1
#     c = p(n/(len(pos) - 1))
# plt.ylim(0,0.2)
plt.yscale('log')
plt.xscale('symlog')
plt.ylabel('Density')
plt.xlabel('Covariance')
plt.title('Leave-One-Out Covariance PDF')
# Stand-alone colour bar in a new axes to the right of the plot.
divider = make_axes_locatable(plt.gca())
ax_cb = divider.new_horizontal(size="5%", pad=0.05)    
cb1 = mpl.colorbar.ColorbarBase(
    ax_cb, cmap=p, orientation='vertical',
    ticklocation = 'auto',ticks = [],
    label = 'Thickness index'
)
cb1.set_ticks(ticks = (0,1),labels = ['Min','Max'])
# cb1.set_label('Thickness',x = -0.07)
plt.gcf().add_axes(ax_cb)

#     plt.show()


In [None]:
cov

In [None]:
from mpl_toolkits.axes_grid1 import make_axes_locatable
import matplotlib as mpl

# KDE curves of the `cols` estimates per `dftr` row label, coloured by order of
# increasing Thickness.
# NOTE(review): the axis label and title say "Covariance", but the curves are
# built from `df[cols]` (the per-model estimates); confirm which is intended.
p = plt.get_cmap('seismic')
thickness = dft['Thickness']
n = 0
for i in tqdm(
    dftr.sort_values('Thickness',ascending = True).index,
#     dftr.sort_values('Thickness',ascending = True).reset_index().index
):
#     print(dft['Thickness'].loc[i])
    x = df[cols].loc[i]
    x_eval = np.linspace(x.min(),x.max(),500)

    kde = st.gaussian_kde(np.array(x))
    plt.plot(
        x_eval, kde(x_eval), '-',alpha = 0.75,
        c = p(n/(len(dft) - 1))
    )
    n = n + 1
#     c = p(n/(len(pos) - 1))
    
plt.xscale('symlog')
# plt.yscale('log')
plt.ylabel('Probability')
plt.xlabel('Covariance')
plt.title('Leave-One-Out Covariance PDF')
# Stand-alone colour bar in a new axes to the right of the plot.
divider = make_axes_locatable(plt.gca())
ax_cb = divider.new_horizontal(size="5%", pad=0.05)    
cb1 = mpl.colorbar.ColorbarBase(
    ax_cb, cmap=p, orientation='vertical',
    ticklocation = 'auto',ticks = [],
    label = 'Thickness index'
)
cb1.set_ticks(ticks = (0,1),labels = ['Min','Max'])
# cb1.set_label('Thickness',x = -0.07)
plt.gcf().add_axes(ax_cb)
#     plt.show()


In [None]:
df[cols].min().min()

In [None]:
dft

In [None]:
(n/(len(dft) - 1))

In [None]:
# KDE curves for the rows in `negs`, visited in order of increasing Thickness.
p = plt.get_cmap('seismic')

for i in tqdm(negs.sort_values('Thickness',ascending = True).index):
    print(i)
    # NOTE(review): `i` is a row label of `negs` but is used as a position in
    # `df`, and the colour is scaled by len(pos); confirm both are intended.
    x = df[cols].iloc[i]
    x_eval = np.linspace(x.min(),x.max(),500)

    kde = st.gaussian_kde(np.array(x))
    plt.plot(x_eval, kde(x_eval), '-',alpha = 0.5,c = p(i/(len(pos) - 1)))
plt.xscale('symlog')
# plt.colorbar(p)

In [None]:
# KDE curves for the rows in `zero`, visited in order of increasing Thickness.
p = plt.get_cmap('seismic')

for i in tqdm(zero.sort_values('Thickness',ascending = True).index):
    print(i)
    # NOTE(review): `i` is a row label of `zero` but is used as a position in
    # `df`, and the colour is scaled by len(pos); confirm both are intended.
    x = df[cols].iloc[i]
    x_eval = np.linspace(x.min(),x.max(),500)

    kde = st.gaussian_kde(np.array(x))
    plt.plot(x_eval, kde(x_eval), '-',alpha = 0.5,c = p(i/(len(pos) - 1)))
plt.xscale('symlog')
# plt.colorbar(p)

In [None]:
negs

In [None]:
import scipy.stats as st

In [None]:
cov = pd.DataFrame(cov)

In [None]:
# Column-wise mean of the covariance table (one value per column of `cov`).
mean_covs = np.mean(cov, axis = 0)

In [None]:

# Thickness against Zmin for the `pos` rows (faded) and the `negs` rows.
plt.scatter(    pos['Zmin'],pos['Thickness'],alpha = 0.25
)
plt.scatter(
    negs['Zmin'],negs['Thickness']
)
# plt.xscale('log')

In [None]:
np.argsort(np.sum(cov,axis=0))

In [None]:
loo = np.array(df[cols].T)
plt.scatter(loo[:,65],loo[:,275])

In [None]:
np.mean(df[cols].iloc[55])

In [None]:
df.iloc[55]

In [None]:
cov = pd.DataFrame(cov)

In [None]:
cov.iloc[55]

In [None]:
np.where(cov[cols] <= 0)

In [None]:
len(np.unique(np.where(cov<0)[1]))

In [None]:
cov[cov[cols] <= 0].dropna(axis = 0)

In [None]:
cov[(cov[cov.columns] < 0).all(axis=1)]

In [None]:
cov.iloc[55].max()

In [None]:
(cov.iloc[np.where(cov<=0)[1]].index).unique()

In [None]:
# NOTE(review): `bad_glacs` is not defined anywhere in the visible source;
# this cell depends on state created elsewhere - confirm.
dft = df.iloc[bad_glacs]

In [None]:
df.iloc[np.unique(bad_glacs)]

In [None]:
dft

In [None]:
# First column of the covariance matrix. `cov` is rebound to a DataFrame in
# other cells, where `cov[:, 0]` is not valid indexing, so go through NumPy to
# work for both an ndarray and a DataFrame.
np.asarray(cov)[:, 0]

In [None]:
# Rows of `df` at the positions of covariance-matrix rows that contain at
# least one negative entry. (The original `cov[,:]` was a syntax error.)
df.iloc[np.unique(np.where(np.asarray(cov) < 0)[0])]

In [None]:
cov

In [None]:
np.where(cov==cov.max())

In [None]:
# NOTE(review): this plots `eigenvectors` against itself on log-log axes;
# confirm that is the intended comparison.
plt.scatter(
    eigenvectors, eigenvectors
)
plt.xscale('log')
plt.yscale('log')

In [None]:
np.where(eigenvalues == np.max(eigenvalues))

In [None]:
# Eigenvalues in the order returned by the decomposition, on a symlog y-axis.
# NOTE(review): `linestyle = None` (the object, not the string 'None') keeps
# matplotlib's default line style; confirm that is intended.
plt.plot(eigenvalues,linestyle = None)
plt.yscale('symlog')

In [None]:
# x = np.linspace(eigenvectors.min(),eigenvectors.max(),len(eigenvectors))
# for i in range(340):
# NOTE(review): `eigenvectors[0]` is the first ROW of the matrix returned by
# np.linalg.eig, whose eigenvectors are stored as columns; confirm intent.
plt.plot(
    eigenvectors[0],linestyle = None
)
plt.show()


In [None]:
# Signed residuals against the surveyed thickness: this study's estimate and
# the Farinotti mean thickness.
df['Residual'] = df['est'] - df['Thickness']
df['FResidual'] = df['FMT'] - df['Thickness']

In [None]:
# Mean, sample standard deviation and standard error of the mean for both
# residuals. The standard error uses the actual number of non-missing values:
# `df` has been through inner merges, so its row count need not be 341.
n_1 = df['Residual'].count()
mean_1 = df['Residual'].mean()
std_1 = df['Residual'].std()
se_1 = std_1 / np.sqrt(n_1)

n_2 = df['FResidual'].count()
mean_2 = df['FResidual'].mean()
std_2 = df['FResidual'].std()
se_2 = std_2 / np.sqrt(n_2)

In [None]:
# Z statistics of each mean residual against a hypothesised mean of zero.
Z_1 = (mean_1 - 0) / se_1

Z_2 = (mean_2 - 0) / se_2

print(Z_1)
print(Z_2)

In [None]:
# KDE of both residual distributions, with a dashed vertical line at each mean.
x1 = df['Residual']
x2 = df['FResidual']
kde1 = stats.gaussian_kde(np.array(x1))
kde2 = stats.gaussian_kde(np.array(x2))
#visualize KDE
x1_eval = np.linspace(x1.min(),x1.max(), num=200)
plt.plot(x1_eval, kde1(x1_eval), '-',color = 'blue',label = 'This study Residual')

x2_eval = np.linspace(x2.min(),x2.max(), num=200)
plt.plot(x2_eval, kde2(x2_eval),color = 'orange',label = 'Farinotti Residual')

# The mean markers run from 0 to a fixed height of 0.02.
plt.plot(
    (x1.mean(),x1.mean()),
    (0,0.02),'--',color = 'blue',label = 'This Study Mean Residual'
)

plt.plot(
    (x2.mean(),x2.mean()),
    (0,0.02),'--',color = 'orange',label = 'Farinotti Mean Residual'
)


plt.legend()
print(f'This study mean residual = {x1.mean()}')
print(f'Farinotti mean residual = {x2.mean()}')

In [None]:
print(np.var(x1))
print(np.var(x2))

In [None]:
# Probability mass of each residual KDE on (-inf, 0], i.e. the estimated
# probability that a residual is non-positive. A continuous density assigns
# zero probability to the single point 0, so the messages say "<= 0".
p1 = kde1.integrate_box_1d(-np.inf, 0)
p2 = kde2.integrate_box_1d(-np.inf, 0)
print(f'probability of residual <= 0 = {p1}')
print(f'probability of Fresidual <= 0 = {p2}')


In [None]:
import scipy.stats as st
# Standard-normal density evaluated at each Z statistic.
# NOTE(review): this is a density value, not a tail probability (p-value);
# confirm which is wanted.
print(st.norm.pdf(Z_1))
print(st.norm.pdf(Z_2))

In [None]:
# KDE of 500 standard-normal draws. This rebinds `x1` and `kde1`, so the labels
# and printout below no longer refer to the study residuals.
x1 = np.sort(np.random.standard_normal(size=500))
kde1 = stats.gaussian_kde(np.array(x1))
# NOTE(review): kde2 is fitted to the evaluation grid `x2_eval`, not to data,
# and is not used by any live line in this cell.
kde2 = stats.gaussian_kde(np.array(x2_eval))
#visualize KDE
plt.plot(x1, kde1(x1), '-',color = 'blue',label = 'This study Residual')

# x2_eval = np.linspace(x2.min(),x2.max(), num=200)
# plt.plot(x2_eval, kde2(x2_eval),color = 'orange',label = 'Farinotti Residual')

# plt.plot(
#     (x1.mean(),x1.mean()),
#     (0,0.02),'--',color = 'blue',label = 'This Study Mean Residual'
# )

# plt.plot(
#     (x2.mean(),x2.mean()),
#     (0,0.02),'--',color = 'orange',label = 'Farinotti Mean Residual'
# )


plt.legend()
print(f'This study mean residual = {x1.mean()}')
print(f'Farinotti mean residual = {x2.mean()}')

In [None]:
# Weighted mean of the per-model estimates for the first RGI row only, each
# model weighted by 1 / 'unc'. (The loop this replaces always stopped after
# its first pass.)
i = 0
glac = RGI.iloc[i]
weighted_sum = sum(glac[cols] / fr['unc'])
weight_total = sum(1/fr['unc'])
print(weighted_sum / weight_total)
#     (RGI[cols] /  fr['unc'].T) / (1/fr['unc'].T)

In [None]:
# Histograms of the 341 per-model values for five randomly chosen rows.
# NOTE(review): `data` is not defined in the visible source, and
# random.randint(0, 341) can return 341 (both ends inclusive); confirm the
# frame has more than 341 rows.
for i in range(5):
    plt.hist(data[range(341)].iloc[random.randint(0,341)])
    plt.show()

In [None]:
df

In [None]:
# Sums of the lower- and upper-bound volumes over the rows of `df`.
# NOTE(review): `lb` / `ub` are built from the merged survey frame, so this may
# not cover all glaciers despite the "Global" wording; confirm.
print(f'LOO Global Volume Estimate {np.round(sum(lb)), np.round(sum(ub))} * 10^3 km^3')

In [None]:
dfci = df[['LCI','UCI']]

In [None]:
# CI midpoint against surveyed thickness, with the 95% CI as error bars.
x = df['Thickness']
y = np.mean(dfci, axis = 1)
plt.errorbar(
    x,y,
    # `yerr` is the distance drawn on EACH side of y, so pass half the CI
    # width; the full width would draw bars twice as long as the interval.
    yerr = (df['UCI'] - df['LCI']) / 2,
        alpha = 0.25,
#     label = 'Estimates $\hat{\mu}(x)$',
    linestyle = 'None',
    marker = 'o',
    capsize = 8,
    color = '#1f77b4',
)

# 1:1 reference line: both coordinates span the observed range, matching the
# other estimate-vs-survey plots in this notebook.
plt.plot(
    (x.min(),x.max()),
    (x.min(),x.max()),
    '-k'
)

plt.ylabel('Estimated Thickness')
plt.xlabel('GlaThiDa Survey')
plt.title('Leave-One-Out X-val 95% CI')

In [None]:
df

In [None]:
# NOTE(review): no visible cell creates a 'we' column on `df`; this depends on
# state from elsewhere - confirm.
df['we'].max()