## Import dependencies and set environment determinism

In [1]:
import os
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '1'
import tensorflow as tf
#     tf.get_logger().setLevel('ERROR')
tf.compat.v1.logging.set_verbosity(
    0
)
import numpy as np
import random
import pandas as pd


# Seed value used as the default argument of the seeding helpers defined below.
SEED = 378
# SEED = 123
# Printed so the seed in use is recorded in the cell output.
print(SEED)
def set_seeds(seed=SEED):
    """Seed the NumPy, TensorFlow and stdlib `random` generators.

    Also exports the seed as the PYTHONHASHSEED environment variable.

    NOTE: Python reads PYTHONHASHSEED at interpreter start-up, so writing it
    here does not re-seed string hashing in the already-running process.

    Args:
        seed: Value handed to every generator; defaults to the module SEED.
    """
    np.random.seed(seed)
    tf.random.set_seed(seed)
    random.seed(seed)
    # Environment variables must be strings.
    os.environ['PYTHONHASHSEED'] = str(seed)

def set_global_determinism(seed=SEED):
    """Seed all generators and request deterministic TensorFlow execution.

    Calls set_seeds, sets the TF_DETERMINISTIC_OPS and TF_CUDNN_DETERMINISTIC
    environment flags to '1', and limits both TensorFlow thread pools to a
    single thread.

    Args:
        seed: Value forwarded to set_seeds; defaults to the module SEED.
    """
    set_seeds(seed=seed)

    # Environment flags asking TensorFlow / cuDNN for deterministic kernels.
    for flag in ('TF_DETERMINISTIC_OPS', 'TF_CUDNN_DETERMINISTIC'):
        os.environ[flag] = '1'

    # One thread per pool for both inter-op and intra-op parallelism.
    threading_cfg = tf.config.threading
    threading_cfg.set_inter_op_parallelism_threads(1)
    threading_cfg.set_intra_op_parallelism_threads(1)

# Seed every generator and switch on the determinism flags with the chosen SEED
set_global_determinism(seed=SEED)

    




378


In [2]:
import glacierml as gl
from tensorflow import keras
from tensorflow.keras import layers
from tensorflow.keras.layers.experimental import preprocessing
import matplotlib.pyplot as plt
import statsmodels.api as sm
from scipy.stats import norm
from scipy.stats import kstest
from scipy.stats import shapiro 
from scipy.stats import gaussian_kde
from tqdm import tqdm
from scipy import stats
from mpl_toolkits.axes_grid1 import make_axes_locatable
import matplotlib as mpl
import scipy.stats as st
from sklearn import metrics
import absl.logging
absl.logging.set_verbosity(absl.logging.ERROR)
pd.set_option('display.max_columns', None)

In [11]:
# if os.path.isdir(res_dir) == False:

def run_model(model_path, n,l1,l2,loss):
    """Train one leave-one-out network and save it under model_path/<n>.

    Relies on names defined elsewhere in the notebook: the per-fold training
    dictionaries ``trfeat`` / ``trlabs`` and the early-stopping ``callback``.

    Args:
        model_path: Directory that receives one saved model per fold.
        n: Fold key used to index ``trfeat`` and ``trlabs``; also the name of
            the saved model's sub-directory.
        l1: Passed to gl.build_dnn_model as ``layer_1``.
        l2: Passed to gl.build_dnn_model as ``layer_2``.
        loss: Passed to gl.build_dnn_model as ``loss``.

    Returns:
        The fitted model returned by gl.build_dnn_model.
    """
    train_features = trfeat[n]

    # Fit the normalisation statistics on this fold's training features only.
    feature_scaler = preprocessing.Normalization(axis=-1)
    feature_scaler.adapt(np.array(train_features))

    model = gl.build_dnn_model(
        feature_scaler,
        learning_rate=0.01,
        layer_1=l1,
        layer_2=l2,
        loss=loss,
    )

    # The training history is not used afterwards, so it is not kept.
    model.fit(
        train_features,
        trlabs[n],
        validation_split=0.2,
        callbacks=[callback],
        verbose=0,
        epochs=500,
    )

    model.save(os.path.join(model_path, str(n)))
    return model


def find_results(df,model):
    """Summarise the errors of every leave-one-out model, one row per model.

    For each key ``n`` in ``df.index`` the model ``model[n]`` is scored twice:
    on its held-out sample (``tefeat[n]`` / ``telabs[n]``, notebook globals)
    and on every row of ``df``.

    Fixes relative to the previous version:
      * the ``return`` sat inside the loop, so only the first model was
        ever summarised;
      * 'Model Loss' was assigned the whole ``results`` dict, not this
        model's loss;
      * the id column was named after ``n`` itself, giving every row a
        different column; it is now the single column 'i'.

    Args:
        df: Table with 'RGIId' and 'Thickness' columns plus the feature
            columns; its index supplies the model keys.
        model: Mapping from each index value of ``df`` to an object exposing
            ``evaluate`` and ``predict``.

    Returns:
        DataFrame with columns 'i', 'Model Loss', 'Percent Error', 'RMSE',
        'VRes Mean', 'VRes STD', 'VRes Max', 'VRes Min' and 'VRes IQR'.
        Empty when ``df`` has no rows.
    """
    # The full feature table is the same for every model, so build it once.
    all_features = df.drop(['RGIId', 'Thickness'], axis=1)

    rows = []
    for n in tqdm(df.index):
        net = model[n]
        observed = telabs[n].values

        # Held-out sample: compiled loss, RMSE and percent error.
        model_loss = net.evaluate(tefeat[n], telabs[n], verbose=0)
        estimate = net.predict(tefeat[n], verbose=0).flatten()
        rmse = np.sqrt(metrics.mean_squared_error(observed, estimate))
        # Signed (not absolute) percent error against the first held-out label.
        pct_error = np.mean((estimate - observed[0]) / observed[0]) * 100

        # Residuals of this model over every row of df.
        ver_res = net.predict(all_features, verbose=0).flatten() - df['Thickness']
        viqr = np.percentile(ver_res, 75) - np.percentile(ver_res, 25)

        rows.append(pd.DataFrame(
            {
                'i': n,
                'Model Loss': model_loss,
                'Percent Error': pct_error,
                'RMSE': rmse,
                'VRes Mean': np.mean(ver_res),
                'VRes STD': np.std(ver_res),
                'VRes Max': ver_res.max(),
                'VRes Min': ver_res.min(),
                'VRes IQR': viqr,
            },
            index=[0],
        ))

    if not rows:
        return pd.DataFrame()
    # One concat at the end instead of growing a frame inside the loop.
    return pd.concat(rows)

In [12]:
# Early-stopping callback handed to model.fit in run_model.
callback = tf.keras.callbacks.EarlyStopping(
    monitor='val_loss',          # quantity watched during training
    mode='auto',
    min_delta=0.001,
    patience=10,
    baseline=None,
    restore_best_weights=True,
    verbose=0,
)

In [13]:
# l1_list = [3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20]
# l2_list = [2,3,4,5,6,7,8,9,10,11,12,13,14,15]
# loss_list = ['mse','mae']
# df = gl.coregister_data('4')
# df = df.drop(df[df['RGIId'].duplicated(keep = False)].index)

# df = df.sample(frac = 1,random_state = 0)
# df = df.reset_index().drop('index', axis = 1)

# dft = df.copy()

# trfeat, tefeat, trlabs, telabs = gl.split_data(df)

# for loss in loss_list:
#     for l2 in l2_list:
#         for l1 in l1_list:
#             if l2 >= l1:
#                 continue
#             else:

#                 model = {}
#                 model_history = {}
#                 normalizer = {}
#                 path = '/data/fast1/glacierml/models/LOO_loss_testing/'
#                 model_path = os.path.join(
#                    path ,loss,str(l1)+'-'+str(l2)
#                 )
#                 res_dir = os.path.join(path, loss,'final_results.pkl')
#                 if os.path.isdir(res_dir) == True:
#                     continue

#                 isdir = model_path
#                 if os.path.isdir(isdir) == True:
#                     continue
#                 if os.path.isdir(isdir) == False:
#                     print(model_path)
#                     set_global_determinism(seed=SEED)
#                     run_model(model_path,l1,l2,loss)
# print('all done')

In [18]:
# Load the co-registered table and remove every row whose RGIId occurs more
# than once (keep=False flags all occurrences, not just the repeats).
df = gl.coregister_data('4')
repeated_ids = df['RGIId'].duplicated(keep=False)
df = df.drop(df[repeated_ids].index)

# Shuffle reproducibly, then renumber the rows from 0.
df = df.sample(frac=1, random_state=0)
df = df.reset_index().drop(columns='index')

dft = df.copy()

# Leave-one-out split: fold n holds out row n and trains on every other row.
trfeat, trlabs, tefeat, telabs = {}, {}, {}, {}

for n in df.index:
    held_out = df.index.isin([n])
    kept = ~held_out
    trfeat[n] = df.loc[kept].drop(columns=['RGIId', 'Thickness'])
    tefeat[n] = df.loc[held_out].drop(columns=['RGIId', 'Thickness'])
    trlabs[n] = df.loc[kept, 'Thickness']
    telabs[n] = df.loc[held_out, 'Thickness']

# One model per fold: reuse the copy saved on disk, otherwise train it.
model = {}
model_history = {}
normalizer = {}
model_path = '/data/fast1/glacierml/models/LOO/'
res_dir = os.path.join(model_path, 'final_results.pkl')

for n in tqdm(df.index):
    isdir = os.path.join(model_path, str(n))
    if os.path.isdir(isdir):
        # A directory for this fold already exists, so load it.
        model[n] = gl.load_dnn_model(isdir)
    else:
        # Re-seed before each training run so every fold starts from the same seed.
        set_global_determinism(seed=SEED)
        model[n] = run_model(model_path, n, 6, 2, loss='mae')


# if os.path.isdir(res_dir) == False:
#     fr = find_results(df,model)              
# #     fr = fr.set_index('i')
#     fr.to_pickle(res_dir)


rgi_est_pth = os.path.join(model_path, 'rgi_est_raw.pkl')

# The estimates are pickled to a *file*, so the cache test must be isfile;
# isdir was always False here and forced a full recomputation on every run.
if not os.path.isfile(rgi_est_pth):

    RGI = gl.load_RGI()
    # Same columns as df minus its last one.
    # NOTE(review): this assumes 'Thickness' is the last column of df; confirm.
    rfp = RGI[list(df)[:-1]]
    # The model inputs are identical for every fold, so build them once.
    rgi_features = rfp.drop('RGIId', axis=1)

    # One prediction column per leave-one-out model, named by its fold key.
    fold_estimates = [
        pd.Series(model[n].predict(rgi_features).flatten(), name=n)
        for n in tqdm(model.keys())
    ]
    # One concat instead of re-copying the growing frame once per model.
    RGI = pd.concat([RGI, *fold_estimates], axis=1)
    RGI.to_pickle(rgi_est_pth)
            # RGI = pd.read_pickle('rgi_est_raw.pkl')

100%|██████████| 273/273 [00:56<00:00,  4.85it/s]
  0%|          | 0/273 [00:00<?, ?it/s]



  0%|          | 1/273 [00:05<26:08,  5.77s/it]



  1%|          | 2/273 [00:10<24:35,  5.44s/it]



  1%|          | 3/273 [00:16<23:59,  5.33s/it]



  1%|▏         | 4/273 [00:21<23:39,  5.28s/it]



  2%|▏         | 5/273 [00:27<24:35,  5.51s/it]



  2%|▏         | 6/273 [00:32<24:03,  5.41s/it]



  3%|▎         | 7/273 [00:37<23:39,  5.33s/it]



  3%|▎         | 8/273 [00:43<23:43,  5.37s/it]



  3%|▎         | 9/273 [00:48<23:26,  5.33s/it]



  4%|▎         | 10/273 [00:53<23:12,  5.29s/it]



  4%|▍         | 11/273 [00:59<23:49,  5.45s/it]



  4%|▍         | 12/273 [01:04<23:27,  5.39s/it]



  5%|▍         | 13/273 [01:09<23:09,  5.35s/it]



  5%|▌         | 14/273 [01:15<22:55,  5.31s/it]



  5%|▌         | 15/273 [01:20<23:30,  5.47s/it]



  6%|▌         | 16/273 [01:26<23:07,  5.40s/it]



  6%|▌         | 17/273 [01:31<22:47,  5.34s/it]



  7%|▋         | 18/273 [01:38<25:21,  5.97s/it]



  7%|▋         | 19/273 [01:44<24:22,  5.76s/it]



  7%|▋         | 20/273 [01:49<23:38,  5.61s/it]



  8%|▊         | 21/273 [01:54<23:05,  5.50s/it]



  8%|▊         | 22/273 [01:59<22:40,  5.42s/it]



  8%|▊         | 23/273 [02:05<22:21,  5.37s/it]



  9%|▉         | 24/273 [02:10<22:07,  5.33s/it]



  9%|▉         | 25/273 [02:15<21:55,  5.30s/it]



 10%|▉         | 26/273 [02:20<21:44,  5.28s/it]



 10%|▉         | 27/273 [02:26<21:35,  5.26s/it]



 10%|█         | 28/273 [02:31<21:27,  5.26s/it]



 11%|█         | 29/273 [02:36<21:21,  5.25s/it]



 11%|█         | 30/273 [02:41<21:15,  5.25s/it]



 11%|█▏        | 31/273 [02:46<21:09,  5.25s/it]



 12%|█▏        | 32/273 [02:52<21:04,  5.25s/it]



 12%|█▏        | 33/273 [02:57<21:28,  5.37s/it]



 12%|█▏        | 34/273 [03:03<21:56,  5.51s/it]



 13%|█▎        | 35/273 [03:08<21:31,  5.43s/it]



 13%|█▎        | 36/273 [03:14<21:45,  5.51s/it]



 14%|█▎        | 37/273 [03:19<21:21,  5.43s/it]



 14%|█▍        | 38/273 [03:25<21:44,  5.55s/it]



 14%|█▍        | 39/273 [03:31<21:32,  5.52s/it]



 15%|█▍        | 40/273 [03:36<21:04,  5.43s/it]



 15%|█▌        | 41/273 [03:42<21:27,  5.55s/it]



 15%|█▌        | 42/273 [03:47<21:30,  5.59s/it]



 16%|█▌        | 43/273 [03:53<21:48,  5.69s/it]



 16%|█▌        | 44/273 [03:59<21:49,  5.72s/it]



 16%|█▋        | 45/273 [04:05<21:31,  5.67s/it]



 17%|█▋        | 46/273 [04:10<21:06,  5.58s/it]



 17%|█▋        | 47/273 [04:15<20:34,  5.46s/it]



 18%|█▊        | 48/273 [04:20<20:10,  5.38s/it]



 18%|█▊        | 49/273 [04:26<19:52,  5.32s/it]



 18%|█▊        | 50/273 [04:31<19:38,  5.29s/it]



 19%|█▊        | 51/273 [04:36<19:30,  5.27s/it]



 19%|█▉        | 52/273 [04:41<19:20,  5.25s/it]



 19%|█▉        | 53/273 [04:46<19:13,  5.24s/it]



 20%|█▉        | 54/273 [04:52<19:07,  5.24s/it]



 20%|██        | 55/273 [04:57<18:59,  5.23s/it]



 21%|██        | 56/273 [05:02<18:51,  5.22s/it]



 21%|██        | 57/273 [05:08<19:32,  5.43s/it]



 21%|██        | 58/273 [05:14<19:56,  5.56s/it]



 22%|██▏       | 59/273 [05:20<19:58,  5.60s/it]



 22%|██▏       | 60/273 [05:25<19:27,  5.48s/it]



 22%|██▏       | 61/273 [05:30<19:04,  5.40s/it]



 23%|██▎       | 62/273 [05:35<18:45,  5.33s/it]



 23%|██▎       | 63/273 [05:40<18:31,  5.29s/it]



 23%|██▎       | 64/273 [05:46<18:23,  5.28s/it]



 24%|██▍       | 65/273 [05:51<18:17,  5.28s/it]



 24%|██▍       | 66/273 [05:56<18:11,  5.27s/it]



 25%|██▍       | 67/273 [06:02<18:44,  5.46s/it]



 25%|██▍       | 68/273 [06:08<18:58,  5.56s/it]



 25%|██▌       | 69/273 [06:13<18:35,  5.47s/it]



 26%|██▌       | 70/273 [06:18<18:20,  5.42s/it]



 26%|██▌       | 71/273 [06:24<18:00,  5.35s/it]



 26%|██▋       | 72/273 [06:29<17:48,  5.32s/it]



 27%|██▋       | 73/273 [06:34<17:37,  5.29s/it]



 27%|██▋       | 74/273 [06:39<17:27,  5.26s/it]



 27%|██▋       | 75/273 [06:44<17:17,  5.24s/it]



 28%|██▊       | 76/273 [06:50<17:18,  5.27s/it]



 28%|██▊       | 77/273 [06:55<17:22,  5.32s/it]



 29%|██▊       | 78/273 [07:01<17:22,  5.34s/it]



 29%|██▉       | 79/273 [07:06<17:21,  5.37s/it]



 29%|██▉       | 80/273 [07:11<17:17,  5.38s/it]



 30%|██▉       | 81/273 [07:17<17:07,  5.35s/it]



 30%|███       | 82/273 [07:22<16:59,  5.34s/it]



 30%|███       | 83/273 [07:27<16:51,  5.33s/it]



 31%|███       | 84/273 [07:33<16:47,  5.33s/it]



 31%|███       | 85/273 [07:38<16:47,  5.36s/it]



 32%|███▏      | 86/273 [07:44<16:44,  5.37s/it]



 32%|███▏      | 87/273 [07:49<16:35,  5.35s/it]



 32%|███▏      | 88/273 [07:54<16:33,  5.37s/it]



 33%|███▎      | 89/273 [08:00<16:30,  5.39s/it]



 33%|███▎      | 90/273 [08:05<16:22,  5.37s/it]



 33%|███▎      | 91/273 [08:10<16:21,  5.39s/it]



 34%|███▎      | 92/273 [08:16<16:18,  5.40s/it]



 34%|███▍      | 93/273 [08:21<16:08,  5.38s/it]



 34%|███▍      | 94/273 [08:27<16:05,  5.39s/it]



 35%|███▍      | 95/273 [08:32<16:03,  5.41s/it]



 35%|███▌      | 96/273 [08:37<15:55,  5.40s/it]



 36%|███▌      | 97/273 [08:43<16:06,  5.49s/it]



 36%|███▌      | 98/273 [08:49<16:23,  5.62s/it]



 36%|███▋      | 99/273 [08:57<18:32,  6.39s/it]



 37%|███▋      | 100/273 [09:03<17:33,  6.09s/it]



 37%|███▋      | 101/273 [09:08<16:51,  5.88s/it]



 37%|███▋      | 102/273 [09:14<16:53,  5.93s/it]



 38%|███▊      | 103/273 [09:20<16:51,  5.95s/it]



 38%|███▊      | 104/273 [09:26<16:20,  5.80s/it]



 38%|███▊      | 105/273 [09:31<15:54,  5.68s/it]



 39%|███▉      | 106/273 [09:37<15:43,  5.65s/it]



 39%|███▉      | 107/273 [09:42<15:26,  5.58s/it]



 40%|███▉      | 108/273 [09:47<15:13,  5.54s/it]



 40%|███▉      | 109/273 [09:53<15:04,  5.51s/it]



 40%|████      | 110/273 [09:58<14:52,  5.47s/it]



 41%|████      | 111/273 [10:04<14:38,  5.42s/it]



 41%|████      | 112/273 [10:09<14:25,  5.38s/it]



 41%|████▏     | 113/273 [10:14<14:15,  5.35s/it]



 42%|████▏     | 114/273 [10:20<14:20,  5.41s/it]



 42%|████▏     | 115/273 [10:25<14:14,  5.41s/it]



 42%|████▏     | 116/273 [10:31<14:21,  5.49s/it]



 43%|████▎     | 117/273 [10:36<14:11,  5.46s/it]



 43%|████▎     | 118/273 [10:42<14:04,  5.45s/it]



 44%|████▎     | 119/273 [10:47<13:55,  5.42s/it]



 44%|████▍     | 120/273 [10:52<13:47,  5.41s/it]



 44%|████▍     | 121/273 [10:58<13:38,  5.38s/it]



 45%|████▍     | 122/273 [11:03<13:26,  5.34s/it]



 45%|████▌     | 123/273 [11:08<13:22,  5.35s/it]



 45%|████▌     | 124/273 [11:14<13:17,  5.35s/it]



 46%|████▌     | 125/273 [11:19<13:09,  5.34s/it]



 46%|████▌     | 126/273 [11:24<13:00,  5.31s/it]



 47%|████▋     | 127/273 [11:29<12:55,  5.31s/it]



 47%|████▋     | 128/273 [11:35<12:48,  5.30s/it]



 47%|████▋     | 129/273 [11:40<12:41,  5.29s/it]



 48%|████▊     | 130/273 [11:45<12:35,  5.28s/it]



 48%|████▊     | 131/273 [11:51<12:30,  5.29s/it]



 48%|████▊     | 132/273 [11:56<12:24,  5.28s/it]



 49%|████▊     | 133/273 [12:01<12:17,  5.27s/it]



 49%|████▉     | 134/273 [12:06<12:13,  5.28s/it]



 49%|████▉     | 135/273 [12:12<12:22,  5.38s/it]



 50%|████▉     | 136/273 [12:17<12:17,  5.39s/it]



 50%|█████     | 137/273 [12:23<12:07,  5.35s/it]



 51%|█████     | 138/273 [12:28<11:59,  5.33s/it]



 51%|█████     | 139/273 [12:33<11:57,  5.35s/it]



 51%|█████▏    | 140/273 [12:39<11:52,  5.36s/it]



 52%|█████▏    | 141/273 [12:44<11:43,  5.33s/it]



 52%|█████▏    | 142/273 [12:49<11:36,  5.32s/it]



 52%|█████▏    | 143/273 [12:55<11:31,  5.32s/it]



 53%|█████▎    | 144/273 [13:00<11:25,  5.31s/it]



 53%|█████▎    | 145/273 [13:05<11:17,  5.29s/it]



 53%|█████▎    | 146/273 [13:10<11:15,  5.32s/it]



 54%|█████▍    | 147/273 [13:16<11:12,  5.34s/it]



 54%|█████▍    | 148/273 [13:21<11:11,  5.37s/it]



 55%|█████▍    | 149/273 [13:27<11:05,  5.37s/it]



 55%|█████▍    | 150/273 [13:32<10:59,  5.36s/it]



 55%|█████▌    | 151/273 [13:37<10:58,  5.40s/it]



 56%|█████▌    | 152/273 [13:43<10:56,  5.42s/it]



 56%|█████▌    | 153/273 [13:48<10:49,  5.41s/it]



 56%|█████▋    | 154/273 [13:54<10:45,  5.43s/it]



 57%|█████▋    | 155/273 [13:59<10:40,  5.43s/it]



 57%|█████▋    | 156/273 [14:05<10:32,  5.41s/it]



 58%|█████▊    | 157/273 [14:10<10:21,  5.36s/it]



 58%|█████▊    | 158/273 [14:15<10:12,  5.33s/it]



 58%|█████▊    | 159/273 [14:20<10:04,  5.30s/it]



 59%|█████▊    | 160/273 [14:26<09:57,  5.29s/it]



 59%|█████▉    | 161/273 [14:31<09:51,  5.28s/it]



 59%|█████▉    | 162/273 [14:36<09:46,  5.28s/it]



 60%|█████▉    | 163/273 [14:41<09:40,  5.28s/it]



 60%|██████    | 164/273 [14:47<09:37,  5.30s/it]



 60%|██████    | 165/273 [14:52<09:34,  5.32s/it]



 61%|██████    | 166/273 [14:57<09:27,  5.30s/it]



 61%|██████    | 167/273 [15:03<09:23,  5.32s/it]



 62%|██████▏   | 168/273 [15:08<09:19,  5.32s/it]



 62%|██████▏   | 169/273 [15:13<09:11,  5.30s/it]



 62%|██████▏   | 170/273 [15:19<09:07,  5.31s/it]



 63%|██████▎   | 171/273 [15:25<09:18,  5.48s/it]



 63%|██████▎   | 172/273 [15:31<09:28,  5.63s/it]



 63%|██████▎   | 173/273 [15:36<09:32,  5.72s/it]



 64%|██████▎   | 174/273 [15:42<09:32,  5.78s/it]



 64%|██████▍   | 175/273 [15:48<09:11,  5.63s/it]



 64%|██████▍   | 176/273 [15:53<08:57,  5.54s/it]



 65%|██████▍   | 177/273 [15:58<08:45,  5.47s/it]



 65%|██████▌   | 178/273 [16:04<08:36,  5.44s/it]



 66%|██████▌   | 179/273 [16:11<09:36,  6.13s/it]



 66%|██████▌   | 180/273 [16:17<09:08,  5.90s/it]



 66%|██████▋   | 181/273 [16:22<08:47,  5.73s/it]



 67%|██████▋   | 182/273 [16:27<08:30,  5.61s/it]



 67%|██████▋   | 183/273 [16:33<08:18,  5.54s/it]



 67%|██████▋   | 184/273 [16:38<08:07,  5.48s/it]



 68%|██████▊   | 185/273 [16:44<08:00,  5.46s/it]



 68%|██████▊   | 186/273 [16:49<07:54,  5.46s/it]



 68%|██████▊   | 187/273 [16:54<07:47,  5.44s/it]



 69%|██████▉   | 188/273 [17:00<07:41,  5.43s/it]



 69%|██████▉   | 189/273 [17:05<07:33,  5.40s/it]



 70%|██████▉   | 190/273 [17:10<07:25,  5.37s/it]



 70%|██████▉   | 191/273 [17:16<07:19,  5.35s/it]



 70%|███████   | 192/273 [17:21<07:13,  5.35s/it]



 71%|███████   | 193/273 [17:27<07:10,  5.38s/it]



 71%|███████   | 194/273 [17:32<07:05,  5.38s/it]



 71%|███████▏  | 195/273 [17:37<06:57,  5.36s/it]



 72%|███████▏  | 196/273 [17:43<06:53,  5.37s/it]



 72%|███████▏  | 197/273 [17:48<06:47,  5.36s/it]



 73%|███████▎  | 198/273 [17:54<06:57,  5.57s/it]



 73%|███████▎  | 199/273 [17:59<06:46,  5.50s/it]



 73%|███████▎  | 200/273 [18:05<06:36,  5.43s/it]



 74%|███████▎  | 201/273 [18:10<06:33,  5.46s/it]



 74%|███████▍  | 202/273 [18:16<06:25,  5.43s/it]



 74%|███████▍  | 203/273 [18:21<06:16,  5.38s/it]



 75%|███████▍  | 204/273 [18:26<06:08,  5.33s/it]



 75%|███████▌  | 205/273 [18:31<06:01,  5.32s/it]



 75%|███████▌  | 206/273 [18:37<05:56,  5.33s/it]



 76%|███████▌  | 207/273 [18:42<05:50,  5.31s/it]



 76%|███████▌  | 208/273 [18:47<05:44,  5.30s/it]



 77%|███████▋  | 209/273 [18:52<05:38,  5.29s/it]



 77%|███████▋  | 210/273 [18:58<05:32,  5.28s/it]



 77%|███████▋  | 211/273 [19:03<05:28,  5.30s/it]



 78%|███████▊  | 212/273 [19:08<05:23,  5.31s/it]



 78%|███████▊  | 213/273 [19:14<05:17,  5.29s/it]



 78%|███████▊  | 214/273 [19:19<05:12,  5.30s/it]



 79%|███████▉  | 215/273 [19:24<05:07,  5.31s/it]



 79%|███████▉  | 216/273 [19:30<05:03,  5.32s/it]



 79%|███████▉  | 217/273 [19:35<04:56,  5.30s/it]



 80%|███████▉  | 218/273 [19:40<04:50,  5.28s/it]



 80%|████████  | 219/273 [19:45<04:45,  5.29s/it]



 81%|████████  | 220/273 [19:51<04:41,  5.30s/it]



 81%|████████  | 221/273 [19:56<04:34,  5.29s/it]



 81%|████████▏ | 222/273 [20:01<04:29,  5.28s/it]



 82%|████████▏ | 223/273 [20:07<04:23,  5.28s/it]



 82%|████████▏ | 224/273 [20:12<04:18,  5.27s/it]



 82%|████████▏ | 225/273 [20:17<04:13,  5.28s/it]



 83%|████████▎ | 226/273 [20:22<04:07,  5.28s/it]



 83%|████████▎ | 227/273 [20:28<04:03,  5.30s/it]



 84%|████████▎ | 228/273 [20:33<03:57,  5.28s/it]



 84%|████████▍ | 229/273 [20:38<03:52,  5.29s/it]



 84%|████████▍ | 230/273 [20:44<03:48,  5.30s/it]



 85%|████████▍ | 231/273 [20:49<03:43,  5.32s/it]



 85%|████████▍ | 232/273 [20:55<03:41,  5.40s/it]



 85%|████████▌ | 233/273 [21:00<03:35,  5.38s/it]



 86%|████████▌ | 234/273 [21:05<03:29,  5.38s/it]



 86%|████████▌ | 235/273 [21:11<03:24,  5.37s/it]



 86%|████████▋ | 236/273 [21:16<03:18,  5.37s/it]



 87%|████████▋ | 237/273 [21:21<03:13,  5.36s/it]



 87%|████████▋ | 238/273 [21:27<03:07,  5.36s/it]



 88%|████████▊ | 239/273 [21:32<03:02,  5.35s/it]



 88%|████████▊ | 240/273 [21:37<02:56,  5.35s/it]



 88%|████████▊ | 241/273 [21:43<02:51,  5.34s/it]



 89%|████████▊ | 242/273 [21:48<02:45,  5.34s/it]



 89%|████████▉ | 243/273 [21:53<02:41,  5.37s/it]



 89%|████████▉ | 244/273 [21:59<02:35,  5.36s/it]



 90%|████████▉ | 245/273 [22:04<02:30,  5.36s/it]



 90%|█████████ | 246/273 [22:10<02:24,  5.35s/it]



 90%|█████████ | 247/273 [22:15<02:19,  5.35s/it]



 91%|█████████ | 248/273 [22:20<02:13,  5.32s/it]



 91%|█████████ | 249/273 [22:25<02:07,  5.30s/it]



 92%|█████████▏| 250/273 [22:31<02:01,  5.29s/it]



 92%|█████████▏| 251/273 [22:36<01:56,  5.27s/it]



 92%|█████████▏| 252/273 [22:41<01:50,  5.26s/it]



 93%|█████████▎| 253/273 [22:46<01:45,  5.28s/it]



 93%|█████████▎| 254/273 [22:52<01:40,  5.27s/it]



 93%|█████████▎| 255/273 [22:59<01:47,  6.00s/it]



 94%|█████████▍| 256/273 [23:05<01:39,  5.83s/it]



 94%|█████████▍| 257/273 [23:10<01:31,  5.71s/it]



 95%|█████████▍| 258/273 [23:16<01:24,  5.62s/it]



 95%|█████████▍| 259/273 [23:21<01:17,  5.56s/it]



 95%|█████████▌| 260/273 [23:26<01:11,  5.49s/it]



 96%|█████████▌| 261/273 [23:32<01:05,  5.44s/it]



 96%|█████████▌| 262/273 [23:37<00:59,  5.41s/it]



 96%|█████████▋| 263/273 [23:42<00:53,  5.38s/it]



 97%|█████████▋| 264/273 [23:48<00:48,  5.36s/it]



 97%|█████████▋| 265/273 [23:53<00:42,  5.35s/it]



 97%|█████████▋| 266/273 [23:58<00:37,  5.33s/it]



 98%|█████████▊| 267/273 [24:04<00:31,  5.32s/it]



 98%|█████████▊| 268/273 [24:09<00:26,  5.32s/it]



 99%|█████████▊| 269/273 [24:14<00:21,  5.31s/it]



 99%|█████████▉| 270/273 [24:20<00:16,  5.34s/it]



 99%|█████████▉| 271/273 [24:25<00:10,  5.36s/it]



100%|█████████▉| 272/273 [24:30<00:05,  5.35s/it]



100%|██████████| 273/273 [24:36<00:00,  5.41s/it]


In [15]:
# Display the results table.
# NOTE(review): the find_results call in the cell above is commented out, so
# `fr` presumably comes from an earlier run of that code; confirm.
fr

Unnamed: 0,0,Model Loss,Percent Error,RMSE,VRes Mean,VRes STD,VRes Max,VRes Min,VRes IQR
0,0,7.17955,-14.3591,7.17955,-3.363408,28.332261,87.941605,-245.003983,20.968861


In [20]:
# Print the layer summary of the model stored under fold key 0.
model[0].summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 normalization (Normalizatio  (None, 9)                19        
 n)                                                              
                                                                 
 dense (Dense)               (None, 6)                 60        
                                                                 
 dropout (Dropout)           (None, 6)                 0         
                                                                 
 dense_1 (Dense)             (None, 2)                 14        
                                                                 
 dense_2 (Dense)             (None, 1)                 3         
                                                                 
Total params: 96
Trainable params: 77
Non-trainable params: 19
___________________________________________________________

In [None]:
# Display the dict of per-fold models built above.
model

In [None]:
# # feat_sort = ['shuffle','none','Thickness','Area']
# # asc_list = ['none',True,False]
# for feat in feat_sort:
#     for asc in asc_list:
#         df = gl.coregister_data('4')
#         df = df.drop(df[df['RGIId'].duplicated(keep = False)].index)
# #         df = df.drop(['CenLat','CenLon','Zmed','Aspect'],axis = 1)
#         if feat == 'none' and (asc == True or asc == False):
#             continue
#         elif feat == 'none' and asc == 'none':
#             df = df.reset_index().drop('index',axis = 1)
#             a = 'none'
#         elif feat == 'shuffle' and asc == 'none':
#             df = df.sample(frac = 1,random_state = 0)
#             df = df.reset_index().drop('index', axis = 1)
#             a = 'none'
#         elif (feat == 'Thickness' or feat == 'Area') and asc == True:
#             a = 'ascending'
#             df = df.sort_values(feat,ascending = asc).reset_index().drop('index',axis = 1)
#         elif (feat == 'Thickness' or feat == 'Area') and asc == False:
#             a = 'descending'
#             df = df.sort_values(feat,ascending = asc).reset_index().drop('index',axis = 1)
#         elif (feat == 'Thickness' or feat == 'Area') and asc == 'none':
#             continue
#         elif (feat != 'none' or feat != 'shuffle') and asc != 'none':
#             continue


#         dft = df.copy()

#         trfeat = {}
#         trlabs = {}
#         tefeat = {}
#         telabs = {}

#         for n in df.index:
#             mask = df.index.isin([n])
#             trfeat[n] = df.loc[~mask].drop(['RGIId','Thickness'], axis = 1)
#             tefeat[n] = df.loc[mask].drop(['RGIId','Thickness'], axis = 1)
#             trlabs[n] = df['Thickness'].loc[~mask]
#             telabs[n] = df['Thickness'].loc[mask]
            
#         model = {}
#         model_history = {}
#         normalizer = {}
#         model_path = os.path.join('/data/fast1/glacierml/models/LOO1/',feat,a)
#         print(model_path)
#         res_dir = os.path.join(model_path, 'final_results.pkl')
#         if os.path.isdir(res_dir) == True:
#             continue
            
#         for n in tqdm(df.index):
#             isdir = os.path.join(
#                 model_path, str(n)
#             )
#             if os.path.isdir(isdir) == True:
#                 model[n] = gl.load_dnn_model(os.path.join(model_path,str(n)))
#             elif os.path.isdir(isdir) == False:
#                 set_global_determinism(seed=SEED)
#                 run_model(model_path, n)
                

#         if os.path.isdir(res_dir) == False:
#             fr = find_results(df,model,n)              
#             fr['unc'] = fr['RMSE'] + fr['Percent Error']
#             fr = fr.set_index('i')
#             fr.to_pickle(res_dir)
            
            
#         rgi_est_pth = os.path.join(model_path, 'rgi_est_raw.pkl')

#         if os.path.isdir(rgi_est_pth) == True:
#             continue
#         elif os.path.isdir(rgi_est_pth) == False:

#             RGI = gl.load_RGI()
#             rfp = RGI[list(df)[:-1]]

#             for n in tqdm(model.keys()):
#                 preds = pd.Series(
#                     model[n].predict(rfp.drop('RGIId',axis = 1)).flatten(), name = n
#                 )
#                 RGI = pd.concat([RGI,preds], axis = 1)
#             RGI.to_pickle(rgi_est_pth)
#             # RGI = pd.read_pickle('rgi_est_raw.pkl')


In [None]:
# NOTE(review): `crash` is not defined anywhere in the visible notebook.
# Presumably this call is a deliberate NameError that stops "Run All" before
# the exploratory cells below; confirm.
crash()

In [None]:
# fr = pd.read_pickle(model_path + 'final_results_2.pkl')

In [None]:
# fr.sort_values('Percent Error')

## Looking at LOO first results

In [None]:
# ls = 12
# fig, ax = plt.subplots(1,2,figsize = (10,4))
# x = fr['Thickness']
# y = fr['est']
# ax[0].scatter(
#     x,y, alpha = 0.25
# )
# ax[0].plot(
#     (x.min(),x.max()),
#     (x.min(),x.max()),
#     '-k'
# )
# ax[0].set_ylabel('LOO Estimated Thickness',fontsize = ls)

# y = fr['Percent Error']
# ax[1].scatter(
#     x,y, alpha = 0.25
# )
# ax[1].plot(
#     (x.min(),x.max()),
#     (0,0),
#     '-k'
# )
# ax[1].set_ylabel('LOO Percent Error',fontsize = ls)
# fig.supxlabel('GlaThiDa Thickness', y = 0.05,fontsize = ls)
# ax[0].tick_params(axis='both', labelsize=ls)
# ax[1].tick_params(axis='both', labelsize=ls)
# plt.tight_layout()

In [None]:
# ls = 12
# fig, ax = plt.subplots(1,2,figsize = (10,4))
# x = fr['Thickness']
# y = fr['est']
# ax[0].scatter(
#     x,y, alpha = 0.25
# )
# ax[0].plot(
#     (x.min(),x.max()),
#     (x.min(),x.max()),
#     '-k'
# )
# ax[0].set_ylabel('LOO Estimated Thickness',fontsize = ls)

# y = fr['Percent Error']
# x = fr['Area']
# ax[1].scatter(
#     x,y, alpha = 0.25
# )
# ax[1].plot(
#     (x.min(),x.max()),
#     (0,0),
#     '-k'
# )
# ax[1].set_xscale('log')
# ax[1].set_ylabel('LOO Percent Error',fontsize = ls)
# fig.supxlabel('GlaThiDa Thickness', y = 0.05,fontsize = ls)
# ax[0].tick_params(axis='both', labelsize=ls)
# ax[1].tick_params(axis='both', labelsize=ls)
# plt.tight_layout()

In [None]:
# fig, ax = plt.subplots(1,2,figsize = (10,4))
# x = fr['Area']
# y = fr['est']
# ax[0].scatter(
#     x,y,alpha = 0.25
# )
# ax[0].set_xscale('log')

# ax[0].set_ylabel('LOO Estimated Thickness', fontsize = ls)

# y = fr['Thickness']
# ax[1].scatter(
#     x,y,alpha = 0.25
# )
# ax[1].set_xscale('log')
# ax[1].set_ylabel('GlaThiDa Survey Thickness',fontsize = ls)
# fig.supxlabel('Glacier Area (km$^2$)', y = 0.05,fontsize = ls)
# ax[0].tick_params(axis='both', labelsize=ls)
# ax[1].tick_params(axis='both', labelsize=ls)
# plt.tight_layout()

### Let's see if a linear model does anything different

In [None]:
# lin_model = {}
# lin_model_history = {}
# l1 = 16
# l2 = 4
# normalizer = {}
# loss = 'mae'
# model_path = '/data/fast1/glacierml/models/LOO_linear/'
# for n in tqdm(df.index):
#     isdir = os.path.join(
#         model_path ,str(n)
#     )
#     if os.path.isdir(isdir) == True:
#         lin_model[n] = gl.load_dnn_model(isdir)
#     elif os.path.isdir(isdir) == False:
        

# #         total_inputs = (len(df.columns)) * (len(df) - 1)
# #         dp = int(pr * total_inputs)
# #         tp = dp - (len(df.columns) + (len(df.columns)-1) )
# #         g = (len(df.columns) + (len(df.columns) - 1))
# #         l2[n] = 4
# #         l1[n] = int((dp - 1 - g - 2*l2[n]) / (10 + l2[n]))
#         normalizer = preprocessing.Normalization(axis=-1)
#         normalizer.adapt(np.array(trfeat[n]))

#         lin_model[n] = gl.build_linear_model(
#             normalizer, learning_rate = 0.01, 
#             layer_1 = l1, layer_2 = l2
#         )

#         lin_model_history[n] = model[n].fit(
#             trfeat[n],
#             trlabs[n],
#             validation_split=0.2,
#             callbacks = [callback],
#             verbose=0, 
#             epochs=500
#         )
#         model_filename = isdir
#         lin_model[n].save(model_filename)

In [None]:
# results = {}
# residuals = {}
# final_results = pd.DataFrame()
# # residuals = pd.DataFrame()
# y = {}
# for n in tqdm(df.index):
#     results[n] = model[n].evaluate(tefeat[n], telabs[n],verbose = 0)
#     y[n] = model[n].predict(tefeat[n],verbose = 0).flatten()
# #     residuals[n] = y[n] - telabs[n].values[0]
# #     p_res = residuals[n] / telabs[n].values[0]
#     RMSE = np.sqrt(metrics.mean_squared_error(telabs[n].values, y[n]))
#     MAPerror = np.mean((y[n] - (telabs[n].values[0])) / telabs[n].values[0]) * 100 


# #     tq75 = np.percentile(residuals[n], 75)
# #     tq25 = np.percentile(residuals[n], 25)

# #     TIQR = tq75 - tq25

#     z = model[n].predict(df.drop(['RGIId','Thickness'],axis = 1),verbose = 0).flatten()
#     ver_res = z - df['Thickness']
#     ver_res_mean = np.mean(ver_res)
#     ver_res_std =  np.std(ver_res)
#     vq75 = np.percentile(ver_res,75)
#     vq25 = np.percentile(ver_res,25)
#     VIQR = vq75 - vq25

#     test_res_n = pd.DataFrame(pd.Series(n,name = 'i'))
#     test_res = pd.DataFrame(pd.Series((RMSE), name = 'Test Res'))
# #     test_res_std = pd.DataFrame(pd.Series(np.std(residuals[n].values),name = 'Val STD'))

#     test_res_n['Model Loss'] = results[n]
#     test_res_n['RMSE'] = RMSE
#     test_res_n['Percent Error'] = MAPerror
#     trm = test_res_n.join(test_res)
# #     test_res_mean = test_res_mean.join(test_res_std)
# #     test_res_mean['TRes Max'] = residuals[n].max()
# #     test_res_mean['TRes Min'] = residuals[n].min()
# #     test_res_mean['TRes IQR'] = TIQR
#     trm['VRes Mean'] = ver_res_mean    
#     trm['VRes STD'] = ver_res_std
#     trm['VRes Max'] = ver_res.max()
#     trm['VRes Min'] = ver_res.min()
#     trm['VRes IQR'] = VIQR
#     trm['est'] = y[n]
#     trm['Thickness'] = telabs[n].values[0]
#     trm['Area'] = tefeat[n]['Area'].values[0]
#     trm['Lmax'] = tefeat[n]['Lmax'].values[0]
#     trm['Slope'] = tefeat[n]['Slope'].values[0]
#     trm['Zmin'] = tefeat[n]['Zmin'].values[0]
#     trm['Zmed'] = tefeat[n]['Zmed'].values[0]
#     trm['Zmax'] = tefeat[n]['Zmax'].values[0]
#     trm['CenLat'] = tefeat[n]['CenLat'].values[0]
#     trm['CenLon'] = tefeat[n]['CenLon'].values[0]
#     trm['Aspect'] = tefeat[n]['Aspect'].values[0]
#     final_results = pd.concat([final_results,trm])
# fr_lin = final_results
# fr_lin['unc'] = fr_lin['RMSE'] + fr_lin['Percent Error']
# fr_lin = fr_lin.set_index('i')
# fr_lin.to_pickle(model_path + 'final_results_linear.pkl')

In [None]:
# fr_lin = pd.read_pickle(model_path + 'final_results_linear.pkl')

In [None]:
# fr_lin.sort_values('Percent Error')

In [None]:
# fig, ax = plt.subplots(1,2,figsize = (10,4))
# x = fr_lin['Thickness']
# y = fr_lin['est']
# ax[0].scatter(
#     x,y, alpha = 0.25
# )
# ax[0].plot(
#     (x.min(),x.max()),
#     (x.min(),x.max()),
#     '-k'
# )
# # ax[0].set_xscale('log')
# # ax[0].set_yscale('log')
# ax[0].set_ylabel('LOO Estimated Thickness', fontsize = ls)
# fig.supxlabel('GlaThiDa Thickness', y = 0.05, fontsize = ls)

# y = fr_lin['Percent Error']
# ax[1].scatter(
#     x,y, alpha = 0.25
# )
# ax[1].plot(
#     (x.min(),x.max()),
#     (0,0),
#     '-k'
# )
# ax[1].set_ylabel('LOO Percent Error', fontsize = ls)
# ax[0].tick_params(axis='both', labelsize=ls)
# ax[1].tick_params(axis='both', labelsize=ls)
# plt.tight_layout()

In [None]:
# fr.sort_values('RMSE')

In [None]:
# fig, ax = plt.subplots(1,2,figsize = (10,4))
# x = fr_lin['Area']
# y = fr_lin['est']
# ax[0].scatter(
#     x,y,alpha = 0.25
# )
# ax[0].set_xscale('log')
# ax[0].set_ylabel('LOO Estimated Thickness')

# y = fr_lin['Thickness']
# ax[1].scatter(
#     x,y,alpha = 0.25
# )
# ax[1].set_xscale('log')
# ax[1].set_ylabel('GlaThiDa Survey Thickness')
# fig.supxlabel('Glacier Area (km$^2$)', y = -.05)

In [None]:
# plt.scatter(
#     fr['est'],
#     fr['RMSE'],
#     alpha = 0.25
# )

## Are residuals and percent residuals normally distributed with features?

In [None]:
# for feat in ['Area','Lmax','Slope','Zmin']:
#     plt.scatter(
#         fr[feat],
#         fr['RMSE']
#     )
#     if feat == 'Area' or feat == 'Lmax':
#         plt.xscale('log')
#     plt.xlabel(feat)
#     plt.ylabel('LOO RMSE')
#     plt.show()

# for feat in ['Area','Lmax','Slope','Zmin']:
#     plt.scatter(
#         fr[feat],
#         fr['Percent Error']
#     )
#     if feat == 'Area' or feat == 'Lmax':
#         plt.xscale('log')
#     plt.xlabel(feat)
#     plt.ylabel('LOO % error')
#     plt.show()

In [None]:
# plt.scatter(
#     fr['Area'],
#     fr['RMSE']
# )
# plt.yscale('log')
# plt.xscale('log')

### Use each LOO model to predict RGI

In [None]:

# Integer column labels 0..340 (used below to select the per-model columns).
cols = list(range(341))

# Every column name of df except the last one, followed by the RGIId key.
rgi_list = [*list(df)[:-1], 'RGIId']

In [None]:
# String labels '0_' .. '340_', one per integer label in range(341).
unc_cols = [f'{i}_' for i in range(341)]

In [None]:
# Keep only the rows of df that match RGI on every key in rgi_list.
df = pd.merge(df, RGI, how = 'inner', on = rgi_list)

# Row-wise mean and standard error over the 341 integer-labelled columns.
loo_est = df[range(341)]
X = np.mean(loo_est, axis = 1)
se = np.std(loo_est, axis = 1) / np.sqrt(341)

# Normal-approximation 95% interval around the row mean.
half_width = 1.96 * se
df['LCI'] = X - half_width
df['UCI'] = X + half_width

# Interval bounds scaled by 1e-3 and multiplied by Area scaled by 1e-3.
lb = df['LCI'] / 1e3 * df['Area'] / 1e3
ub = df['UCI'] / 1e3 * df['Area'] / 1e3

In [None]:
# Row-mean estimate against surveyed thickness, with the 95% CI as error bars.
x = df['Thickness']
y = np.mean(df[cols],axis = 1)

# errorbar draws y - yerr .. y + yerr, so pass the CI half-width; passing the
# full width (UCI - LCI) would draw bars twice as long as the interval.
ci_half_width = (df['UCI'] - df['LCI']) / 2
plt.errorbar(
    x,y,yerr = ci_half_width,
    alpha = 0.25,
    linestyle = 'None',
    marker = '.',
    capsize = 8,
    color = '#1f77b4',
)

# 1:1 reference line.
plt.plot(
    (x.min(),x.max()),
    (x.min(),x.max()),
    '-k'
)

plt.ylabel('Estimated Thickness')
plt.xlabel('GlaThiDa Survey')
plt.title('Leave-One-Out X-val 95% CI')

In [None]:
# Left panel: both CI bounds against area; right panel: surveyed thickness.
fig, ax = plt.subplots(1,2,figsize = (10,4))
x = df['Area']
y1 = df['UCI']
y2 = df['LCI']
# Both bounds share this axis, so label each series and name the axis for
# both rather than for the upper bound only.
ax[0].scatter(
    x,y1,alpha = 0.25,label = 'Upper CI'
)
ax[0].scatter(
    x,y2,alpha = 0.25,label = 'Lower CI'
)
ax[0].set_xscale('log')
ax[0].set_ylabel('LOO Thickness CI Bounds')
ax[0].legend()

y = df['Thickness']
ax[1].scatter(
    x,y,alpha = 0.25
)
ax[1].set_xscale('log')
ax[1].set_ylabel('GlaThiDa Survey Thickness')
fig.supxlabel('Glacier Area (km$^2$)', y = -.05)

In [None]:
x = df['Area']
y = df['UCI']
plt.scatter(x,y,alpha = 0.25)
plt.xscale('log')

In [None]:
# Load every Farinotti reference-thickness CSV and join it onto df by RGIId.
ref_pth = '/data/fast1/glacierml/data/reference_thicknesses/'

# Read from the same directory that is listed; previously the files were
# listed in ref_pth but opened relative to the current working directory.
# Sorting makes the concatenation order independent of os.listdir's order.
farinotti_frames = [
    pd.read_csv(os.path.join(ref_pth, file))
    for file in sorted(os.listdir(ref_pth))
    if 'Farinotti' in file
]
# Concatenate once instead of growing the frame inside the loop; with no
# matching files this stays an empty frame, as before.
if farinotti_frames:
    ref = pd.concat(farinotti_frames, ignore_index = True)
else:
    ref = pd.DataFrame()

ref = ref.rename(columns = {
     'Farinotti Mean Thickness':'FMT',
})
ref = ref[[
     'FMT',
     'RGIId',
]]

df = pd.merge(df, ref, how = 'inner', on = 'RGIId')

In [None]:
df['est'] = df[['UCI','LCI']].mean(axis = 1)

In [None]:
x = df['Thickness']
# y = np.mean(dfci, axis = 1)
# y = df['we']
y = df['est']
plt.scatter(
    x,y,
        alpha = 0.25,
#     label = 'Estimates $\hat{\mu}(x)$',
    marker = '.',
)

plt.plot(
    (x.min(),x.max()),
    (x.min(),x.max()),
    '-k'
)

plt.ylabel('Estimated Thickness')
plt.xlabel('GlaThiDa Survey')
# plt.title('Leave-One-Out X-val 95% CI')

In [None]:
# Two-sample t statistic comparing the mean of 'est' with the mean of
# 'Thickness'.
obs = df['Thickness']
obs_mean = obs.mean()
obs_std = obs.std()
# Standard error of the mean is std / sqrt(n), not mean / std.
obs_se = obs_std / np.sqrt(obs.count())

pred = df['est']
pred_mean = pred.mean()
pred_std = pred.std()
pred_se = pred_std / np.sqrt(pred.count())

# Equal-weight pooled variance of the two samples (kept for inspection).
pooled_var = (obs_std**2 + pred_std**2) / 2

# Difference of means over its standard error; for equal sample sizes this
# equals diff / sqrt(pooled_var * 2 / n).
# NOTE(review): both columns come from the same rows of df, so a paired test
# may be more appropriate - confirm.
t = (pred_mean - obs_mean) / np.sqrt(obs_se**2 + pred_se**2)

print(t)

In [None]:
# Two-sample t statistic comparing the mean of 'FMT' with the mean of
# 'Thickness'.
obs = df['Thickness']
obs_mean = obs.mean()
obs_std = obs.std()
# Standard error of the mean is std / sqrt(n), not mean / std.
obs_se = obs_std / np.sqrt(obs.count())

pred = df['FMT']
pred_mean = pred.mean()
pred_std = pred.std()
pred_se = pred_std / np.sqrt(pred.count())

# Equal-weight pooled variance of the two samples (kept for inspection).
pooled_var = (obs_std**2 + pred_std**2) / 2

# Difference of means over its standard error; for equal sample sizes this
# equals diff / sqrt(pooled_var * 2 / n).
# NOTE(review): both columns come from the same rows of df, so a paired test
# may be more appropriate - confirm.
t = (pred_mean - obs_mean) / np.sqrt(obs_se**2 + pred_se**2)

print(t)

In [None]:
cov = np.cov(df[cols].T)

In [None]:
# cov = np.cov(df[cols])
eigenvalues, eigenvectors = np.linalg.eig(cov)

In [None]:
corr = np.corrcoef(df[cols].T)

In [None]:
f = plt.figure(figsize=(13, 12))
plt.matshow(
    cov, fignum=f.number,cmap = 'seismic',vmin=-1000, vmax=1000
)
plt.xticks(fontsize=14)
plt.yticks(fontsize=14)
cb = plt.colorbar()
cb.ax.tick_params(labelsize=14)
# plt.title('Covariance Matrix of Survey Thickness and Model Estimates', fontsize=18)
# plt.xlabel('Model Thickness Estimates',fontsize = 14)
# plt.ylabel('GlaThiDa Survey Thickness',fontsize = 14)

In [None]:
# Split dft by the sign of mean_covs: negative, zero, positive, or missing.
neg_covs_ind = mean_covs[mean_covs < 0].index
zer_covs_ind = mean_covs[mean_covs == 0].index
# NaN never compares equal to anything (including itself), so `== np.nan`
# always selected nothing; isna() is the correct missing-value test.
nan_covs_ind = mean_covs[mean_covs.isna()].index
pos_covs_ind = mean_covs[mean_covs > 0].index

# The index labels of mean_covs are used as row positions into dft.
negs = dft.iloc[neg_covs_ind]
zero = dft.iloc[zer_covs_ind]
pos = dft.iloc[pos_covs_ind]
nans = dft.iloc[nan_covs_ind]

In [None]:
# Fixed evaluation grid: 500 points from 0 to 700.
x_eval = np.linspace(0, 700, num = 500)

# Draw one Gaussian KDE curve per row of df, fitted to that row's values in
# the cols columns.
for i in tqdm(range(len(df))):
    x = df[cols].iloc[i]
    kde = st.gaussian_kde(np.array(x))
    density = kde(x_eval)
    plt.plot(x_eval, density, '-', alpha = 0.25)

plt.xscale('symlog')

In [None]:
cov = pd.DataFrame(cov)
corr = pd.DataFrame(corr)

In [None]:
# Drop the rows of cov / corr whose entries are all <= 0: the boolean mask
# leaves NaN wherever the condition fails, and dropna(axis = 0) then keeps
# only rows with no NaN, i.e. rows where every entry satisfied it.
cov_d = cov.drop(cov[cov[cols] <= 0].dropna(axis = 0).index)
# (corr_d used to be computed twice with the identical expression.)
corr_d = corr.drop(corr[corr[cols] <= 0].dropna(axis = 0).index)

# Renumber rows 0..n-1 without materialising the old index as a column.
cov_d = cov_d.reset_index(drop = True)
corr_d = corr_d.reset_index(drop = True)

In [None]:
dftr = dft.drop(cov[cov[cols] <= 0].dropna(axis = 0).index)
dftr = dftr.reset_index().drop('index',axis = 1)

In [None]:

# Plot one Gaussian KDE curve per row of dftr, visiting rows in ascending
# 'Thickness' order and colouring each curve by its position in that order.
p = plt.get_cmap('seismic')
# n counts curves drawn so far; it selects the colormap position below.
n = 0
for i in tqdm(
    dftr.sort_values('Thickness',ascending = True).index,
#     dftr.sort_values('Thickness',ascending = True).reset_index().index
):
#     print(dft['Thickness'].loc[i])

    
    # Label-based lookup into df using dftr's index labels.
    # NOTE(review): dftr's index is reset in an earlier cell, so these labels
    # may not refer to the same rows in df - confirm.
    x =df[cols].loc[i]

    # Evaluate the density over this row's own value range.
    x_eval = np.linspace(x.min(),x.max(),500)

    kde = st.gaussian_kde(np.array(x))
    # NOTE(review): the colour is normalised by len(dft) although the loop
    # runs over dftr - confirm this is intended.
    plt.plot(
        x_eval, kde(x_eval), '-',alpha = 0.75,
        c = p(n/(len(dft) - 1))
    )
    n = n + 1
#     c = p(n/(len(pos) - 1))
# plt.ylim(0,0.2)
# plt.yscale('log')
plt.xscale('symlog')
plt.ylabel('Likelihood')
plt.xlabel('Thickness')
plt.title('Leave-One-Out Thickness PDF')
# Carve a narrow axes off the current one to hold the colorbar.
divider = make_axes_locatable(plt.gca())
ax_cb = divider.new_horizontal(size="5%", pad=0.05)
cb1 = mpl.colorbar.ColorbarBase(
    ax_cb, cmap=p, orientation='vertical',
    ticklocation = 'auto',ticks = [],
    label = 'Left-Out Thickness'
)
# Only the two ends of the colormap are labelled.
cb1.set_ticks(ticks = (0,1),labels = ['Min','Max'])
# cb1.set_label('Thickness',x = -0.07)
plt.gcf().add_axes(ax_cb)

#     plt.show()


In [None]:

# Plot one Gaussian KDE curve per row of cov_d, visiting dftr's rows in
# ascending 'Thickness' order and colouring each curve by its position in
# that order.
p = plt.get_cmap('seismic')
# n counts curves drawn so far; it selects the colormap position below.
n = 0
for i in tqdm(
    dftr.sort_values('Thickness',ascending = True).index,
#     dftr.sort_values('Thickness',ascending = True).reset_index().index
):
#     print(dft['Thickness'].loc[i])

    
    # Row of cov_d with the same index label as the current dftr row.
    x =cov_d.loc[i]

    # Evaluate the density over this row's own value range.
    x_eval = np.linspace(x.min(),x.max(),500)

    kde = st.gaussian_kde(np.array(x))
    # NOTE(review): the colour is normalised by len(dft) although the loop
    # runs over dftr - confirm this is intended.
    plt.plot(
        x_eval, kde(x_eval), '-',alpha = 0.25,
        c = p(n/(len(dft) - 1))
    )
    n = n + 1
#     c = p(n/(len(pos) - 1))
# plt.ylim(0,0.2)
plt.yscale('log')
plt.xscale('symlog')
plt.ylabel('Density')
plt.xlabel('Covariance')
plt.title('Leave-One-Out Covariance PDF')
# Carve a narrow axes off the current one to hold the colorbar.
divider = make_axes_locatable(plt.gca())
ax_cb = divider.new_horizontal(size="5%", pad=0.05)
cb1 = mpl.colorbar.ColorbarBase(
    ax_cb, cmap=p, orientation='vertical',
    ticklocation = 'auto',ticks = [],
    label = 'Thickness index'
)
# Only the two ends of the colormap are labelled.
cb1.set_ticks(ticks = (0,1),labels = ['Min','Max'])
# cb1.set_label('Thickness',x = -0.07)
plt.gcf().add_axes(ax_cb)

#     plt.show()


In [None]:
cov

In [None]:
from mpl_toolkits.axes_grid1 import make_axes_locatable
import matplotlib as mpl

# Plot one Gaussian KDE curve per row of dftr (ascending 'Thickness' order),
# fitted to that row's values in df[cols].
# NOTE(review): the axis label and title below say "Covariance", but the
# plotted values come from df[cols], not from cov - confirm which is intended.
p = plt.get_cmap('seismic')
# NOTE(review): `thickness` is not used anywhere else in this cell.
thickness = dft['Thickness']
# n counts curves drawn so far; it selects the colormap position below.
n = 0
for i in tqdm(
    dftr.sort_values('Thickness',ascending = True).index,
#     dftr.sort_values('Thickness',ascending = True).reset_index().index
):
#     print(dft['Thickness'].loc[i])
    # Label-based lookup into df using dftr's index labels.
    x = df[cols].loc[i]
    # Evaluate the density over this row's own value range.
    x_eval = np.linspace(x.min(),x.max(),500)

    kde = st.gaussian_kde(np.array(x))
    # NOTE(review): the colour is normalised by len(dft) although the loop
    # runs over dftr - confirm this is intended.
    plt.plot(
        x_eval, kde(x_eval), '-',alpha = 0.75,
        c = p(n/(len(dft) - 1))
    )
    n = n + 1
#     c = p(n/(len(pos) - 1))
    
plt.xscale('symlog')
# plt.yscale('log')
plt.ylabel('Probability')
plt.xlabel('Covariance')
plt.title('Leave-One-Out Covariance PDF')
# Carve a narrow axes off the current one to hold the colorbar.
divider = make_axes_locatable(plt.gca())
ax_cb = divider.new_horizontal(size="5%", pad=0.05)
cb1 = mpl.colorbar.ColorbarBase(
    ax_cb, cmap=p, orientation='vertical',
    ticklocation = 'auto',ticks = [],
    label = 'Thickness index'
)
# Only the two ends of the colormap are labelled.
cb1.set_ticks(ticks = (0,1),labels = ['Min','Max'])
# cb1.set_label('Thickness',x = -0.07)
plt.gcf().add_axes(ax_cb)
#     plt.show()


In [None]:
df[cols].min().min()

In [None]:
dft

In [None]:
(n/(len(dft) - 1))

In [None]:
p = plt.get_cmap('seismic')

for i in tqdm(negs.sort_values('Thickness',ascending = True).index):
    print(i)
    x = df[cols].iloc[i]
    x_eval = np.linspace(x.min(),x.max(),500)

    kde = st.gaussian_kde(np.array(x))
    plt.plot(x_eval, kde(x_eval), '-',alpha = 0.5,c = p(i/(len(pos) - 1)))
plt.xscale('symlog')
# plt.colorbar(p)

In [None]:
p = plt.get_cmap('seismic')

for i in tqdm(zero.sort_values('Thickness',ascending = True).index):
    print(i)
    x = df[cols].iloc[i]
    x_eval = np.linspace(x.min(),x.max(),500)

    kde = st.gaussian_kde(np.array(x))
    plt.plot(x_eval, kde(x_eval), '-',alpha = 0.5,c = p(i/(len(pos) - 1)))
plt.xscale('symlog')
# plt.colorbar(p)

In [None]:
negs

In [None]:
import scipy.stats as st

In [None]:
cov = pd.DataFrame(cov)

In [None]:
mean_covs = np.mean(cov, axis = 0)

In [None]:

plt.scatter(    pos['Zmin'],pos['Thickness'],alpha = 0.25
)
plt.scatter(
    negs['Zmin'],negs['Thickness']
)
# plt.xscale('log')

In [None]:
np.argsort(np.sum(cov,axis=0))

In [None]:
loo = np.array(df[cols].T)
plt.scatter(loo[:,65],loo[:,275])

In [None]:
np.mean(df[cols].iloc[55])

In [None]:
df.iloc[55]

In [None]:
cov = pd.DataFrame(cov)

In [None]:
cov.iloc[55]

In [None]:
np.where(cov[cols] <= 0)

In [None]:
len(np.unique(np.where(cov<0)[1]))

In [None]:
cov[cov[cols] <= 0].dropna(axis = 0)

In [None]:
cov[(cov[cov.columns] < 0).all(axis=1)]

In [None]:
cov.iloc[55].max()

In [None]:
(cov.iloc[np.where(cov<=0)[1]].index).unique()

In [None]:
dft = df.iloc[bad_glacs]

In [None]:
df.iloc[np.unique(bad_glacs)]

In [None]:
dft

In [None]:
cov[:,0]

In [None]:
# Rows of df whose position matches a row of cov holding any negative entry.
# (`cov[,:]` was a SyntaxError; comparing the whole matrix works whether cov
# is an ndarray or a DataFrame.)
df.iloc[np.unique(np.where(cov < 0)[0])]

In [None]:
cov

In [None]:
np.where(cov==cov.max())

In [None]:
plt.scatter(
    eigenvectors, eigenvectors
)
plt.xscale('log')
plt.yscale('log')

In [None]:
np.where(eigenvalues == np.max(eigenvalues))

In [None]:
plt.plot(eigenvalues,linestyle = None)
plt.yscale('symlog')

In [None]:
# x = np.linspace(eigenvectors.min(),eigenvectors.max(),len(eigenvectors))
# for i in range(340):
plt.plot(
    eigenvectors[0],linestyle = None
)
plt.show()


In [None]:
df['Residual'] = df['est'] - df['Thickness']
df['FResidual'] = df['FMT'] - df['Thickness']

In [None]:
# Mean, standard deviation and standard error of each residual column.
# The standard error divides by the number of residuals actually present
# rather than a hard-coded 341, so it stays correct if earlier merges changed
# the number of rows in df.
n_1 = df['Residual'].count()
mean_1 = df['Residual'].mean()
std_1 = df['Residual'].std()
se_1 = std_1 / np.sqrt(n_1)

n_2 = df['FResidual'].count()
mean_2 = df['FResidual'].mean()
std_2 = df['FResidual'].std()
se_2 = std_2 / np.sqrt(n_2)

In [None]:
Z_1 = (mean_1 - 0) / se_1

Z_2 = (mean_2 - 0) / se_2

print(Z_1)
print(Z_2)

In [None]:
x1 = df['Residual']
x2 = df['FResidual']
kde1 = stats.gaussian_kde(np.array(x1))
kde2 = stats.gaussian_kde(np.array(x2))
#visualize KDE
x1_eval = np.linspace(x1.min(),x1.max(), num=200)
plt.plot(x1_eval, kde1(x1_eval), '-',color = 'blue',label = 'This study Residual')

x2_eval = np.linspace(x2.min(),x2.max(), num=200)
plt.plot(x2_eval, kde2(x2_eval),color = 'orange',label = 'Farinotti Residual')

plt.plot(
    (x1.mean(),x1.mean()),
    (0,0.02),'--',color = 'blue',label = 'This Study Mean Residual'
)

plt.plot(
    (x2.mean(),x2.mean()),
    (0,0.02),'--',color = 'orange',label = 'Farinotti Mean Residual'
)


plt.legend()
print(f'This study mean residual = {x1.mean()}')
print(f'Farinotti mean residual = {x2.mean()}')

In [None]:
print(np.var(x1))
print(np.var(x2))

In [None]:
# Probability mass each KDE places on (-inf, 0], i.e. P(residual <= 0).
# A continuous density gives zero probability to the single value 0, so the
# printed message now names the interval that is actually integrated.
p1 = kde1.integrate_box_1d(-np.inf, 0)
p2 = kde2.integrate_box_1d(-np.inf, 0)
print(f'probability of residual <= 0 = {p1}')
print(f'probability of Fresidual <= 0 = {p2}')


In [None]:
import scipy.stats as st
print(st.norm.pdf(Z_1))
print(st.norm.pdf(Z_2))

In [None]:
x1 = np.sort(np.random.standard_normal(size=500))
kde1 = stats.gaussian_kde(np.array(x1))
kde2 = stats.gaussian_kde(np.array(x2_eval))
#visualize KDE
plt.plot(x1, kde1(x1), '-',color = 'blue',label = 'This study Residual')

# x2_eval = np.linspace(x2.min(),x2.max(), num=200)
# plt.plot(x2_eval, kde2(x2_eval),color = 'orange',label = 'Farinotti Residual')

# plt.plot(
#     (x1.mean(),x1.mean()),
#     (0,0.02),'--',color = 'blue',label = 'This Study Mean Residual'
# )

# plt.plot(
#     (x2.mean(),x2.mean()),
#     (0,0.02),'--',color = 'orange',label = 'Farinotti Mean Residual'
# )


plt.legend()
print(f'This study mean residual = {x1.mean()}')
print(f'Farinotti mean residual = {x2.mean()}')

In [None]:
# Weighted mean of the first RGI row's cols values, with weights 1 / fr['unc'].
# Only row 0 is ever processed.
i = 0
glac = RGI.iloc[i]
weighted_total = sum(glac[cols] / fr['unc'])
weight_total = sum(1/fr['unc'])
print(weighted_total / weight_total)

In [None]:
# Histogram the 341 integer-labelled columns of five randomly chosen rows.
for i in range(5):
    # random.randint(0, 341) includes 341 and could index one past the end of
    # a 341-row frame; randrange(len(data)) always yields a valid position.
    row = random.randrange(len(data))
    plt.hist(data[range(341)].iloc[row])
    plt.show()

In [None]:
df

In [None]:
print(f'LOO Global Volume Estimate {np.round(sum(lb)), np.round(sum(ub))} * 10^3 km^3')

In [None]:
dfci = df[['LCI','UCI']]

In [None]:
# CI midpoint against surveyed thickness, with the 95% CI as error bars.
x = df['Thickness']
y = np.mean(dfci, axis = 1)

# errorbar draws y - yerr .. y + yerr, so pass the CI half-width; passing the
# full width (UCI - LCI) would draw bars twice as long as the interval.
ci_half_width = (df['UCI'] - df['LCI']) / 2
plt.errorbar(
    x,y,yerr = ci_half_width,
    alpha = 0.25,
    linestyle = 'None',
    marker = 'o',
    capsize = 8,
    color = '#1f77b4',
)

# 1:1 reference line: both axes must span the same range, otherwise the line
# merely joins the data extremes and perfect agreement is not on it.
plt.plot(
    (x.min(),x.max()),
    (x.min(),x.max()),
    '-k'
)

plt.ylabel('Estimated Thickness')
plt.xlabel('GlaThiDa Survey')
plt.title('Leave-One-Out X-val 95% CI')

In [None]:
df

In [None]:
df['we'].max()