In [1]:
import pandas as pd
import glacierml as gl
pd.set_option('display.max_columns', None)


In [None]:
# Load the two tables used below: `df` and the reference table `ref`.
df, ref = gl.notebook_data_loader()

# Outer merge on RGIId keeps glaciers that appear in only one table; columns present
# in both tables get pandas' default `_x` (from df) / `_y` (from ref) suffixes.
test = pd.merge(df, ref, on='RGIId', how='outer')
test['Area_x'] = test['Area_x'].astype(float)

# Rows whose `Area_y` is blank are the ones summed as "lost" from the reference merge.
missing_in_ref = test['Area_y'].isnull()
lost = test[missing_in_ref]

thick_loss = sum(lost['Mean Thickness']) / 1e3
area_loss = sum(lost['Area_x']) / 1e3

print(f'Thickness lost from reference merge (km): {thick_loss}')
print(f'Area lost from reference merge (km^2): {area_loss}')

# One entry per row of the summary table: label -> the Series being summarised.
summarised = {
    'E&L Thickness': df['Mean Thickness'],
    'Farinotti Thickness': ref['Farinotti Mean Thickness'],
    'E&L&F Combined Thickness': ref['Edasi Mean Thickness'],
    'E&L Lost Thickness': lost['Mean Thickness'],
    'E&L Area': df['Area'],
    'Farinotti Area': ref['Area'],
    'E&L&F Combined Area': ref['Area'],
    'E&L Lost Area': lost['Area_x'],
}

# Column heading -> name of the Series method that produces it.
aggregations = {'Mean': 'mean', 'Median': 'median', 'Max': 'max', 'Min': 'min'}

lost_data = pd.DataFrame({
    'Fields': list(summarised),
    **{
        heading: [getattr(series, method)() for series in summarised.values()]
        for heading, method in aggregations.items()
    },
})
lost_data

In [None]:
import cartopy.crs as crs
import cartopy.feature as cfeature
import matplotlib.pyplot as plt
# Map of the glaciers in `lost` (rows without a match in the reference merge).
fig = plt.figure(figsize=(10, 10))
ax = fig.add_subplot(1, 1, 1, projection=crs.PlateCarree())
ax.add_feature(cfeature.COASTLINE)

# Draw on the GeoAxes explicitly instead of relying on pyplot's "current axes", and
# state that the points are plain longitude/latitude so the call does not depend on
# the axes projection chosen above.
ax.scatter(
    x=lost['CenLon_x'],
    y=lost['CenLat_x'],
    marker='.',
    transform=crs.PlateCarree(),
)
ax.set_title('Farinotti excluded glaciers')
plt.show()

In [2]:
import numpy as np
import pandas as pd
import os
from tqdm import tqdm
import glacierml as gl
from scipy.stats import shapiro

# Co-registration method to analyse. Available choices listed by the original prompt:
# df1, df2, df3, df4, df5, df6, df7, df8.
coregistration = 'df8'

print('Loading predictions...')
predictions = gl.predictions_finder(coregistration = coregistration)
# Renumber rows 0..n-1 without keeping the old index as a column.
predictions = predictions.reset_index(drop = True)

# Empty frame that the compile loop appends to. The column names are passed as a list:
# a set literal has no defined order, so the column layout was not reproducible.
df = pd.DataFrame(columns = ['RGIId'] + [str(i) for i in range(25)])

print('Predictions loaded')



Loading predictions...


100%|██████████| 6128/6128 [02:23<00:00, 42.85it/s]   


Predictions loaded


In [None]:
# Display the `predictions` table.
predictions

In [None]:

print('Compiling predictions...')

# Collect every loaded frame and concatenate once after the loop; calling pd.concat
# inside the loop re-copies all accumulated rows on each pass.
global_frames = []
for idx in tqdm(predictions.index):
    # NOTE: `coregistration`, `architecture` and `df_glob` are rebound on every pass and
    # keep the values of the LAST row afterwards; later cells read them.
    # `.iloc[idx]` is positional, so this relies on `predictions` having a 0..n-1 index.
    coregistration = predictions['coregistration'].iloc[idx]
    architecture = '_' + predictions['architecture'].iloc[idx]
    learning_rate = predictions['learning rate'].iloc[idx]
    epochs = '2000'
    df_glob = gl.global_predictions_loader(
        coregistration = coregistration,
        architecture = architecture,
        learning_rate = learning_rate,
        epochs = epochs
    )
    global_frames.append(df_glob)

df = pd.concat([df, *global_frames])


In [None]:
# Display the compiled predictions frame `df`.
df

In [None]:
# Display `predictions` again, before its 'architecture' column is renamed.
predictions

In [None]:
# Use the same column name as `deviations` so the two tables can be merged on it.
column_renames = {'architecture': 'layer architecture'}
predictions = predictions.rename(columns = column_renames)
predictions

In [None]:
# NOTE: `deviations` is only created by the "load deviations table" cell further down,
# so this cell fails on a fresh kernel unless that cell has been run first.
df_weights = predictions.merge(deviations, on = 'layer architecture')

In [None]:
# Keep only the columns used to compute the weighted mean volume.
weight_columns = [
    'layer architecture',
    'coregistration',
    'predicted volume',
    'std dev',
    'architecture weight',
]
df_weights = df_weights.loc[:, weight_columns]
df_weights

In [None]:
# Weighted mean of the predicted volumes; each row is weighted by
# (1 / std dev^2) * architecture weight.
inverse_variance = 1 / df_weights['std dev'] ** 2
architecture_weight = df_weights['architecture weight']

weighted_total = sum(
    df_weights['predicted volume'] * inverse_variance * architecture_weight
)
weight_total = sum(inverse_variance * architecture_weight)
new_mean = weighted_total / weight_total

new_mean

In [None]:
# Load the saved predictions and deviations tables from the results directory.
rootdir = 'zults/'

# Read every matching CSV first and concatenate once afterwards. DataFrame.append was
# removed in pandas 2.0, and growing a frame inside the loop re-copies it every pass.
prediction_frames = []
deviation_frames = []
for file in tqdm(sorted(os.listdir(rootdir))):  # sorted: os.listdir order is arbitrary
    if 'predictions' in file:
        prediction_frames.append(pd.read_csv(os.path.join(rootdir, file)))
    if 'deviations' in file:
        deviation_frames.append(pd.read_csv(os.path.join(rootdir, file)))

if not prediction_frames or not deviation_frames:
    raise FileNotFoundError(
        f'Expected both predictions and deviations CSV files in {rootdir!r}'
    )

# NOTE: this rebinds `predictions`, replacing whatever an earlier cell stored there.
predictions = pd.concat(prediction_frames, ignore_index = True)
deviations = pd.concat(deviation_frames, ignore_index = True)

# Drop the 'Unnamed: 0' column carried in from the CSV files.
deviations = deviations.drop('Unnamed: 0', axis = 1)
predictions = predictions.drop('Unnamed: 0', axis = 1)

# Keep the rows for the current co-registration (`coregistration` comes from an earlier
# cell), learning rate 0.01 and dropout == 1. `.copy()` makes the result an independent
# frame so the column assignments below do not write into a slice of the full table.
selected = (
    deviations['df'].str.contains(coregistration)
    & (deviations['learning rate'] == 0.01)
    & (deviations['dropout'] == 1)
)
deviations = deviations[selected].copy()

# Absolute gap between the test and train MAE averages.
deviations['test - train'] = (
    deviations['test mae avg'] - deviations['train mae avg']
).abs()

# Same key priority as before with the repeated 'layer architecture' entries removed;
# a key that repeats an earlier one cannot change the ordering.
deviations = deviations.sort_values(
    [
        'layer architecture',
        'test - train',
        'epochs',
        'test mae avg',
        'train mae avg',
        'test predicted thickness std dev',
        'learning rate',
        'df',
    ]
)

# Column name spelling ('paramater') is kept as-is in case other code reads it.
deviations['paramater ratio'] = deviations['trained parameters'] / deviations['total inputs']
deviations

In [None]:
# `dtype` is an attribute, not a method; calling it raises TypeError.
deviations['architecture weight'].dtype

In [None]:
# Attach the per-architecture deviation columns to every compiled prediction row.
# NOTE: this rebinds `test`, which an earlier cell used for a different merge.
test = df.merge(deviations, on = 'layer architecture')
test

In [None]:
# Show the glacier id, the 25 estimate columns ('0' ... '24') and the architecture weight.
estimate_columns = [str(i) for i in range(25)]
test[['RGIId', *estimate_columns, 'architecture weight']]


In [None]:
# Group the merged rows per glacier, keeping the 25 estimate columns ('0' ... '24')
# and the architecture weight. The selection must be a list: selecting several
# GroupBy columns with a bare tuple of names raises in pandas >= 2.0.
compiled_raw = test.groupby('RGIId')[
    [str(i) for i in range(25)] + ['architecture weight']
]

In [None]:
# Spot check: show all rows of `df` for the single glacier 'RGI60-11.00001'.
df[df['RGIId'] == 'RGI60-11.00001']

In [None]:
# Row-wise mean across the 25 estimate columns ('0' ... '24') of `df_glob`.
estimate_columns = [str(i) for i in range(25)]
test_mean = df_glob[estimate_columns].mean(axis = 1)

In [None]:
# Store the row-wise mean as a new column on `df_glob` (modifies it in place), then display it.
df_glob['test_mean'] = test_mean
df_glob

In [None]:
# Display the row-wise mean Series.
test_mean

In [None]:
# Peek at the first (RGIId, sub-frame) pair produced by iterating `compiled_raw`;
# the `break` stops the loop after that single group.
for this_rgi_id, obj in tqdm(compiled_raw):
    print(this_rgi_id)
    print(obj)
    break

In [None]:
# Dry run of the per-glacier weighted mean on the first group only.
dft = pd.DataFrame()

# Names of the 25 estimate columns ('0' ... '24').
estimate_columns = [str(i) for i in range(25)]

for this_rgi_id, obj in tqdm(compiled_raw):
    print(this_rgi_id)
    print(obj)

    estimates = obj[estimate_columns]

    # Work on local Series instead of writing 'weight' / 'weighted mean' columns into
    # the group's sub-frame, which mutates it and can trigger SettingWithCopyWarning.
    # NOTE(review): the weight ADDS the row std dev to the architecture weight, whereas
    # `new_mean` uses (1 / std dev^2) * architecture weight -- confirm which is intended.
    weight = obj['architecture weight'] + estimates.std(axis = 1)
    weighted_mean = weight * estimates.mean(axis = 1)

    # skipna=False keeps the NaN propagation of the builtin sum() used previously.
    weighted_glacier_mean = (
        weighted_mean.sum(skipna = False) / weight.sum(skipna = False)
    )
    break  # only the first glacier is needed for this check

In [None]:
# Display the weighted mean computed for the first glacier by the loop above.
weighted_glacier_mean

In [None]:
# Display `df_glob`, the frame left over from the last pass of the compile loop.
df_glob

In [None]:
# `architecture` was built as '_' + <name> in the compile loop; [1:] drops that leading underscore.
architecture[1:]

In [None]:

print('Predictions compiled')
print('Aggregating statistics...')

# Names of the 25 estimate columns ('0' ... '24').
estimate_columns = [str(i) for i in range(25)]

# The loop previously ended in an unconditional `break`, so only the first glacier was
# aggregated. That stays the default here; set to None to aggregate every glacier.
max_glaciers = 1

# One dict per glacier, turned into a DataFrame once after the loop. Growing `dft`
# with concat / reset_index / .loc on every pass re-copies the whole frame each time.
records = []
for this_rgi_id, obj in tqdm(compiled_raw):
    estimates = obj[estimate_columns]

    # Computed on local Series so the group's sub-frame is not modified.
    # NOTE(review): the weight ADDS the row std dev to the architecture weight, whereas
    # `new_mean` uses (1 / std dev^2) * architecture weight -- confirm which is intended.
    weight = obj['architecture weight'] + estimates.std(axis = 1)
    weighted_mean = weight * estimates.mean(axis = 1)
    # skipna=False keeps the NaN propagation of the builtin sum() used previously.
    weighted_glacier_mean = (
        weighted_mean.sum(skipna = False) / weight.sum(skipna = False)
    )

    # Pool all estimates of this glacier (every row, every estimate column) into one Series.
    stacked_object = estimates.stack()

    statistic, p_value = shapiro(stacked_object)

    # One percentile call: quartiles for the IQR plus the median +/- 34.1 percentile points.
    q25, lower_bound, median, upper_bound, q75 = np.percentile(
        stacked_object, [25, 50 - 34.1, 50, 50 + 34.1, 75]
    )

    records.append({
        'RGIId': this_rgi_id,
        'Weighted Mean Thickness': weighted_glacier_mean,
        'Mean Thickness': stacked_object.mean(),
        'Median Thickness': stacked_object.median(),
        'Thickness Std Dev': stacked_object.std(),
        'Shapiro-Wilk statistic': statistic,
        'Shapiro-Wilk p_value': p_value,
        'IQR': q75 - q25,
        'Lower Bound': lower_bound,
        'Upper Bound': upper_bound,
        'Median Value': median,
        'Total estimates': len(stacked_object),
    })

    if max_glaciers is not None and len(records) >= max_glaciers:
        break

dft = pd.DataFrame(records).drop_duplicates()
# dft.to_csv(
#     'predicted_thicknesses/sermeq_aggregated_bootstrap_predictions_coregistration_' + 
#     coregistration + '.csv'
#           )

In [None]:
# Display the aggregated per-glacier statistics table.
dft