# Uncertainty analysis

In [None]:
import numpy as np
import pandas as pd
import xarray as xr 
import geopandas as gpd

import os
from glob import glob
from oggm import utils
from tqdm import tqdm
from tqdm.notebook import tqdm

# Change the working directory to the results folder. The paths visible in this
# part of the file are absolute, so this only affects relative paths used elsewhere.
os.chdir('/home/rooda/OGGM_results/')

import warnings

# RankWarning is no longer reachable as `np.RankWarning` on NumPy >= 2.0 (it
# lives in `numpy.exceptions` there). Resolve it in a way that works on both
# old and new NumPy so this setup cell does not fail with AttributeError.
try:
    from numpy.exceptions import RankWarning
except ImportError:
    RankWarning = np.RankWarning

# Silence poorly-conditioned-fit warnings (presumably aimed at the linear
# trend fit computed with polyfit further down -- confirm).
warnings.simplefilter('ignore', RankWarning)

## Data

In [None]:
# Columns kept from both glacier inventories
id_columns = ["RGIId", "Zone", "ID_basin"]

# RGI6 outlines: keep glaciers with area_km2 > 7 and only the ID columns
RGI6_ids = gpd.read_file("/home/rooda/Dropbox/Patagonia/GIS South/Glaciers/RGI6_v2.shp")
RGI6_ids = RGI6_ids[RGI6_ids.area_km2 > 7]
RGI6_ids = RGI6_ids[id_columns]

# RGI7 outlines: same area filter, then formatted with OGGM's cook_rgidf
# (Zone and ID_basin are carried over through assign_column_values)
RGI7_ids = gpd.read_file("/home/rooda/Dropbox/Patagonia/GIS South/Glaciers/RGI7_v2.shp")
RGI7_ids = RGI7_ids[RGI7_ids.area_km2 > 7]
RGI7_ids = utils.cook_rgidf(
    RGI7_ids,
    o1_region='17',
    o2_region='02',
    bgndate=RGI7_ids.src_date,
    version="70",
    assign_column_values={'Zone': 'Zone', 'ID_basin': 'ID_basin'},
)
RGI7_ids = RGI7_ids[id_columns]

# Numeric zone code -> zone acronym
dict_zone = {1:'PPY', 2:'PCA', 3:'NPI-E', 4:'NPI-W', 5:'SPI-N', 6:'SPI-C', 7:'SPI-S', 8:'GCN', 9:'CDI'}

# Single lookup table indexed by glacier ID, with zones given as acronyms
ids = (
    pd.concat([RGI6_ids, RGI7_ids])
    .set_index("RGIId")
    .replace({"Zone": dict_zone})
)

## Preprocessing

In [None]:
# Variables kept from every run file; everything else is discarded
variables = ["melt_on_glacier", "melt_on_glacier_monthly"]

def preprocess(ds):
    """Drop the hydro/calendar year and month entries from `ds`, then keep only `variables`."""
    unused_entries = ['hydro_year', 'hydro_month', 'calendar_year', 'calendar_month']
    return ds.drop_vars(unused_entries)[variables]

# Calculation of all metrics

In [None]:
# Folder with all the results: one sub-folder per setup, whose name is split on
# "_" into Outline, Climate and Volume below
gdirs = glob("/home/rooda/OGGM_results/new/*", recursive = True)

dataset_peak_year      = []
dataset_peak_magnitude = []
dataset_frecuency      = []
dataset_rate_change    = []
dataset_duration       = []


def _metric_row(experiment_id, values, metric_name):
    """Return a one-row DataFrame: experiment labels followed by the per-basin values.

    The row index is `metric_name`; the columns are the experiment labels plus
    the index of `values` (the basin IDs).
    """
    return pd.DataFrame(pd.concat([experiment_id, values]), columns=[metric_name]).transpose()


for gdir in tqdm(gdirs):

    # read the historical run once per setup (it is shared by every future run)
    # and release the file handles as soon as the data is in memory
    with xr.open_mfdataset(gdir + "/run_outputs_*.nc", preprocess = preprocess) as hist_files:
        model_hist = hist_files[variables].isel(time=slice(0, -1)).load()

    setup_labels = os.path.basename(gdir).split("_")

    paths = glob(gdir + "/run_output_*.nc", recursive = True)
    for path in tqdm(paths, leave = False):

        # read future run and concatenate (the file is closed once loaded)
        with xr.open_dataset(path) as future_file:
            model = xr.concat([model_hist, preprocess(future_file)], dim = "time").load()

        # add basin ID to each glacier ID (RGI_ID); the lookup is done by label so
        # the basin IDs follow the glacier order of the dataset, not the order of `ids`
        basin_ids = ids.loc[model.rgi_id.to_numpy(), "ID_basin"]
        model = model.assign_coords(rgi_id = basin_ids.tolist())

        # aggregate based on "new" ID
        model = model.groupby('rgi_id').sum()
        model = model.isel(time=slice(0, -1)) * 1e-9
        rolling = model.melt_on_glacier.rolling(time=11, center=True).mean()

        # ID of the setup
        run_labels = os.path.basename(path).split("_")
        experiment_id = pd.Series(data = {'Outline': setup_labels[0],
                                          'Climate': setup_labels[1],
                                          'Volume':  setup_labels[2],
                                          'GCM':     run_labels[2],
                                          'SSP':     run_labels[3],
                                          'BCM':     run_labels[4][0:3]})

        ## Peak water year: year of the maximum of the 11-year moving average
        peak_year = rolling.idxmax(dim = "time").astype("int16").to_series()
        dataset_peak_year.append(_metric_row(experiment_id, peak_year, 'peak_water_year'))

        ## Magnitude: maximum of the 11-year moving average of melt on the glacier
        peak_magnitude = rolling.max(dim = "time").to_series()
        dataset_peak_magnitude.append(_metric_row(experiment_id, peak_magnitude, 'peak_magnitude'))

        ## Frequency: standard deviation of the departures from the moving average
        frecuency = model.melt_on_glacier - rolling
        frecuency = frecuency.std(dim = "time").to_series()
        dataset_frecuency.append(_metric_row(experiment_id, frecuency, 'frecuency'))

        ## Rate of change: linear trend over the 30 years that follow peak water
        rolling_norm = rolling/rolling.max(dim = "time")  # normalize values (max = 1)
        rolling_norm = rolling_norm.transpose("rgi_id", "time") # trick to complete loop

        rate_change = []
        for catchment in model.rgi_id.to_numpy(): # get the trend for each catchment

            start_year = int(peak_year.loc[catchment])
            time_period = slice(start_year, start_year + 30)
            rate_change_i = rolling_norm.sel(time = time_period, rgi_id = catchment)
            # select the slope explicitly instead of relying on coefficient order
            rate_change_i = rate_change_i.polyfit(dim = "time", deg = 1, skipna = True)
            rate_change_i = rate_change_i.polyfit_coefficients.sel(degree = 1).to_numpy()
            rate_change_i = rate_change_i * 100 * 10 # final value: % per decade
            rate_change.append(rate_change_i)

        rate_change = pd.Series(rate_change, index=model.rgi_id.to_numpy())
        dataset_rate_change.append(_metric_row(experiment_id, rate_change, 'rate_of_change'))

        ## Duration: share (%) of the mean annual melt in months 12, 1 and 2
        # NOTE(review): confirm that month_2d holds calendar months, so that
        # 12/1/2 is the intended season
        monthly_mean = model.melt_on_glacier_monthly.mean(dim = "time")
        duration = monthly_mean * 100 / monthly_mean.sum(dim = "month_2d")
        duration = duration.sel(month_2d = [12,1,2]).sum(dim = "month_2d").to_series()
        dataset_duration.append(_metric_row(experiment_id, duration, 'duration'))

# concatenate all metrics (one row per experiment and metric)
dataset = pd.concat([pd.concat(dataset_peak_year),
                     pd.concat(dataset_peak_magnitude),
                     pd.concat(dataset_frecuency),
                     pd.concat(dataset_rate_change),
                     pd.concat(dataset_duration)])

dataset.to_csv("/home/rooda/Dropbox/Patagonia/MS2 Results/dataset_hydro_signatures.csv")