# Post-processing files for Zenodo upload (CSV to clean NetCDF)

In [None]:
import os 
import pandas as pd
import xarray as xr

from tqdm import tqdm

# Switch to the project root; the data and result paths used in this notebook are relative.
os.chdir('/home/rooda/OneDrive/Projects/DeepHydro/')

# Encoding passed to `to_netcdf` for the "Q" variable: values are stored as
# int32 with a 0.0001 scale factor, -9999 as fill value, and zlib level 1.
encode_Q = {
    'Q': {
        'dtype': 'int32',
        'scale_factor': 0.0001,
        '_FillValue': -9999,
        'zlib': True,
        'complevel': 1,
    },
}

In [None]:
# Basin attribute table, indexed by its first column.
# Original note: data to convert mm/day to m3/s (or vice versa).
# In the visible code only `shape.index` is used, to select which columns
# (basins) `preprocessing` keeps from each runoff table.
shape = pd.read_csv("data/Attributes_all_basins.csv", index_col = 0)

In [None]:
def preprocessing(dataframe, initial_year):
    """Turn a runoff table into an xarray Dataset holding a single variable "Q".

    Parameters
    ----------
    dataframe : pandas.DataFrame
        Table whose columns include every label in the module-level
        ``shape.index`` (a missing label raises ``KeyError``). The row index
        must expose ``.year`` -- presumably a DatetimeIndex; confirm with callers.
    initial_year : int
        Rows whose index year is lower than this value are dropped.

    Returns
    -------
    xarray.Dataset
        Variable "Q" on dimensions ("date", "basin_id"), with the remaining
        row index as the "date" coordinate and the selected columns as
        "basin_id".
    """
    # Keep only the columns listed in `shape`, in that order.
    selected = dataframe[shape.index]

    # Discard everything before the requested first year.
    from_start = selected[selected.index.year >= initial_year]

    return xr.Dataset(
        {"Q": (["date", "basin_id"], from_start.values)},
        coords={"date": from_start.index, "basin_id": from_start.columns},
    )

## Historical 

In [None]:
def _load_historical(csv_path):
    """Read one historical runoff CSV and pass it through `preprocessing` from 2000 on."""
    table = pd.read_csv(csv_path, index_col=0, parse_dates=["date"])
    return preprocessing(table, 2000)


# One Dataset per model for the historical period.
LSTM_OGGM_on = _load_historical("results/runoff/total_runoff_historical_LSTM_OGGM_on_all.csv")
LSTM_OGGM_off = _load_historical("results/runoff/total_runoff_historical_LSTM_OGGM_off_all.csv")
TUWmodel_base = _load_historical("results/runoff/total_runoff_historical_TUWmodel_all.csv")
GR4J_base = _load_historical("results/runoff/total_runoff_historical_GR4J_all.csv")

# Stack the four models along a new "model" dimension and label it.
historical_models = ["LSTM_OGGM_on", "LSTM_OGGM_off", "TUWmodel", "GR4J"]
historical_df = xr.concat(
    [LSTM_OGGM_on, LSTM_OGGM_off, TUWmodel_base, GR4J_base],
    dim="model",
).assign_coords(model=historical_models)

# Tag the units, then cast to float32 before writing.
historical_df["Q"].attrs["units"] = "mm d-1"
historical_df["Q"] = historical_df["Q"].astype("float32")

historical_df.to_netcdf("results/zenodo/Q_historical.nc", encoding=encode_Q)

## Future

In [None]:
gcms  = ["GFDL-ESM4", "IPSL-CM6A-LR", "MIROC6", "MPI-ESM1-2-LR", "MRI-ESM2-0"]
ssps  = ["ssp126", "ssp585"]
future_models = ["LSTM_OGGM_on", "LSTM_OGGM_off", "TUWmodel", "GR4J"]


def _future_from_csv(template, gcm, ssp):
    """Read one future-runoff CSV and pass it through `preprocessing` from 2020 on."""
    table = pd.read_csv(template.format(gcm, ssp), parse_dates=["date"], index_col=0)
    return preprocessing(table, 2020)


def _future_from_parquet(template, gcm, ssp):
    """Read one future-runoff parquet file and pass it through `preprocessing` from 2020 on."""
    table = pd.read_parquet(template.format(gcm, ssp))
    # The dates live in a "date" column: strip the timezone, cast to
    # nanosecond datetimes and use the result as the row index.
    table.index = table["date"].dt.tz_localize(None).astype("datetime64[ns]")
    return preprocessing(table, 2020)


# (reader, path template) for each model, in the same order as `future_models`.
# The two placeholders in every template are filled with (gcm, ssp).
future_sources = [
    (_future_from_csv, "results/runoff/total_runoff_future_{}_{}_LSTM_OGGM_on_all.csv"),
    (_future_from_csv, "results/runoff/total_runoff_future_{}_{}_LSTM_OGGM_off_all.csv"),
    (_future_from_parquet, "results/runoff/total_runoff_future_{}_{}_TUWmodel_all.parquet"),
    (_future_from_parquet, "results/runoff/total_runoff_future_{}_{}_GR4J_all.parquet"),
]

df_ssp = []

for ssp in tqdm(ssps):

    df_gcm = []
    for gcm in tqdm(gcms, leave=False):

        # Load the four models for this (gcm, ssp) pair and stack them along "model".
        per_model = [reader(template, gcm, ssp) for reader, template in future_sources]
        df_i = xr.concat(per_model, dim="model").assign_coords(model=future_models)
        df_gcm.append(df_i)

    # Stack all climate models of this scenario along "gcm".
    df_gcm = xr.concat(df_gcm, dim="gcm")
    df_ssp.append(df_gcm)

# Stack the scenarios along "ssp" and label both new dimensions.
df_ssp = xr.concat(df_ssp, dim="ssp")
df_ssp = df_ssp.assign_coords(gcm=gcms, ssp=ssps)

# Tag the units, then cast to float32 before writing.
df_ssp["Q"].attrs["units"] = "mm d-1"
df_ssp["Q"] = df_ssp["Q"].astype("float32")

df_ssp.to_netcdf("results/zenodo/Q_future.nc", encoding=encode_Q)