# Code to speed up some processes from R
Developed by Rodrigo Aguayo (2020-2023)

In [None]:
import pandas as pd
import xarray as xr
import regionmask
import rioxarray as rioxr
import geopandas as gpd
import numpy as np
import os

# Work from the project root so the relative "Data/..." and "MS1 Results/..."
# paths used in the cells below resolve.
os.chdir('/home/rooda/Dropbox/Patagonia/')

# Number of days in each calendar month, January first (February = 28).
days = np.array([31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31])

# netCDF encoding for the "pp" variable: int16 with zlib compression (level 1).
encode_pp = {"pp": dict(zlib=True, complevel=1, dtype="int16")}

# The three temperature variables share the same packing: int16 values with a
# 0.01 scale factor and -9999 as fill value. Each encoding gets its own copy of
# the template so the dictionaries never share state.
_temperature_packing = {"dtype": "int16", "scale_factor": 0.01, "_FillValue": -9999}
encode_t2m = {"t2m": dict(_temperature_packing)}
encode_tmax = {"tmax": dict(_temperature_packing)}
encode_tmin = {"tmin": dict(_temperature_packing)}

## Temperature 

### Reference

In [None]:
# Options shared by both multi-file opens: the matched files are concatenated
# along "time" (combine='nested') and opened with chunks="auto".
mf_options = dict(concat_dim='time', combine='nested', chunks="auto")

# Maximum temperature: merge the partial files, order by time, keep the "Tmax"
# variable under the name "tmax" and write it as a single packed file.
tmax = xr.open_mfdataset("Data/Temperature/Tmax_PMET_1980_2020d_*.nc", **mf_options)
tmax = tmax.sortby("time")
tmax = tmax.Tmax.rename("tmax")
tmax.to_netcdf("Data/Temperature/Tmax_PMETsim_1980_2020_v10d.nc", encoding=encode_tmax)

# Minimum temperature: same steps for the "Tmin" variable.
tmin = xr.open_mfdataset("Data/Temperature/Tmin_PMET_1980_2020d_*.nc", **mf_options)
tmin = tmin.sortby("time")
tmin = tmin.Tmin.rename("tmin")
tmin.to_netcdf("Data/Temperature/Tmin_PMETsim_1980_2020_v10d.nc", encoding=encode_tmin)

# Mean temperature is taken as the midpoint between tmax and tmin.
t2m_pmet = ((tmax + tmin) / 2).rename("t2m")
t2m_pmet.to_netcdf("Data/Temperature/Tavg_PMETsim_1980_2020_v10d.nc", encoding=encode_t2m)

# Monthly means (bins labelled at month start), wrapped in a Dataset with the
# variable name "t2m" expected by encode_t2m.
t2m_pmet_m = xr.Dataset({"t2m": t2m_pmet.resample(time='MS').mean()})
t2m_pmet_m.to_netcdf("Data/Temperature/Tavg_PMETsim_1980_2020_v10m.nc", encoding=encode_t2m)

# Mean of the yearly means, exported as a raster through rioxarray.
t2m_pmet_y = t2m_pmet.resample(time='YS').mean().mean(dim="time")
t2m_pmet_y.rio.to_raster("Data/Temperature/Tavg_PMETsim_1980_2020_v10.tif")

### Cross-validation

In [None]:
# Options shared by every multi-file open in this cell: concatenate the matched
# files along "time" (combine='nested') and open them with chunks="auto".
mf_options = dict(concat_dim='time', combine='nested', chunks="auto")

# Repeat the temperature post-processing for each cross-validation run (cv = 1..10).
for cv in range(1, 11):
    # Maximum temperature: merge the raw files whose name ends in "<cv>.nc".
    tmax = xr.open_mfdataset(f"Data/Temperature/CV/raw/Tmax_PMET_*{cv}.nc", **mf_options)
    tmax = tmax.sortby("time")
    tmax = tmax.Tmax.rename("tmax")
    tmax.to_netcdf(f"Data/Temperature/CV/Tmax_PMETsim_1980_2020d_{cv}.nc", encoding=encode_tmax)

    # Minimum temperature: same steps for the "Tmin" variable.
    tmin = xr.open_mfdataset(f"Data/Temperature/CV/raw/Tmin_PMET_*{cv}.nc", **mf_options)
    tmin = tmin.sortby("time")
    tmin = tmin.Tmin.rename("tmin")
    tmin.to_netcdf(f"Data/Temperature/CV/Tmin_PMETsim_1980_2020d_{cv}.nc", encoding=encode_tmin)

    # Mean temperature as the midpoint between tmax and tmin.
    tavg = ((tmax + tmin) / 2).rename("t2m")
    tavg.to_netcdf(f"Data/Temperature/CV/Tavg_PMETsim_1980_2020d_{cv}.nc", encoding=encode_t2m)

    # Monthly means, stored as a Dataset with the variable name "t2m".
    tavg_m = xr.Dataset({"t2m": tavg.resample(time='MS').mean()})
    tavg_m.to_netcdf(f"Data/Temperature/CV/Tavg_PMETsim_1980_2020m_{cv}.nc", encoding=encode_t2m)

## Precipitation 

### Reference

In [None]:
# Merge the partial "npc" precipitation files into one dataset ordered by time
# and write it to a single file (no explicit encoding is passed).
pp_pmet_npc = xr.open_mfdataset(
    "Data/Precipitation/PP_PMET_1980_2020d_*npc.nc",
    concat_dim='time',
    combine='nested',
    chunks="auto",
).sortby("time")
pp_pmet_npc.to_netcdf("Data/Precipitation/PP_PMETsim_1980_2020d_npc.nc")

In [None]:
# Precipitation before applying the bias factor ("pp" variable of the merged file).
pp_pmet_npc = xr.open_dataset("Data/Precipitation/PP_PMETsim_1980_2020d_npc.nc", chunks="auto").pp

# Bias-factor raster: take the first entry along the leading dimension
# (presumably the raster band), drop the raster bookkeeping coordinates, rename
# x/y to the precipitation grid's dimension names and interpolate onto that grid.
bias_factor = (
    xr.open_dataset("MS1 Results/Bias_Factor_PP.tif")
    .band_data[0]
    .drop_vars(["band", "spatial_ref"])
    .rename({"x": "longitude", "y": "latitude"})
)
bias_factor = bias_factor.interp(
    longitude=pp_pmet_npc.longitude,
    latitude=pp_pmet_npc.latitude,
)

# Apply the factor cell by cell and store the product as variable "pp".
pp_pmet = xr.Dataset({"pp": pp_pmet_npc * bias_factor})
pp_pmet.to_netcdf("Data/Precipitation/PP_PMETsim_1980_2020_v10d.nc")

# Monthly totals; skipna=False keeps a month missing if any value in it is missing.
pp_pmet_m = pp_pmet.resample(time='MS').sum(skipna=False)
pp_pmet_m.to_netcdf("Data/Precipitation/PP_PMETsim_1980_2020_v10m.nc")

# Yearly totals from the monthly totals, averaged over all years and exported as a raster.
pp_pmet_y = pp_pmet_m.resample(time='YS').sum(skipna=False).mean(dim="time")
pp_pmet_y.rio.to_raster("Data/Precipitation/PP_PMETsim_1980_2020_v10.tif")

### Cross-validation

In [None]:
# Directory holding the cross-validation precipitation files.
pp_cv_dir = "/home/rooda/PMET_results/Precipitation"

# Merge the raw "npc" precipitation files of each cross-validation run (cv = 1..10).
for cv in range(1, 11):
    merged = xr.open_mfdataset(
        f"{pp_cv_dir}/raw/PP_PMET_*{cv}_npc.nc",
        concat_dim='time',
        combine='nested',
        chunks="auto",
    )
    # Order by time and keep only the "pp" variable.
    pp = merged.sortby("time").pp.rename("pp")
    # NOTE(review): if the source values are floats, this cast drops the
    # fractional part and cannot represent NaN - confirm that is intended.
    pp = pp.astype("int16")
    pp.to_netcdf(f"{pp_cv_dir}/PP_PMETsim_1980_2020d_npc_{cv}.nc")
    print(cv)  # progress indicator

In [None]:
# Directory holding the cross-validation precipitation files.
pp_cv_dir = "/home/rooda/PMET_results/Precipitation"

# Apply the bias factor of each cross-validation run (cv = 1..10) to its precipitation.
for cv in range(1, 11):
    # chunks="auto" keeps the series chunked instead of loading it whole, as the
    # reference (non-CV) cell does; without it the multiplication and monthly sum
    # below are evaluated eagerly on the full array in every iteration.
    pp_pmet_npc = xr.open_dataset(
        f"{pp_cv_dir}/PP_PMETsim_1980_2020d_npc_{cv}.nc",
        chunks="auto",
    ).pp

    # Bias-factor raster of this run: take the first entry along the leading
    # dimension (presumably the raster band), drop the raster bookkeeping
    # coordinates, rename x/y to the precipitation grid's dimension names and
    # interpolate onto that grid.
    bias_factor = xr.open_dataset(f"{pp_cv_dir}/Bias_Factor_PP_{cv}.tif").band_data
    bias_factor = bias_factor[0].drop_vars(["band", "spatial_ref"])
    bias_factor = bias_factor.rename({"x": "longitude", "y": "latitude"})
    bias_factor = bias_factor.interp(
        longitude=pp_pmet_npc.longitude,
        latitude=pp_pmet_npc.latitude,
    )

    # Apply the factor cell by cell and store the product as variable "pp".
    pp_pmet = xr.Dataset({"pp": pp_pmet_npc * bias_factor})
    pp_pmet.to_netcdf(f"{pp_cv_dir}/PP_PMETsim_1980_2020d_{cv}.nc")

    # Monthly totals; skipna=False keeps a month missing if any value in it is missing.
    pp_pmet_m = pp_pmet.resample(time='MS').sum(skipna=False)
    pp_pmet_m.to_netcdf(f"{pp_cv_dir}/PP_PMETsim_1980_2020m_{cv}.nc")
    print(cv)  # progress indicator