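### create and save the masks of the admin areas (plus one diagnostic figure per area) on the grid of the GCMs' hindcast dataset (period 1993 - 2016), as needed to calculate the leadtime-dependent climatological terciles, deciles and percentiles (0.02, then 0.05 to 0.95 with 0.05 step) from the individual GCMs' hindcast dataset for admin areas  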

### This notebook is driven via papermill by `ICU_forecast_table/drive_admin_GCMs_evaluation.ipynb`

In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
%matplotlib inline

### os and standard libraries 
import os
import sys
from collections import OrderedDict
from itertools import product

### datetimes
from datetime import datetime, timedelta

### scipy
import numpy as np
import pandas as pd
import xarray as xr
import geopandas as gpd
import cartopy.crs as ccrs
import dask
from dask.diagnostics import ProgressBar
from tqdm import tqdm

### plotting
from matplotlib import pyplot as plt
import matplotlib
import seaborn as sns


In [3]:
import pathlib

# directory the kernel was started from
CWD = pathlib.Path.cwd()

# home directory of the current user: the outputs and figures paths are built from it
HOME = pathlib.Path.home()

In [4]:
# add the directory two levels up (relative to the working directory) to the module search path
# NOTE(review): presumably this is where the `ICU_Water_Watch` package imported next lives - confirm
sys.path.append('../../') 

In [5]:
from ICU_Water_Watch import geo, C3S, domains, plot, utils

### read one GCM 

In [6]:
# configuration of the run
# NOTE(review): presumably these are the values overridden by papermill - confirm against the driver notebook
provider = 'CDS'  # not referenced by the other cells shown in this notebook
GCM = 'ECMWF'  # name of the GCM sub-directory holding the hindcasts
varname = 'tprate'  # variable name: used (upper-cased) in the data path and in the file name pattern

### path definitions follow

### outputs 

In [7]:
# directory receiving one netcdf mask file per admin area
outputs_path = HOME / "operational/ICU/development/hotspots/code/ICU_Water_Watch/outputs/admin/lsmasks"

In [8]:
# `print` already converts its argument to a string
print(outputs_path)

/home/nicolasf/operational/ICU/development/hotspots/code/ICU_Water_Watch/outputs/admin/lsmasks


In [9]:
# directory receiving one diagnostic figure (png) per admin area
fig_path = HOME / 'operational/ICU/development/hotspots/code/ICU_Water_Watch/figures/admin_lsmasks'

In [10]:
# show the directory the figures are saved into
print(fig_path)

/home/nicolasf/operational/ICU/development/hotspots/code/ICU_Water_Watch/figures/admin_lsmasks


#### where to find the GCM hindcast datasets 

In [11]:
# root directory of the hindcast datasets; a plain string is enough as nothing is interpolated
gcm_path = pathlib.Path('/media/nicolasf/END19101/ICU/data/CDS/operational/hindcasts')

In [12]:
# hindcasts are organised as <root> / <GCM> / <VARNAME in upper case>
dpath = gcm_path / GCM / varname.upper()

In [13]:
# show the directory that is searched for the hindcast files
print(dpath)

/media/nicolasf/END19101/ICU/data/CDS/operational/hindcasts/ECMWF/TPRATE


### get the list of files 

In [14]:
# all the hindcast files available for the selected variable
pattern = f"ensemble_seas_forecasts_{varname}_from_*.netcdf"
lfiles = list(dpath.glob(pattern))

In [15]:
# `glob` gives no ordering guarantee: sort the paths so that the first file is always the same one
lfiles = sorted(lfiles)

In [16]:
# fail with an explicit message (rather than a bare IndexError) when no hindcast file is found
if not lfiles:
    raise FileNotFoundError(
        f"no file matching 'ensemble_seas_forecasts_{varname}_from_*.netcdf' found in {dpath}"
    )

# only the first file of the list is opened in this notebook
dset_gcm = xr.open_dataset(lfiles[0])

In [17]:
# pass the dataset through the project's `C3S.preprocess_GCM` helper
# NOTE(review): the dataset displayed afterwards has `lon` / `lat` / `member` / `step` / `time` dimensions,
# presumably set by this helper - confirm against its implementation
dset_gcm = C3S.preprocess_GCM(dset_gcm)

In [18]:
# `info` is a method: referencing it without calling it only shows the bound method,
# so display the dataset itself instead
dset_gcm

<bound method Dataset.info of <xarray.Dataset>
Dimensions:  (lon: 121, lat: 81, member: 25, step: 5, time: 1)
Coordinates:
  * time     (time) datetime64[ns] 1993-01-01
  * lon      (lon) float32 120.0 121.0 122.0 123.0 ... 237.0 238.0 239.0 240.0
  * lat      (lat) float32 -50.0 -49.0 -48.0 -47.0 -46.0 ... 27.0 28.0 29.0 30.0
  * member   (member) int32 0 1 2 3 4 5 6 7 8 9 ... 16 17 18 19 20 21 22 23 24
  * step     (step) int64 1 2 3 4 5
Data variables:
    tprate   (time, step, member, lat, lon) float32 5.329e-08 ... 1.209e-09
Attributes:
    Conventions:  CF-1.6
    history:      2022-01-17 01:17:57 GMT by grib_to_netcdf-2.23.0: /opt/ecmw...>

### read the coastlines (geometries of the admin areas) 

In [19]:
# directory holding the admin areas shapefile, built from HOME like the outputs and figures
# directories instead of hard-coding the home directory of one user
coastlines_dpath = HOME.joinpath('operational/ICU/development/hotspots/data/shapefiles/Admin_boundaries/Coastlines')

In [20]:
# geometries of the admin areas (the file name indicates 0 - 360 longitudes)
shapefile = gpd.read_file(coastlines_dpath / 'ICU_admin_geometries0_360.shp')

In [21]:
# display the table read from the shapefile (columns: id, Country, Admin_boun, geometry)
shapefile

Unnamed: 0,id,Country,Admin_boun,geometry
0,1,Northern Mariana Islands,Southern Islands,"MULTIPOLYGON (((145.13454 14.13935, 145.13472 ..."
1,2,Northern Mariana Islands,Northern Islands,"MULTIPOLYGON (((146.05832 16.01259, 146.05590 ..."
2,3,Guam,Guam,"MULTIPOLYGON (((144.65813 13.23972, 144.65000 ..."
3,4,Palau,Babeldaob region,"MULTIPOLYGON (((134.12425 6.92185, 134.12468 6..."
4,5,Palau,South-west Islands,"MULTIPOLYGON (((131.81201 2.97124, 131.81115 2..."
...,...,...,...,...
68,24,Cook Islands,Northern Cook Islands,"MULTIPOLYGON (((196.88468 -13.33285, 196.88385..."
69,70,FSM,Kapingamarangi,"MULTIPOLYGON (((154.77861 1.02694, 154.78083 1..."
70,71,Marshall Islands,Southern Marshall Islands,"MULTIPOLYGON (((168.76519 4.60420, 168.76501 4..."
71,72,Marshall Islands,Central Marshall Islands,"MULTIPOLYGON (((168.76889 7.28056, 168.76944 7..."


In [22]:
# names of the shapefile columns holding the country and the admin area names
country_col, admin_col = 'Country', 'Admin_boun'

In [23]:
# strip the apostrophes from the admin area names, so that they are safe to embed
# in quoted expressions and in the names derived from them
admin_names = shapefile.loc[:, admin_col]
shapefile.loc[:, admin_col] = admin_names.str.replace("'", "")

In [24]:
# padding added on each side of the bounds of the (buffered) admin area geometry when deriving
# lon_min / lon_max / lat_min / lat_max; same units as the shapefile coordinates
# NOTE(review): presumably degrees - confirm the CRS of the shapefile
domain_buffer = 1

In [25]:
# accumulator for the [country, admin area] pairs (sanitized names) processed by the loop
l_countries_admin = list()

In [26]:
# For each (country, admin area) of the shapefile:
#   1. buffer the admin area geometry and derive a padded lon / lat domain from its bounds
#   2. build the mask of the buffered geometry on the GCM grid (`geo.mask_dataset`)
#   3. save the mask (netcdf) and a diagnostic figure (png)
#   4. record the sanitized [country, admin] names in `l_countries_admin`

# make sure the output directories exist before anything is written into them
outputs_path.mkdir(parents=True, exist_ok=True)
fig_path.mkdir(parents=True, exist_ok=True)

# width of the buffer drawn around each admin area geometry, in the units of the
# shapefile coordinates (presumably degrees - TODO confirm the CRS of the shapefile)
shape_buffer = 0.25

# (re)initialised here so that re-running this cell does not append duplicated entries
l_countries_admin = []

for country in shapefile[country_col].unique():

    # boolean indexing rather than `query` on an interpolated string: a name containing
    # a quote character would otherwise produce an invalid query expression
    sub = shapefile.loc[shapefile[country_col] == country]

    for admin in sub[admin_col].unique():

        print(f"processing {country}, admin region {admin}")

        # sanitized names, used for the file names and for the list of processed areas
        country_name = utils.sanitize_name(country)
        admin_name = utils.sanitize_name(admin)
        fname_root = f"landsea_mask_{country_name}_{admin_name}"

        # un-buffered geometry, kept for the diagnostic figure
        original_shape = sub.loc[sub[admin_col] == admin].copy()

        shape = original_shape.buffer(shape_buffer)

        # (minx, miny, maxx, maxy) over *all* the geometries of the admin area, so the
        # domain stays correct should an admin area span several rows of the shapefile
        bounds = shape.total_bounds

        lon_min = bounds[0] - domain_buffer
        lon_max = bounds[2] + domain_buffer
        lat_min = bounds[1] - domain_buffer
        lat_max = bounds[3] + domain_buffer

        # `varname` (not a hard-coded 'tprate') so the cell follows the notebook configuration
        dset, domain = geo.mask_dataset(dset_gcm, shape, coastline_buffer=None, varname=varname)

        # keep only the mask variable
        mask = dset[['mask']]

        # store the padded domain alongside the mask
        mask['mask'].attrs.update(
            dict(
                lon_min=lon_min,
                lon_max=lon_max,
                lat_min=lat_min,
                lat_max=lat_max,
            )
        )

        mask.to_netcdf(outputs_path.joinpath(f"{fname_root}.nc"))

        # figure: buffered shape (red), original shape (black) and mask (gray shading)

        f, ax = plt.subplots(figsize=(10,8), subplot_kw={'projection':ccrs.PlateCarree(central_longitude=180)})

        shape.boundary.plot(ax=ax, color='r', lw=1, transform=ccrs.PlateCarree())

        original_shape.boundary.plot(ax=ax, color='k', lw=0.7, alpha=1, transform=ccrs.PlateCarree())

        mask['mask'].plot(ax=ax, add_colorbar=False, transform=ccrs.PlateCarree(), alpha=0.4, cmap=plt.cm.gray)

        # the 'cells' attribute is expected to be already present on the mask
        # (presumably set by `geo.mask_dataset` - TODO confirm)
        ax.set_title(f"Country: {country}, admin. area: {admin}\n{mask['mask'].attrs['cells']} grid points", fontsize=14)

        # 5 gridlines in each direction, spanning the padded domain rounded outwards
        lons_grid = np.linspace(np.floor(lon_min), np.ceil(lon_max), num=5, endpoint=True)
        lats_grid = np.linspace(np.floor(lat_min), np.ceil(lat_max), num=5, endpoint=True)

        plot.make_gridlines(ax, lons=lons_grid, lats=lats_grid)

        f.savefig(fig_path.joinpath(f"{fname_root}.png"), dpi=200, bbox_inches='tight', facecolor='w')

        # one figure per admin area: close it to keep the memory usage bounded
        plt.close(f)

        mask.close()

        l_countries_admin.append([country_name, admin_name])

processing Northern Mariana Islands, admin region Southern Islands
processing Northern Mariana Islands, admin region Northern Islands
processing Guam, admin region Guam
processing Palau, admin region Babeldaob region
processing Palau, admin region South-west Islands
processing FSM, admin region Yap
processing FSM, admin region Chuuk
processing FSM, admin region Pohnpei
processing FSM, admin region Kosrae
processing FSM, admin region Kapingamarangi
processing Nauru, admin region Nauru
processing Kiribati, admin region Ocean Island
processing Kiribati, admin region Gilberts-North
processing Kiribati, admin region Gilberts-South
processing Kiribati, admin region Phoenix Islands
processing Kiribati, admin region Northern Line Islands
processing Kiribati, admin region Southern Line Islands
processing Kiribati, admin region Central Line Islands
processing Tokelau, admin region Tokelau
processing Tonga, admin region Tongatapu-Eua
processing Tonga, admin region Haapai
processing Tonga, admin r

In [27]:
# table of the processed [country, admin area] pairs (sanitized names); naming the columns
# at construction also gives a well-formed (empty) table when the list is empty
df_countries_admin = pd.DataFrame(l_countries_admin, columns=['country','admin'])

In [28]:
# name the two columns: country, then admin area
df_countries_admin.columns = ['country','admin']

In [29]:
# the list of countries / admin areas is saved one level above the masks directory
outputs_path.parent

PosixPath('/home/nicolasf/operational/ICU/development/hotspots/code/ICU_Water_Watch/outputs/admin')

In [30]:
# save the list of processed countries / admin areas next to the masks directory;
# the integer index is written as the first (unnamed) column of the csv file
csv_path = outputs_path.parent / "list_countries_admin.csv"
df_countries_admin.to_csv(csv_path)

In [31]:
# quick check of the first rows of the table
df_countries_admin.head()

Unnamed: 0,country,admin
0,Northern_Mariana_Islands,Southern_Islands
1,Northern_Mariana_Islands,Northern_Islands
2,Guam,Guam
3,Palau,Babeldaob_region
4,Palau,South-west_Islands
