In [None]:
%load_ext autoreload
%autoreload 2
%reload_ext autoreload

import xarray as xr
import numpy as np
import os
from glob import glob
from mpl_toolkits.basemap import Basemap
from numpy import meshgrid
from mpl_toolkits.axes_grid1.axes_divider import make_axes_locatable
import matplotlib.colors as mcolors
import cartopy.feature as cfeature
import itertools
import cartopy.crs as ccrs
from cartopy.mpl.ticker import LongitudeFormatter, LatitudeFormatter, LatitudeLocator
import matplotlib.ticker as mticker
import matplotlib.pyplot as plt
from matplotlib.colors import ListedColormap, TwoSlopeNorm
import pandas as pd
import math
from datetime import datetime
import datetime as dt
from ridgeplot import ridgeplot
import joypy
import seaborn as sns
from matplotlib import cm
import climpred
from xclim import sdba
from climpred.options import OPTIONS
import json
from sklearn.metrics import roc_curve, auc, confusion_matrix, accuracy_score, precision_score, recall_score, f1_score

from function import funs as f
from function import preprocessUtils as putils
from function import masks
from function import verifications
from function import conf
from function import dataLoad


The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [None]:
# Run configuration: the study region and the observational soil-moisture
# product that the reforecasts are verified against.
region_name = 'china' #['australia','china','CONUS']
obs_source = 'GLEAM' #['GLEAM','ERA5']

# Resolve the observation directory from the project configuration.
if obs_source == 'ERA5':
    soil_dir = conf.era_data
elif obs_source == 'GLEAM':
    soil_dir = conf.gleam_data
else:
    # Fail here instead of with a NameError on `soil_dir` in a later cell.
    raise ValueError(f"Unknown obs_source {obs_source!r}; expected 'GLEAM' or 'ERA5'")

In [None]:
#Set script parameters
# NOTE: `dir` shadows the Python builtin of the same name; the name is left
# unchanged because other cells may read it.
dir = conf.home

# Dimension ordering comes from the project configuration.
dim_order = conf.dim_order

# Date boundaries of the training, validation and testing periods.
train_start  = '2000-01-01'
train_end = '2015-12-31'
val_start = '2016-01-01'
val_end = '2017-12-31'
test_start = '2018-01-01'
test_end = '2019-12-31'

# Forecast leads of interest (presumably lead days -- TODO confirm units).
leads_ = [6,13,20,27,34]

# Mask for the selected region; loaded once and reused below.
mask = masks.load_mask(region_name)

if region_name == 'CONUS':
    # Same mask with its Y/X dimensions renamed to lat/lon.
    region_mask = mask.rename({'Y':'lat','X':'lon'})
    CONUS_region_names = {1:'Northeast',2:'Southeast',3:'Midwest',4:'Great Plains',5:'Northwest',6:'Southwest'}
    #Mask with np.nan for non-CONUS land values
    mask_anom = mask[putils.xarray_varname(mask)][:,:].values
else:
    # Other regions: take index 0 along the leading dimension of the mask variable.
    mask_anom = mask[putils.xarray_varname(mask)][0,:,:].values

# Initialization dates for the region and the testing period.
init_dates, dt_dates, only_testing_dates = dataLoad.return_init_and_testing_dates(region_name,test_start,test_end)

In [None]:
# Load data

# Soil moisture observations: anomaly and raw fields from the selected product.
obs_anom, obs_raw = dataLoad.load_rzsm_observations(soil_dir,region_name)

# Floor the anomaly time stamps to whole days.
obs_anom["time"] = obs_anom["time"].dt.floor("D")

# Crop the raw observations to the mask's bounding box, then floor their
# time stamps to whole days as well.
obs_raw = putils.restrict_to_bounding_box(obs_raw,mask)
obs_raw["time"] = obs_raw["time"].dt.floor("D")

# Soil reforecasts (raw and anomaly) from GEFS and ECMWF.
raw_gefs_soil, gefs_file_list, anom_gefs_soil = dataLoad.load_GEFS_soil_reforecast(
    region_name,
    mask_anom,
)
raw_ecmwf_soil, anom_ecmwf_soil = dataLoad.load_ECMWF_soil_reforecast(
    region_name,
    mask_anom,
    init_dates,
)

In [None]:
# The datasets use different coordinate systems, so for CONUS every negative X
# value is shifted by 360 before the fields are cropped to the mask's bounding box.
if region_name == 'CONUS':
    print('We are changing the coordinates of CONUS to match similar format as GLEAM or ERA5')

    # ECMWF: shift negative X values, then crop to the mask's bounding box.
    new_X_coords = [i+360 if i < 0 else i for i in raw_ecmwf_soil.X.values] #same as putils.create_new_X_coord_values
    raw_ecmwf_soil = raw_ecmwf_soil.assign_coords({'X':new_X_coords})
    anom_ecmwf_soil = anom_ecmwf_soil.assign_coords({'X':new_X_coords})

    raw_ecmwf_soil = putils.restrict_to_bounding_box(raw_ecmwf_soil,mask)
    anom_ecmwf_soil = putils.restrict_to_bounding_box(anom_ecmwf_soil,mask)

    # GEFS: derive the shifted coordinates from the GEFS X axis itself. Reusing the
    # (already cropped) ECMWF axis here would give GEFS coordinates of the wrong
    # length or the wrong values.
    new_X_coords = [i+360 if i < 0 else i for i in raw_gefs_soil.X.values] #same as putils.create_new_X_coord_values
    raw_gefs_soil = raw_gefs_soil.assign_coords({'X':new_X_coords})
    anom_gefs_soil = anom_gefs_soil.assign_coords({'X':new_X_coords})

    raw_gefs_soil = putils.restrict_to_bounding_box(raw_gefs_soil,mask)
    anom_gefs_soil = putils.restrict_to_bounding_box(anom_gefs_soil,mask)

    if obs_source == 'ERA5':
        # ERA5 observations get their X/Y dimensions renamed to longitude/latitude.
        obs_raw = obs_raw.rename({'X':'longitude','Y':'latitude'})
        obs_anom = obs_anom.rename({'X':'longitude','Y':'latitude'})

In [None]:
# Bias-correct the anomaly reforecasts against the observed anomalies.

# climpred is configured to use a day-of-year seasonality; the active setting
# is read back from climpred's OPTIONS.
climpred.set_options(seasonality="dayofyear")
seasonality_str = OPTIONS["seasonality"]

# Observations renamed to the layout climpred expects.
obs_raw_climp = verifications.rename_obs_for_climpred(obs_raw)
obs_anom_climp = verifications.rename_obs_for_climpred(obs_anom)

# Only the anomaly reforecasts are bias corrected here; the raw (non-anomaly)
# reforecast fields are not used in this cell.
# NOTE(review): presumably the correction is fitted on train_start..train_end --
# confirm in verifications.additive_bias_removal.

# GEFSv12 anomaly reforecast
bias_correct_gefs_anom = verifications.rename_subx_for_climpred(anom_gefs_soil)
additive_bias_gefs_anom = verifications.additive_bias_removal(
    bias_correct_gefs_anom,
    obs_anom_climp,
    train_start,
    train_end,
).get_initialized()

# ECMWF anomaly reforecast (loaded into memory before renaming)
bias_correct_ecmwf_anom = verifications.rename_subx_for_climpred(anom_ecmwf_soil.load())
additive_bias_ecmwf_anom = verifications.additive_bias_removal(
    bias_correct_ecmwf_anom,
    obs_anom_climp,
    train_start,
    train_end,
).get_initialized()

In [None]:

# Remove the `valid_time` variable and load the bias-corrected anomalies into memory.
# `drop_vars` replaces the deprecated `Dataset.drop`, and `errors='ignore'` keeps
# this cell re-runnable after `valid_time` has already been removed.
additive_bias_ecmwf_anom = additive_bias_ecmwf_anom.drop_vars('valid_time', errors='ignore').load()
additive_bias_gefs_anom = additive_bias_gefs_anom.drop_vars('valid_time', errors='ignore').load()


# Compute the ACC on the testing data only

In [None]:
# Season label -> month numbers (DJF winter, MAM spring, JJA summer, SON fall).
seasons = dict(
    DJF=[12, 1, 2],
    MAM=[3, 4, 5],
    JJA=[6, 7, 8],
    SON=[9, 10, 11],
)

# ACC for initializations from test_start onwards.
ecm_acc = verifications.create_climpred_ACC(
    additive_bias_ecmwf_anom.sel(init=slice(test_start, None)),
    obs_anom_climp,
)
gef_acc = verifications.create_climpred_ACC(
    additive_bias_gefs_anom.sel(init=slice(test_start, None)),
    obs_anom_climp,
)

In [None]:
#Save data
save_bias_corrected = f'Data/bias_corrected_reforecast/{region_name}'
os.makedirs(save_bias_corrected,exist_ok=True)

ecm_acc.to_netcdf(f'{save_bias_corrected}/ecmwf_acc_values_{obs_source}.nc')
gef_acc.to_netcdf(f'{save_bias_corrected}/gefs_acc_values_{obs_source}.nc')

additive_bias_ecmwf_anom.sel(init=slice(test_start,None)).to_netcdf(f'{save_bias_corrected}/ecmwf_anomaly_values_bias_corrected_testing_years_{obs_source}.nc')
additive_bias_gefs_anom.sel(init=slice(test_start,None)).to_netcdf(f'{save_bias_corrected}/gefs_anomaly_values_bias_corrected_testing_years_{obs_source}.nc')

In [None]:
# Compute the ACC for each season individually.

# The testing-period restriction does not depend on the season, so apply it once.
gefs_test = additive_bias_gefs_anom.sel(init=slice(test_start,None))
ecmwf_test = additive_bias_ecmwf_anom.sel(init=slice(test_start,None))

for season, months in seasons.items():

    # Keep only initializations whose month falls in the current season.
    gefs_season = gefs_test.sel(init=gefs_test['init'].dt.month.isin(months))
    ecmwf_season = ecmwf_test.sel(init=ecmwf_test['init'].dt.month.isin(months))

    # Season-specific names, so the full-period `ecm_acc` / `gef_acc` are not overwritten.
    ecm_acc_season = verifications.create_climpred_ACC(ecmwf_season, obs_anom_climp)
    gef_acc_season = verifications.create_climpred_ACC(gefs_season, obs_anom_climp)

    ecm_acc_season.to_netcdf(f'{save_bias_corrected}/ecmwf_acc_values_{season}_{obs_source}.nc')
    gef_acc_season.to_netcdf(f'{save_bias_corrected}/gefs_acc_values_{season}_{obs_source}.nc')

# Compute the CRPS on the testing data only

In [None]:
# CRPS for initializations from test_start onwards, written to disk right away.
ecmwf_crps = verifications.create_climpred_CRPS(
    additive_bias_ecmwf_anom.sel(init=slice(test_start, None)),
    obs_anom_climp,
)
ecmwf_crps.to_netcdf(f'{save_bias_corrected}/ecmwf_crps_values_{obs_source}.nc')

gefs_crps = verifications.create_climpred_CRPS(
    additive_bias_gefs_anom.sel(init=slice(test_start, None)),
    obs_anom_climp,
)
gefs_crps.to_netcdf(f'{save_bias_corrected}/gefs_crps_values_{obs_source}.nc')

In [None]:
# Compute the CRPS for each season individually.

# The testing-period restriction does not depend on the season, so apply it once.
gefs_test = additive_bias_gefs_anom.sel(init=slice(test_start,None))
ecmwf_test = additive_bias_ecmwf_anom.sel(init=slice(test_start,None))

for season, months in seasons.items():

    # Keep only initializations whose month falls in the current season.
    gefs_season = gefs_test.sel(init=gefs_test['init'].dt.month.isin(months))
    ecmwf_season = ecmwf_test.sel(init=ecmwf_test['init'].dt.month.isin(months))

    # Named for what they hold (CRPS), and kept separate from the ACC variables.
    ecm_crps_season = verifications.create_climpred_CRPS(ecmwf_season, obs_anom_climp)
    gef_crps_season = verifications.create_climpred_CRPS(gefs_season, obs_anom_climp)

    ecm_crps_season.to_netcdf(f'{save_bias_corrected}/ecmwf_crps_values_{season}_{obs_source}.nc')
    gef_crps_season.to_netcdf(f'{save_bias_corrected}/gefs_crps_values_{season}_{obs_source}.nc')

# Compute the CRPSS on the testing data only

In [None]:
# CRPSS for initializations from test_start onwards, written to disk right away.
# NOTE: the variables keep their `_crps` names but hold CRPSS values from here on.
ecmwf_crps = verifications.create_climpred_CRPSS(
    additive_bias_ecmwf_anom.sel(init=slice(test_start, None)),
    obs_anom_climp,
)
ecmwf_crps.to_netcdf(f'{save_bias_corrected}/ecmwf_crpss_values_{obs_source}.nc')

gefs_crps = verifications.create_climpred_CRPSS(
    additive_bias_gefs_anom.sel(init=slice(test_start, None)),
    obs_anom_climp,
)
gefs_crps.to_netcdf(f'{save_bias_corrected}/gefs_crpss_values_{obs_source}.nc')

In [None]:
# Compute the CRPSS for each season individually.

# The testing-period restriction does not depend on the season, so apply it once.
gefs_test = additive_bias_gefs_anom.sel(init=slice(test_start,None))
ecmwf_test = additive_bias_ecmwf_anom.sel(init=slice(test_start,None))

for season, months in seasons.items():

    # Keep only initializations whose month falls in the current season.
    gefs_season = gefs_test.sel(init=gefs_test['init'].dt.month.isin(months))
    ecmwf_season = ecmwf_test.sel(init=ecmwf_test['init'].dt.month.isin(months))

    # Named for what they hold (CRPSS), and kept separate from the ACC variables.
    ecm_crpss_season = verifications.create_climpred_CRPSS(ecmwf_season, obs_anom_climp)
    gef_crpss_season = verifications.create_climpred_CRPSS(gefs_season, obs_anom_climp)

    ecm_crpss_season.to_netcdf(f'{save_bias_corrected}/ecmwf_crpss_values_{season}_{obs_source}.nc')
    gef_crpss_season.to_netcdf(f'{save_bias_corrected}/gefs_crpss_values_{season}_{obs_source}.nc')