In [1]:
%load_ext autoreload
%autoreload 2
%reload_ext autoreload

import xarray as xr
import numpy as np
import os
from glob import glob
import functions as f
#import climpredNEW.climpred 
#from climpredNEW.climpred.options import OPTIONS
from mpl_toolkits.basemap import Basemap
from numpy import meshgrid
from mpl_toolkits.axes_grid1.axes_divider import make_axes_locatable
import matplotlib.colors as mcolors
import cartopy.feature as cfeature
import itertools
import cartopy.crs as ccrs
from cartopy.mpl.ticker import LongitudeFormatter, LatitudeFormatter, LatitudeLocator
import matplotlib.ticker as mticker
import matplotlib.pyplot as plt
from matplotlib.colors import ListedColormap, TwoSlopeNorm
import pandas as pd
import math
from scipy.stats import percentileofscore as pos
from datetime import datetime
import datetime as dt
from multiprocessing import Pool
from sklearn.metrics import confusion_matrix as CM



2024-01-25 07:39:12.631091: I tensorflow/core/util/port.cc:110] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2024-01-25 07:39:15.252652: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 AVX512F AVX512_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.
  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Script-wide inputs, loaded through the project-local `functions` module (imported as `f`).
# CONUS mask that serves as the bounding box for this analysis. Switching to a larger
# mask file would also require regenerating the data produced by the previous scripts.
CONUS_mask = f.load_CONUS_mask()

# Max/min RZSM values; passed to f.reverse_min_max_scaling further down to undo the
# scaling of the saved predictions (presumably derived from the reforecast - TODO confirm).
max_RZSM_reforecast, min_RZSM_reforecast = f.load_reforecast_min_max_RZSM()

# Data

In [5]:
# Date window of the case study; used below as slice bounds on the S coordinate.
start_ = '2019-08-01'
end_ = '2019-10-30'

# Southeast bounding box, used below as slice bounds on the Y coordinate.
southeast_lat_bottom  = 30
southeast_lat_top = 38

# Southeast bounding box, used below as slice bounds on the X coordinate.
# NOTE(review): values of 267-282 suggest a 0-360 longitude convention - confirm against the data.
southeast_lon_left  = 267
southeast_lon_right = 282

# Plain numpy array taken at index 0 of the leading dimension of 'NCA-LDAS_mask'.
# Further down, cells where this equals 1 are kept and all other cells are set to np.nan.
mask_anom = CONUS_mask['NCA-LDAS_mask'][0,:,:].values


In [6]:
# GLEAM RZSM anomalies (SubX layout): keep only the six lead days analysed in this
# notebook, cast to float32 and pull everything into memory.
obs_anomaly_SubX_format = (
    xr.open_mfdataset('Data/GLEAM/RZSM_anomaly_reformat_SubX_format/RZSM_anomaly*.nc4')
    .sel(L=[0, 6, 13, 20, 27, 34])
    .astype(np.float32)
    .load()
)

# Case-study window and Southeast box, averaged over the M dimension.
# The Y slice runs from the top latitude to the bottom one (assumes Y is stored
# in descending order - TODO confirm).
obs_anomaly_SubX_format_subset = (
    obs_anomaly_SubX_format
    .sel(S=slice(start_, end_))
    .sel(X=slice(southeast_lon_left, southeast_lon_right))
    .sel(Y=slice(southeast_lat_top, southeast_lat_bottom))
    .mean(dim='M')
)


In [7]:
# ---- Reforecast baseline files ----
# GEFSv12 reforecast RZSM anomaly files, opened in sorted filename order.
baseline_anomaly_file_list = sorted(
    glob('Data/GEFSv12_reforecast/soilw_bgrnd/baseline_RZSM_anomaly/RZSM*.nc')
)

# Same lead-day subset and float32 cast as the observations; loaded into memory.
baseline_anomaly = (
    xr.open_mfdataset(baseline_anomaly_file_list)
    .sel(L=[0, 6, 13, 20, 27, 34])
    .astype(np.float32)
    .load()
)

In [None]:
# GLEAM data reformatted to the reforecast shape; serves as a coordinate template.
template = (
    xr.open_mfdataset('Data/GLEAM/reformat_to_reforecast_shape/RZSM_weighted/*.nc4')
    .sel(L=[0, 6, 13, 20, 27, 34])
    .astype(np.float32)
    .load()
)

# 2018-2019 part of the template; create_reforecast_with_predictions_week3 copies it
# and overwrites its RZSM values with the saved predictions.
template_testing_only = template.sel(S=slice('2018-01-01', '2019-12-31'))

In [87]:
# Southeast observation subset at lead day 20 only; this is the reference the
# MAE comparisons below are computed against.
obs_anomaly_SubX_format_lead20 = obs_anomaly_SubX_format_subset.sel(L=20)

In [170]:
def create_reforecast_with_predictions_week3(experiment_list, lead_day):
    """Build a Southeast, M-averaged dataset holding the saved week-3 predictions.

    The 2018-2019 template is deep-copied and restricted to ``lead_day``; its
    ``RZSM`` values are then replaced by the un-scaled, CONUS-masked predictions
    read from ``predictions/Wk_3_testing/``.

    Parameters
    ----------
    experiment_list : sequence of str
        Only the first entry is used; it names the experiment whose ``.npy``
        prediction file is loaded.
    lead_day : int
        Value of the ``L`` coordinate selected from the template.

    Returns
    -------
    Dataset restricted to the Southeast box, averaged over ``M`` and limited to
    the ``start_``..``end_`` window of ``S``.
    """
    week = 3  # the only lead week handled by this helper

    # The template only supplies coordinates/shape; its values are overwritten below.
    prediction_ds = template_testing_only.copy(deep=True).sel(L=lead_day)

    experiment = experiment_list[0]
    saved = np.load(f'predictions/Wk_{week}_testing/Wk{week}_testing_{experiment}.npy')

    # NOTE(review): index 2 on the first axis and 0 on the last axis are taken as-is
    # from the saved array; their meaning is not visible here - confirm.
    rzsm = f.reverse_min_max_scaling(
        saved[2, :, :, :, 0], max_RZSM_reforecast, min_RZSM_reforecast
    )

    # Split the leading axis into groups of 11 (presumably the ensemble members,
    # since the result is averaged over M afterwards - TODO confirm).
    n_rows, n_y, n_x = rzsm.shape[0], rzsm.shape[1], rzsm.shape[2]
    rzsm = np.reshape(rzsm, (n_rows // 11, 11, n_y, n_x))

    # Keep cells where the CONUS mask equals 1; everything else becomes NaN.
    rzsm = np.where(mask_anom == 1, rzsm, np.nan)

    # Write the predictions into the template copy.
    prediction_ds.RZSM[:, :, :, :] = rzsm

    # Southeast box, mean over M, then the case-study date window.
    southeast = (
        prediction_ds
        .sel(X=slice(southeast_lon_left, southeast_lon_right))
        .sel(Y=slice(southeast_lat_top, southeast_lat_bottom))
        .mean(dim='M')
    )
    return southeast.sel(S=slice(start_, end_))

In [3]:
# Experiment names of the best model outputs, used to build the UNET prediction dataset.
# Six entries - presumably one per lead week in the order wk0..wk5, matching how
# combinations are assembled in the search cell below (TODO confirm).
experiment_list = ['EX10_denseLossRZSM','EX20_RZSM','EX20_RZSM','EX20_RZSM','EX20_RZSM','EX26_RZSM'] #Best model outputs

In [39]:
# Candidate experiment names per lead week (wk0..wk5), each formatted as 'EX<i>_RZSM'.

wk0 = [f'EX{i}_RZSM' for i in range(13)]  # EX0..EX12
wk1 = [f'EX{i}_RZSM' for i in range(26)]  # EX0..EX25
wk2 = [f'EX{i}_RZSM' for i in range(26)]  # EX0..EX25
wk3 = [f'EX{i}_RZSM' for i in range(26)]  # EX0..EX25
wk4a = [f'EX{i}_RZSM' for i in range(12)]  # EX0..EX11
wk4b = [f'EX{i}_RZSM' for i in range(13,26)]  # EX13..EX25
# Note: EX12 is absent from wk4 (the two ranges skip it).
wk4 = wk4a + wk4b
wk5 = ['EX26_RZSM']  # single candidate

In [203]:
# MAE of every week-3 UNET experiment against the observations for a single
# initialization date, followed by the same score for the GEFSv12 baseline.

# Loop-invariant reference: the 5th S value of the lead-20 observation subset.
one_init_date = obs_anomaly_SubX_format_lead20.S.values[4]
obs_corr = obs_anomaly_SubX_format_lead20.sel(S=one_init_date).RZSM.values

mae_unet_out = {}
for d in wk3:
    experiment_list = [d]
    # lead_day is a required argument; 20 matches the lead of the observations
    # (obs_anomaly_SubX_format_lead20) and of the baseline selection below.
    unet = create_reforecast_with_predictions_week3(experiment_list, lead_day=20)
    # Align the prediction's S coordinate with the observations before selecting by date.
    unet = unet.assign_coords({'S': obs_anomaly_SubX_format_lead20.S.values})
    unet_corr = unet.sel(S=one_init_date).RZSM.values

    mae = np.nanmean(np.abs(obs_corr - unet_corr))
    mae_unet_out[d] = mae

##Baseline GEFSv12 forecast
base_corr = (
    baseline_anomaly
    .sel(S=one_init_date)
    .mean(dim='M')
    .sel(L=20)
    .sel(X=slice(southeast_lon_left, southeast_lon_right))
    .sel(Y=slice(southeast_lat_top, southeast_lat_bottom))
    .RZSM.values
)
# Score the baseline only where the observations are not NaN.
base_corr = np.where(~np.isnan(obs_corr), base_corr, np.nan)

mae = np.nanmean(np.abs(obs_corr - base_corr))
mae_unet_out['Baseline'] = mae

# Final results for week 3: the baseline is still better than the UNET for extreme
# flash drought events (single-day forecasts).


In [152]:
# Exhaustive search over every combination of per-week experiments
# (~5 million combinations). Only a single week is actually needed, so this is
# far more work than necessary and is expected to be interrupted.
# NOTE(review): `create_reforecast_with_predictions` is not defined in the visible
# part of this notebook - confirm it exists before running this cell.

# Loop-invariant reference: the 5th S value of the lead-20 observation subset.
one_init_date = obs_anomaly_SubX_format_lead20.S.values[4]
obs_corr = obs_anomaly_SubX_format_lead20.sel(S=one_init_date).RZSM.values

# Start from +inf: an MAE is never negative, so a starting value of 0 could never
# be beaten and no best combination would ever be recorded.
mae_min = np.inf
best_experiment_list = None

for combo in itertools.product(wk0, wk1, wk2, wk3, wk4, wk5):
    experiment_list = list(combo)
    unet = create_reforecast_with_predictions(experiment_list)
    # Align the prediction's S coordinate with the observations before selecting by date.
    unet = unet.assign_coords({'S': obs_anomaly_SubX_format_lead20.S.values})
    unet_corr = unet.sel(S=one_init_date).RZSM.values

    mae = np.nanmean(np.abs(obs_corr - unet_corr))

    # A NaN score never compares as smaller, so it cannot replace the current best.
    if mae < mae_min:
        mae_min = mae
        best_experiment_list = experiment_list

KeyboardInterrupt: 

In [None]:
# Display the experiment combination recorded by the search cell.
best_experiment_list

In [None]:
# Display the lowest MAE recorded by the search cell.
mae_min

# Case study of the 2019 Southeast flash drought (ensemble mean only)

In [402]:
# Keep values where mask_anom equals 1 and set everything else to NaN
# (observations are additionally limited to lead days 20, 27 and 34).
# NOTE(review): `obs` and `baseline` are not assigned anywhere in the visible part of
# this notebook before this cell, and `unet` is whatever an earlier loop left behind.
# Each line also overwrites its own input, so the cell relies on prior kernel state -
# confirm where these inputs come from.
obs = xr.where(mask_anom ==1, obs,np.nan).sel(L=[20,27,34])
unet = xr.where(mask_anom ==1, unet,np.nan)
baseline = xr.where(mask_anom ==1, baseline,np.nan)

In [408]:
def get_min_max_of_files(obs, unet, baseline, date):
    """Return the overall (minimum, maximum) of ``rci`` across three datasets.

    Each dataset is selected at ``S=date`` and reduced with ``.min()`` / ``.max()``;
    the smallest of the three minima and the largest of the three maxima are returned.

    Parameters
    ----------
    obs, unet, baseline
        Objects supporting ``.sel(S=...)`` whose reductions expose ``.rci.values``.
    date
        Value passed to ``sel`` for the ``S`` coordinate (e.g. '2019-08-07').

    Returns
    -------
    tuple
        ``(lowest minimum, highest maximum)``.
    """
    sources = (obs, unet, baseline)

    lows = [ds.sel(S=date).min().rci.values for ds in sources]
    highs = [ds.sel(S=date).max().rci.values for ds in sources]

    return (min(lows), max(highs))

In [428]:
def return_array(file, lead, date):
    """Return the ``rci`` values of ``file`` selected at ``L=lead`` and ``S=date``."""
    selected = file.sel(L=lead, S=date)
    return selected.rci.values

# Plot anomaly for 2019

In [7]:
# GLEAM RZSM anomalies (SubX layout) for lead days 20, 27 and 34, loaded into memory.
obs_anomaly_mf = (
    xr.open_mfdataset('Data/GLEAM/RZSM_anomaly_reformat_SubX_format/RZSM_anomaly*.nc4')
    .sel(L=[20, 27, 34])
    .load()
)

In [11]:

# Restrict each dataset to the case-study window of S and average over the M dimension.
# NOTE(review): `unet_anomaly` is not defined in the visible part of this notebook -
# confirm where it comes from.
obs_anom = obs_anomaly_mf.sel(S=slice(start_,end_)).mean(dim='M')
unet_anom = unet_anomaly.sel(S=slice(start_,end_)).mean(dim='M')
baseline_anom = baseline_anomaly.sel(S=slice(start_,end_)).mean(dim='M')

# Keep values where mask_anom equals 1 (everything else becomes NaN) and keep
# lead days 20, 27 and 34 only.
obs_anom = xr.where(mask_anom ==1, obs_anom,np.nan).sel(L=[20,27,34])
unet_anom = xr.where(mask_anom ==1, unet_anom,np.nan).sel(L=[20,27,34])
baseline_anom = xr.where(mask_anom ==1, baseline_anom,np.nan).sel(L=[20,27,34])

# Blank out forecast cells wherever the observations are NaN so that all three
# datasets share the observations' missing-data footprint.
unet_anom = xr.where(~np.isnan(obs_anom), unet_anom,np.nan)
baseline_anom = xr.where(~np.isnan(obs_anom), baseline_anom,np.nan)

In [19]:
def get_min_max_of_files_anomaly(obs, unet, baseline, date):
    """Return the overall (minimum, maximum) of ``RZSM`` across three datasets.

    Each dataset is selected at ``S=date`` and reduced with ``.min()`` / ``.max()``;
    the smallest of the three minima and the largest of the three maxima are returned.

    Parameters
    ----------
    obs, unet, baseline
        Objects supporting ``.sel(S=...)`` whose reductions expose ``.RZSM.values``.
    date
        Value passed to ``sel`` for the ``S`` coordinate (e.g. '2019-08-07').

    Returns
    -------
    tuple
        ``(lowest minimum, highest maximum)``.
    """
    sources = (obs, unet, baseline)

    lows = [ds.sel(S=date).min().RZSM.values for ds in sources]
    highs = [ds.sel(S=date).max().RZSM.values for ds in sources]

    return (min(lows), max(highs))

def return_array_anomaly(file, lead, date):
    """Return the ``RZSM`` values of ``file`` selected at ``L=lead`` and ``S=date``."""
    selected = file.sel(L=lead, S=date)
    return selected.RZSM.values

# Loop over each experiment to find the best ones which represent only the Southeast

In [55]:


   
# cmap = 'coolwarm'
def plot_case_study_anomaly(obs, unet, baseline, init_date):
    """Plot and save a 3x3 panel of RZSM anomalies for one initialization date.

    Rows are lead days 20, 27 and 34; columns are GLEAM (``obs``), UNET (``unet``)
    and Baseline (``baseline``). All panels share one color scale centred on 0.
    The UNET and Baseline panels are annotated with their Pearson correlation
    against the observations of the same lead.

    Parameters
    ----------
    obs, unet, baseline
        Datasets accepted by ``get_min_max_of_files_anomaly`` and
        ``return_array_anomaly``; ``obs`` also provides the ``X`` / ``Y``
        coordinates used for every panel.
    init_date
        Anything ``pd.to_datetime`` accepts; formatted as ``YYYY-MM-DD`` for
        selection, the figure title and the output file name.

    Returns
    -------
    None
        The figure is written to
        ``Outputs/Case_studies/Southeast_US/anomaly/Southeast_anomaly_init<date>.png``,
        shown, and then closed.
    """
    # Position (in map coordinates) and size of the correlation annotation.
    text_x = -83.5
    text_y = 27
    font_size_corr = 12

    leads = [20, 27, 34]
    sources = [(obs, 'GLEAM'), (unet, 'UNET'), (baseline, 'Baseline')]

    cmap = plt.get_cmap('bwr')

    save_dir = 'Outputs/Case_studies/Southeast_US/anomaly'
    # Create the output directory without going through a shell.
    os.makedirs(save_dir, exist_ok=True)

    fig, axs = plt.subplots(
        nrows=len(leads), ncols=len(sources),
        subplot_kw={'projection': ccrs.PlateCarree()}, figsize=(15, 10))
    axs = axs.flatten()

    date = pd.to_datetime(init_date).strftime('%Y-%m-%d')

    min_, max_ = get_min_max_of_files_anomaly(obs, unet, baseline, date)

    # Contour levels and color normalisation are identical for every panel, so
    # build them once. TwoSlopeNorm requires min_ < 0 < max_.
    v = np.linspace(min_, max_, 20, endpoint=True)
    norm = TwoSlopeNorm(vmin=min_, vcenter=0, vmax=max_)

    # The coordinate grid is also shared by every panel.
    basemap = Basemap(projection='cyl', llcrnrlat=25, urcrnrlat=50,
                      llcrnrlon=-128, urcrnrlon=-60, resolution='l')
    x, y = basemap(*np.meshgrid(obs.X.values, obs.Y.values))

    panel = 0
    for lead in leads:
        # Observations of this lead, flattened once for the correlation scores.
        obs_flat = return_array_anomaly(file=obs, lead=lead, date=date).flatten()

        for col, (data_to_plot, name) in enumerate(sources):
            ax = axs[panel]
            data = return_array_anomaly(file=data_to_plot, lead=lead, date=date)

            im = ax.contourf(x, y, data, levels=v, extend='both',
                             transform=ccrs.PlateCarree(), cmap=cmap, norm=norm)

            gl = ax.gridlines(crs=ccrs.PlateCarree(), draw_labels=True,
                              linewidth=0.7, color='gray', alpha=0.5, linestyle='--')
            gl.top_labels = False
            gl.right_labels = False
            # Latitude labels only on the left-most column; the previous
            # `lead != 1` test was always true and hid them on every panel.
            if col != 0:
                gl.left_labels = False
            gl.xformatter = LongitudeFormatter()
            gl.yformatter = LatitudeFormatter()
            ax.coastlines()
            ax.set_aspect('equal')  # this makes the plots better
            ax.set_title(f'{name} Lead {lead}', fontsize=15)

            if name in ['UNET', 'Baseline']:
                # Pearson correlation against the observations, using only the
                # cells where BOTH fields are defined (a NaN on either side
                # would otherwise turn the whole coefficient into NaN).
                data_flat = data.flatten()
                valid = ~np.isnan(obs_flat) & ~np.isnan(data_flat)

                correlation_matrix = np.corrcoef(obs_flat[valid], data_flat[valid])
                # The coefficient sits in the off-diagonal of the 2x2 matrix.
                correlation_coefficient = round(correlation_matrix[0, 1], 4)
                ax.text(text_x, text_y, f'Corr: {correlation_coefficient}',
                        ha='right', va='bottom', fontsize=font_size_corr,
                        color='blue', weight='bold')

            panel += 1

    # Shared horizontal colorbar placed below the panels.
    cbar_ax = fig.add_axes([0.05, -0.05, .9, .04])
    fig.colorbar(im, cax=cbar_ax, orientation='horizontal')
    fig.suptitle(f'Init date: {date}', fontsize=30)
    fig.tight_layout()

    plt.savefig(f'{save_dir}/Southeast_anomaly_init{date}.png', bbox_inches='tight')
    plt.show()
    # Release the figure; this function is called once per init date in a loop.
    plt.close(fig)


In [None]:
# plot_case_study_anomaly(obs=obs_anom, unet=unet_anom, baseline=baseline_anom, init_date=obs_anom.S.values[0])

In [None]:
# Render and save one case-study figure for every S value in obs_anom.
for init_date in obs_anom.S.values:
    plot_case_study_anomaly(obs=obs_anom, unet=unet_anom, baseline=baseline_anom, init_date=init_date)