## Interpretability Script!
This notebook explores the relative contributions of the MJO and ENSO indices to the prediction of z500 in the North Pacific (part of the PNA).

## Prep for analysis

In [1]:
import numpy as np
import xarray as xr
import matplotlib.pyplot as plt
import tensorflow as tf
import random 
from datetime import datetime
import pandas as pd

import sys
sys.path.append('/glade/work/kjmayer/research/catalyst/ENSOvsMJO/utils/')
# sys.path.append('/glade/u/home/wchapman/ENSOvsMJO/utils/')
from exp_hp import get_hp
from trainGordon_utils import subset, build_model, fullmodel, scheduler, plot_results, adjust_spines
from dataprep_utils import get_testing
sys.path.append('/glade/work/kjmayer/research/catalyst/ENSOvsMJO/interpret/')
# sys.path.append('/glade/u/home/wchapman/ENSOvsMJO/interpret/')
from Gordon_interp import getoutputvecs, confvacc, iconfcorr


# import importlib
# importlib.reload(sys.modules["Gordon_interp"])
# from Gordon_interp import getoutputvecs, confvacc, iconfcorr
# importlib.reload(sys.modules["trainGordon_utils"])
# from trainGordon_utils import subset, build_model, fullmodel, scheduler, plot_results

import matplotlib as mpl
import matplotlib.pyplot as plt

# Global text settings for every figure in this notebook:
# render text through LaTeX and use a 15 pt Verdana sans-serif font.
plt.rcParams['text.usetex'] = True
plt.rcParams.update({
    'font.family': 'sans-serif',
    'font.sans-serif': ['Verdana'],
    'font.size': 15,
})
def adjust_spines(ax, spines):
    """Keep only the requested axis spines and move them outward.

    NOTE: this local definition replaces the ``adjust_spines`` imported from
    ``trainGordon_utils`` earlier in this cell.

    Parameters
    ----------
    ax : object exposing ``spines`` (a mapping of location -> spine),
        ``xaxis`` and ``yaxis`` (e.g. a matplotlib Axes)
    spines : container of str
        Spine locations to keep (e.g. ``['left', 'bottom']``). Every spine
        whose location is not listed has its color set to ``'none'``.

    Returns
    -------
    None
        ``ax`` is modified in place.
    """
    for side, line in ax.spines.items():
        if side not in spines:
            # Hide spines that were not requested.
            line.set_color('none')
            continue
        # Requested spines are pushed 5 points away from the plot area.
        line.set_position(('outward', 5))

    # Ticks are kept only on the axes whose spine is shown (y first, then x).
    for side, axis in (('left', ax.yaxis), ('bottom', ax.xaxis)):
        if side in spines:
            axis.set_ticks_position(side)
        else:
            axis.set_ticks([])
# Figure defaults: white background, 150 dpi.
mpl.rcParams.update({
    'figure.facecolor': 'white',
    'figure.dpi': 150,
})
# Higher resolution value; not referenced in the visible cells
# (presumably meant for saved figures -- TODO confirm).
dpiFig = 300.0

2023-09-08 14:40:26.620628: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  SSE4.1 SSE4.2 AVX AVX2 AVX512F AVX512_VNNI FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.
2023-09-08 14:40:26.921340: I tensorflow/core/util/port.cc:104] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.


### Load network:

In [2]:
# --- Location of the trained networks and the hyperparameter set to use ---
MODEL_DIR = '/glade/scratch/wchapman/ENSOmjo_ML_models/saved_models/'
EXP_NAME = 'default'
hps = get_hp(EXP_NAME)

# --- Hyperparameters shared by both sub-networks ---
DROPOUT_RATE = hps['DROPOUT_RATE']
BATCH_SIZE = hps['BATCH_SIZE']
PATIENCE = hps['PATIENCE']  # number of epochs of no "improvement" before training is stopped
LR = hps['LR']  # learning rate
N_EPOCHS = 10000

# --- Sub-network 1 (ENSO) and sub-network 2 (MJO): name, ridge setting, hidden layers ---
MODELNAME1, RIDGE1, HIDDENS1 = 'ENSO', hps['RIDGE1'], hps['HIDDENS1']
MODELNAME2, RIDGE2, HIDDENS2 = 'MJO', hps['RIDGE2'], hps['HIDDENS2']

# --- Experiment grid ---
LEADS = np.arange(7, 31)  # 7, 8, ..., 30; passed to get_testing as LEAD
AVGS = np.arange(7, 32)   # 7, 8, ..., 31; passed to get_testing as N_z500runmean
SEEDS = np.arange(1, 6)   # 1, ..., 5; one trained network per seed

# When True, the evaluation loop writes its result arrays to disk.
SAVE = True

In [None]:
# Evaluate every trained network on the testing data.
#
# For each (LEAD, AVG) pair the testing data are loaded once; then, for each
# seed, the ENSO and MJO sub-networks and the combined network are rebuilt,
# the saved weights are loaded, and three things are recorded:
#   * confidence-vs-accuracy of the combined network (confvsacc),
#   * raw outputs of the combined network and of each sub-network,
#   * the fraction of correct combined predictions for which only ENSO,
#     only MJO, or both sub-networks also picked the correct class.
# If SAVE is True the arrays are written to ddir_save as .npy files.

# Output directory for the per-(LEAD, AVG) result arrays.
ddir_save = '/glade/work/kjmayer/research/catalyst/ENSOvsMJO/data/'
# Seed-range tag used in every output filename ('00001-00005' for seeds 1-5).
seed_tag = f'{int(SEEDS[0]):05d}-{int(SEEDS[-1]):05d}'
n_seeds = len(SEEDS)

for lead in LEADS:
    print('LEAD: '+str(lead))
    for avg in AVGS:
        print('AVG: '+str(avg))
        print('load testing data')
        X1test, X2test, Ytest = get_testing(N_z500runmean=avg,
                                            LEAD=lead)

        # Size of the second axis of each input array, handed to build_model.
        INPUT_SHAPE1 = np.shape(X1test)[1]
        INPUT_SHAPE2 = np.shape(X2test)[1]
        n_samples = np.shape(X1test)[0]

        # One row per seed. The 100 columns match what confvacc returns
        # (presumably confidence percentiles -- TODO confirm in Gordon_interp).
        confvsacc = np.zeros(shape=(n_seeds, 100))

        # Raw two-class outputs: ENSO part, MJO part, and the combined network.
        model1_rawpreds = np.zeros(shape=(n_seeds, n_samples, 2))
        model2_rawpreds = np.zeros(shape=(n_seeds, n_samples, 2))
        model_rawpreds = np.zeros(shape=(n_seeds, n_samples, 2))

        # Fractions of correct combined predictions attributed to ENSO only,
        # MJO only, or both. A seed that is skipped below keeps 0 in all three.
        model1_fracpred = np.zeros(shape=(n_seeds))
        model2_fracpred = np.zeros(shape=(n_seeds))
        model12_fracpred = np.zeros(shape=(n_seeds))

        # Rows are addressed by position (i_seed), not by seed value, so SEEDS
        # does not have to start at 1 or be contiguous.
        for i_seed, seed in enumerate(SEEDS):
            # Thousands of models are built in this loop; drop the Keras global
            # state left by earlier iterations so memory does not keep growing.
            tf.keras.backend.clear_session()

            # ENSO MODEL
            model1, input1 = build_model(seed,
                                         DROPOUT_RATE,
                                         RIDGE1,
                                         HIDDENS1,
                                         INPUT_SHAPE1,
                                         MODELNAME1)
            # MJO MODEL
            model2, input2 = build_model(seed,
                                         DROPOUT_RATE,
                                         RIDGE2,
                                         HIDDENS2,
                                         INPUT_SHAPE2,
                                         MODELNAME2)
            # COMBINE ENSO & MJO MODEL
            model = fullmodel(model1, model2,
                              input1, input2,
                              seed)

            # Zero-padded to five digits ('00001' ... '00005' for seeds 1-5).
            MODEL_FINAME = 'LEAD_'+str(lead)+'_AVG_'+str(avg)+'__'+f'{int(seed):05d}'+'.h5'
            model.load_weights(MODEL_DIR+MODEL_FINAME)

            model_rawpreds[i_seed] = model.predict((X1test, X2test))
            conf = np.max(model_rawpreds[i_seed], axis=-1)        # value of the winning class
            predval = np.argmax(model_rawpreds[i_seed], axis=-1)  # index of the winning class

            # ----- confidence vs accuracy for all seeds:
            confvsacc[i_seed], _, _ = confvacc(confval=conf,
                                               predval=predval,
                                               Ytest=Ytest)

            # ----- model contribution:
            # per-sub-network output vectors (model X output * weight)
            model1_rawpreds[i_seed], model2_rawpreds[i_seed] = getoutputvecs(model,
                                                                             model1,
                                                                             model2,
                                                                             X1test,
                                                                             X2test)
            # model X winning class
            model1pred = np.argmax(model1_rawpreds[i_seed], axis=1)
            model2pred = np.argmax(model2_rawpreds[i_seed], axis=1)

            # Where ENSO/MJO/final model (model 1/model 2/total) are correct
            i_model1corr = model1pred == Ytest
            i_model2corr = model2pred == Ytest
            i_modelcorr = predval == Ytest

            # terminology: "win" = modelX prediction is also (correct) full model prediction
            # model X correct & model correct (model ~X not correct)
            i_model1win = i_model1corr & i_modelcorr & ~i_model2corr
            i_model2win = i_model2corr & i_modelcorr & ~i_model1corr
            # model 1&2 correct & model correct
            i_model12win = i_model1corr & i_model2corr & i_modelcorr

            n_correct = int(np.count_nonzero(i_modelcorr))
            n_model1win = int(np.count_nonzero(i_model1win))
            n_model2win = int(np.count_nonzero(i_model2win))
            n_model12win = int(np.count_nonzero(i_model12win))
            n_attributed = n_model1win + n_model2win + n_model12win

            if n_correct == 0:
                # Nothing to attribute; also avoids dividing by zero.
                print('WARNING (SEED '+str(seed)+'): no correct predictions; '
                      'fractions left at 0')
            elif n_correct != n_attributed:
                # The three categories only partition the correct predictions when
                # ~i_model1corr & ~i_model2corr & i_modelcorr never happens.
                print('WARNING (SEED '+str(seed)+'): '+str(n_correct - n_attributed)
                      + ' correct predictions have neither sub-model correct; '
                      'fractions left at 0')
            else:
                print('SEED: '+str(seed))
                # Fraction of correct model predictions due to just ENSO/MJO/ENSO&MJO:
                model1_fracpred[i_seed] = n_model1win / n_correct
                model2_fracpred[i_seed] = n_model2win / n_correct
                model12_fracpred[i_seed] = n_model12win / n_correct

        if SAVE:
            print('saving')
            # filename prefix -> array; each is written as
            # <prefix>_LEAD_<lead>_AVG_<avg>__<first seed>-<last seed>.npy
            outputs = {
                'confvsacc': confvsacc,
                'model1_rawpred': model1_rawpreds,
                'model2_rawpred': model2_rawpreds,
                'model_rawpred': model_rawpreds,
                'model1_fracpred': model1_fracpred,
                'model2_fracpred': model2_fracpred,
                'model12_fracpred': model12_fracpred,
            }
            suffix = '_LEAD_'+str(lead)+'_AVG_'+str(avg)+'__'+seed_tag+'.npy'
            for prefix, values in outputs.items():
                np.save(ddir_save+prefix+suffix, values)


LEAD: 7
AVG: 7
load testing data


2023-09-08 14:43:09.628197: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  SSE4.1 SSE4.2 AVX AVX2 AVX512F AVX512_VNNI FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.


SEED: 1
SEED: 2
SEED: 3
SEED: 4
SEED: 5
saving
AVG: 8
load testing data
SEED: 1
SEED: 2
SEED: 3
 27/361 [=>............................] - ETA: 0s 