In [2]:
import glob
import matplotlib.pyplot as plt
import numpy as np
import pandas
import xarray
import cftime
import tensorflow as tf

  from ._conv import register_converters as _register_converters


In [3]:
datapath = 'nino34_monthly.nc'
nino34 = xarray.open_dataset(datapath, decode_times = False)
print(nino34)
nino34 = np.array(nino34['nino34'].values)

<xarray.Dataset>
Dimensions:         (bounds: 2, month: 12, time: 7800)
Coordinates:
  * time            (time) float64 15.5 45.0 74.5 ... 2.372e+05 2.372e+05
Dimensions without coordinates: bounds, month
Data variables:
    nino34          (time) float64 ...
    time_bnds       (time, bounds) float64 ...
    areacello       float32 ...
    days_per_month  (month) int32 ...


In [4]:
def ONI(nino34, m = 3):
    oni = np.array(nino34)
    length = nino34.shape[0]
    for i in range(length):
        oni[i] = np.mean(nino34[max(0, (i - m + 1)) : min((i + 1), length)])
    return oni

In [5]:
oni = ONI(nino34)

In [6]:
def climatology(nino34):
    clm = np.zeros(12)
    length = nino34.shape[0]
    for month in range(12):
        section = [12 * i + month for i in range(length // 12)]
        clm[month] = np.mean(nino34[section])
    return clm

In [7]:
clm = climatology(nino34)

In [8]:
def SST_anomaly(nino34, clm):
    anm = np.array(nino34)
    length = nino34.shape[0]
    for i in range(length):
        anm[i] = nino34[i] - clm[i % 12]
    return anm

In [9]:
anm = SST_anomaly(nino34, clm)
oanm = ONI(anm)

# Data Preparation

In [14]:
T = [1, 4, 7, 10, 13]       # prediction timeline
H = [12, 24, 36, 48, 60]    # history used for prediction
include_month = 1           # 1 if we use the month as a feature, 0 otherwise
n_classes = 3               # number of classes (El Nino, El Nina, No Event)
threshold = 0.5         
signal = np.array(nino34[12 * 50:])   # data used for training/testing
length = signal.shape[0]    # number of data points

In [15]:
np.random.seed(0)

for t in T:
    for h in H:
        
        size = length - h - t       # effective dataset size
        
        # create the 'history matrix'
        data = np.ndarray((size, h + include_month))
        for i in range(size):
            if(include_month == False):
                data[i] = signal[i:(i + h)]
            else:
                data[i] = np.append(signal[i:(i + h)], (i + h + t) % 12)

        # label El Nino as 2, El Nina as 0 and no event as 1
        labels = np.ndarray((size))
        
        for i in range(size):
            if(oanm[i + h + t] >= threshold):
                labels[i] = 2
            elif(oanm[i + h + t] <= -threshold):
                labels[i] = 0
            else:
                labels[i] = 1

        split = size // 10      
        shuffle = np.random.permutation(size)
        train_ind = np.array(shuffle[0: 8 * split])
        val_ind = np.array(shuffle[(8 * split + 1): 9 * split])
        test_ind = np.array(shuffle[(9 * split + 1): size])

        train = np.array(data[train_ind])
        train_labels = np.array(labels[train_ind])

        val = np.array(data[val_ind])
        val_labels = np.array(labels[val_ind])

        test = np.array(data[test_ind])
        test_labels = np.array(labels[test_ind])
        
        mean = np.mean(train, axis = 0)
        std = np.std(train, axis = 0)

        train_n = np.divide(
            train - np.outer(np.ones(train.shape[0]), mean),
            np.outer(np.ones(train.shape[0]), std))

        val_n = np.divide(
            val - np.outer(np.ones(val.shape[0]), mean),
            np.outer(np.ones(val.shape[0]), std))

        test_n = np.divide(
            test - np.outer(np.ones(test.shape[0]), mean),
            np.outer(np.ones(test.shape[0]), std))
        
        with tf.Session() as sess:
            sess.run(tf.global_variables_initializer())
            encoded_train_labels = tf.one_hot(train_labels, depth = n_classes).eval()
            encoded_val_labels = tf.one_hot(val_labels, depth = n_classes).eval()
            encoded_test_labels = tf.one_hot(test_labels, depth = n_classes).eval()
            
        (d, N1, N2, N3) = (0.3, 768, 768, 384)
        epochs = 60
        
        acc_th = {}
        loss_th = {}
        
        model = tf.keras.models.Sequential([
              tf.keras.layers.Dense(N1, activation=tf.nn.relu),
              tf.keras.layers.Dropout(d),
              tf.keras.layers.Dense(N2, activation=tf.nn.relu),
              tf.keras.layers.Dropout(d),
              tf.keras.layers.Dense(N3, activation=tf.nn.relu),
              tf.keras.layers.Dropout(d),
              tf.keras.layers.Dense(n_classes, activation=tf.nn.softmax)
            ])

        model.reset_states()

        model.compile(optimizer='adam',
            loss='categorical_crossentropy',
            metrics=['accuracy'])

        model.fit(train_n, encoded_train_labels, epochs = epochs, verbose = 0)
        (loss_th[(t, h)], acc_th[(t, h)]) = model.evaluate(val_n, encoded_val_labels, verbose = 0)
        
        print(t, h, "=>", acc_th[(t, h)])

1 12 => 0.39748953979051926
1 24 => 0.4916201115653502
1 36 => 0.682517482767572
1 48 => 0.7044817927170869
1 60 => 0.7724719101123596
4 12 => 0.39051603955038755
4 24 => 0.5055865920122775
4 36 => 0.6377622381790534
4 48 => 0.7391304350333996
4 60 => 0.800561797752809
7 12 => 0.38354253847894976
7 24 => 0.4769230771314848
7 36 => 0.6246498602778972
7 48 => 0.7503506312196419
7 60 => 0.7640449438202247
10 12 => 0.4036312849162011
10 24 => 0.49930069963415186
10 36 => 0.6428571430241027
10 48 => 0.7545582047894827
10 60 => 0.7893258426966292
13 12 => 0.4259776536312849
13 24 => 0.5482517485018377
13 36 => 0.6750700281781643
13 48 => 0.7275280898876404
13 60 => 0.7721518987760932
