In [1]:
import glob
import matplotlib.pyplot as plt
import numpy as np
import pandas
import xarray
import cftime
import tensorflow as tf

  from ._conv import register_converters as _register_converters


In [2]:
datapath = 'nino34_monthly.nc'
nino34 = xarray.open_dataset(datapath, decode_times = False)
print(nino34)
nino34 = np.array(nino34['nino34'].values)

<xarray.Dataset>
Dimensions:         (bounds: 2, month: 12, time: 7800)
Coordinates:
  * time            (time) float64 15.5 45.0 74.5 ... 2.372e+05 2.372e+05
Dimensions without coordinates: bounds, month
Data variables:
    nino34          (time) float64 ...
    time_bnds       (time, bounds) float64 ...
    areacello       float32 ...
    days_per_month  (month) int32 ...


In [3]:
def ONI(nino34, m = 3):
    oni = np.array(nino34)
    length = nino34.shape[0]
    for i in range(length):
        oni[i] = np.mean(nino34[max(0, (i - m + 1)) : min((i + 1), length)])
    return oni

In [4]:
oni = ONI(nino34)

In [5]:
def climatology(nino34):
    clm = np.zeros(12)
    length = nino34.shape[0]
    for month in range(12):
        section = [12 * i + month for i in range(length // 12)]
        clm[month] = np.mean(nino34[section])
    return clm

In [6]:
clm = climatology(nino34)

In [7]:
def SST_anomaly(nino34, clm):
    anm = np.array(nino34)
    length = nino34.shape[0]
    for i in range(length):
        anm[i] = nino34[i] - clm[i % 12]
    return anm

In [8]:
anm = SST_anomaly(nino34, clm)
oanm = ONI(anm)

In [18]:
T = 6                       # prediction timeline
H = 48                      # history used for prediction
n_classes = 3               # number of classes (El Nino, El Nina, No Event)
threshold = 0.5         
signal = np.array(nino34[12 * 50:])   # data used for training/testing
length = signal.shape[0]    # number of data points
size = length - H - T       # effective dataset size

In [19]:
# create the 'history matrix'
data = np.ndarray((size, H))
for i in range(size):
    data[i] = signal[i:(i + H)]

# label El Nino as 2, El Nina as 0 and no event as 1
labels = np.ndarray((size))
for i in range(length - H - T):
    if(oanm[i + H + T] >= threshold):
        labels[i] = 2
    elif(oanm[i + H + T] <= -threshold):
        labels[i] = 0
    else:
        labels[i] = 1

In [20]:
np.random.seed(0)

size2 = size // 12
acc_m = np.zeros(size2)
loss_m = np.zeros(size2)

month = ["Jan", "Feb", "Mar", "Apr", "May", "June", "July", "Aug", "Sep", "Oct", "Nov", "Dec"]

for m in range(12):
    
    split = size2 // 10      
    shuffle = 12 * np.random.permutation(size2) + m * np.ones(size2, dtype = int)
    
    train_ind = np.array(shuffle[0: 8 * split])
    val_ind = np.array(shuffle[(8 * split + 1): 9 * split])
    test_ind = np.array(shuffle[(9 * split + 1): size])

    train = np.array(data[train_ind])
    train_labels = np.array(labels[train_ind])

    val = np.array(data[val_ind])
    val_labels = np.array(labels[val_ind])

    test = np.array(data[test_ind])
    test_labels = np.array(labels[test_ind])
    
    mean = np.mean(train, axis = 0)
    std = np.std(train, axis = 0)

    train_n = np.divide(
        train - np.outer(np.ones(train.shape[0]), mean),
        np.outer(np.ones(train.shape[0]), std))

    val_n = np.divide(
        val - np.outer(np.ones(val.shape[0]), mean),
        np.outer(np.ones(val.shape[0]), std))

    test_n = np.divide(
        test - np.outer(np.ones(test.shape[0]), mean),
        np.outer(np.ones(test.shape[0]), std))

    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        encoded_train_labels = tf.one_hot(train_labels, depth = n_classes).eval()
        encoded_val_labels = tf.one_hot(val_labels, depth = n_classes).eval()
        encoded_test_labels = tf.one_hot(test_labels, depth = n_classes).eval()
        
    (d, N1, N2, N3) = (0.3, 768, 768, 384)
    epochs = 60
        
    model = tf.keras.models.Sequential([
              tf.keras.layers.Dense(N1, activation=tf.nn.relu),
              tf.keras.layers.Dropout(d),
              tf.keras.layers.Dense(N2, activation=tf.nn.relu),
              tf.keras.layers.Dropout(d),
              tf.keras.layers.Dense(N3, activation=tf.nn.relu),
              tf.keras.layers.Dropout(d),
              tf.keras.layers.Dense(n_classes, activation=tf.nn.softmax)
            ])

    model.reset_states()

    model.compile(optimizer='adam',
            loss='categorical_crossentropy',
            metrics=['accuracy'])

    model.fit(train_n, encoded_train_labels, epochs = epochs, verbose = 0)
    (loss_m[m], acc_m[m]) = model.evaluate(val_n, encoded_val_labels, verbose = 0)
        
    print("Accuracy on", month[m], ":", acc_m[m])

Accuracy on Jan : 0.4655172444623092
Accuracy on Feb : 0.5000000010276663
Accuracy on Mar : 0.43103448686928586
Accuracy on Apr : 0.48275862377265405
Accuracy on May : 0.3965517261932636
Accuracy on June : 0.25862069222433814
Accuracy on July : 0.3448275903175617
Accuracy on Aug : 0.3448275887760623
Accuracy on Sep : 0.37931034996591767
Accuracy on Oct : 0.41379310344827586
Accuracy on Nov : 0.3448275903175617
Accuracy on Dec : 0.4310344848139533
