In [1]:
import glob
import matplotlib.pyplot as plt
import numpy as np
import pandas
import xarray
import cftime
import tensorflow as tf

  from ._conv import register_converters as _register_converters


In [2]:
datapath = 'nino34_monthly.nc'
nino34 = xarray.open_dataset(datapath, decode_times = False)
print(nino34)
nino34 = np.array(nino34['nino34'].values)

<xarray.Dataset>
Dimensions:         (bounds: 2, month: 12, time: 7800)
Coordinates:
  * time            (time) float64 15.5 45.0 74.5 ... 2.372e+05 2.372e+05
Dimensions without coordinates: bounds, month
Data variables:
    nino34          (time) float64 ...
    time_bnds       (time, bounds) float64 ...
    areacello       float32 ...
    days_per_month  (month) int32 ...


In [3]:
def ONI(nino34, m = 3):
    oni = np.array(nino34)
    length = nino34.shape[0]
    for i in range(length):
        oni[i] = np.mean(nino34[max(0, (i - m + 1)) : min((i + 1), length)])
    return oni

In [4]:
oni = ONI(nino34)

In [5]:
def climatology(nino34):
    clm = np.zeros(12)
    length = nino34.shape[0]
    for month in range(12):
        section = [12 * i + month for i in range(length // 12)]
        clm[month] = np.mean(nino34[section])
    return clm

In [6]:
clm = climatology(nino34)

In [7]:
def SST_anomaly(nino34, clm):
    anm = np.array(nino34)
    length = nino34.shape[0]
    for i in range(length):
        anm[i] = nino34[i] - clm[i % 12]
    return anm

In [8]:
anm = SST_anomaly(nino34, clm)
oanm = ONI(anm)

In [59]:
T = 6                       # prediction timeline
H = 48                      # history used for prediction
n_classes = 3               # number of classes (El Nino, El Nina, No Event)
threshold = 0.5         
signal = np.array(nino34[12 * 50:])   # data used for training/testing
length = signal.shape[0]    # number of data points
size = length - H - T       # effective dataset size

In [64]:
# create the 'history matrix'
data = np.ndarray((size, H))
for i in range(size):
    data[i] = signal[i:(i + H)]

# label El Nino as 2, El Nina as 0 and no event as 1
labels = np.ndarray((size))
for i in range(length - H - T):
    if(oanm[i + H + T] >= threshold):
        labels[i] = 2
    elif(oanm[i + H + T] <= -threshold):
        labels[i] = 0
    else:
        labels[i] = 1

# Fixed Month (February)

In [63]:
# predicting february

np.random.seed(0)

size2 = size // 12
    
split = size2 // 10      
shuffle = 12 * np.random.permutation(size2) + np.ones(size2, dtype = int)
    
train_ind = np.array(shuffle[0: 8 * split])
val_ind = np.array(shuffle[8 * split: 9 * split])
test_ind = np.array(shuffle[9 * split: size2])

train = np.array(data[train_ind])
train_labels = np.array(labels[train_ind])

val = np.array(data[val_ind])
val_labels = np.array(labels[val_ind])

test = np.array(data[test_ind])
test_labels = np.array(labels[test_ind])
    
mean = np.mean(train, axis = 0)
std = np.std(train, axis = 0)

train_n = np.divide(
        train - np.outer(np.ones(train.shape[0]), mean),
        np.outer(np.ones(train.shape[0]), std))

val_n = np.divide(
        val - np.outer(np.ones(val.shape[0]), mean),
        np.outer(np.ones(val.shape[0]), std))

test_n = np.divide(
        test - np.outer(np.ones(test.shape[0]), mean),
        np.outer(np.ones(test.shape[0]), std))

with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        encoded_train_labels = tf.one_hot(train_labels, depth = n_classes).eval()
        encoded_val_labels = tf.one_hot(val_labels, depth = n_classes).eval()
        encoded_test_labels = tf.one_hot(test_labels, depth = n_classes).eval()
        
(d, N1, N2) = (0.5, 128, 64)
epochs = 40

model = tf.keras.models.Sequential([
              tf.keras.layers.Dense(N1, activation=tf.nn.relu),
              tf.keras.layers.Dropout(d),
              tf.keras.layers.Dense(N2, activation=tf.nn.relu),
              tf.keras.layers.Dropout(d),
              tf.keras.layers.Dense(n_classes, activation=tf.nn.softmax)
            ])

model.reset_states()

model.compile(optimizer='sgd',
            loss='categorical_crossentropy',
            metrics=['accuracy'])

model.fit(train_n, encoded_train_labels, epochs = epochs)
(loss_m, acc_m) = model.evaluate(val_n, encoded_val_labels)

print(acc_m)

Epoch 1/40
Epoch 2/40
Epoch 3/40
Epoch 4/40
Epoch 5/40
Epoch 6/40
Epoch 7/40
Epoch 8/40
Epoch 9/40
Epoch 10/40
Epoch 11/40
Epoch 12/40
Epoch 13/40
Epoch 14/40
Epoch 15/40
Epoch 16/40
Epoch 17/40
Epoch 18/40
Epoch 19/40
Epoch 20/40
Epoch 21/40
Epoch 22/40
Epoch 23/40
Epoch 24/40
Epoch 25/40
Epoch 26/40
Epoch 27/40
Epoch 28/40
Epoch 29/40
Epoch 30/40
Epoch 31/40
Epoch 32/40
Epoch 33/40
Epoch 34/40
Epoch 35/40
Epoch 36/40
Epoch 37/40
Epoch 38/40
Epoch 39/40
Epoch 40/40
0.6101694935459202


In [14]:
from sklearn import multiclass
from sklearn.svm import SVC

clf = multiclass.OneVsRestClassifier(SVC(kernel='rbf', random_state = 0))
clf.fit(train_n, train_labels)
acc_SVC = clf.score(val_n, val_labels)

print(np.around(acc_SVC * 100, decimals = 2), "%")

60.34 %


# Fixed Season (Winter)

In [82]:
np.random.seed(0)

size2 = size // 12
    
split = (3 * size2) // 10    
shuffle = np.zeros(size2 * 3, dtype = int)
shuffle[0:size2] = 12 * np.random.permutation(size2)
shuffle[size2: 2 * size2] = 12 * np.random.permutation(size2) + 1 * np.ones(size2, dtype = int) #feb
shuffle[2 * size2: 3 * size2] = 12 * np.random.permutation(size2) + 11 * np.ones(size2, dtype = int) #dec
    
train_ind = np.array(shuffle[0: 8 * split])
val_ind = np.array(shuffle[8 * split: 9 * split])
test_ind = np.array(shuffle[9 * split: size2])

train = np.array(data[train_ind])
train_labels = np.array(labels[train_ind])

val = np.array(data[val_ind])
val_labels = np.array(labels[val_ind])

test = np.array(data[test_ind])
test_labels = np.array(labels[test_ind])
    
mean = np.mean(train, axis = 0)
std = np.std(train, axis = 0)

train_n = np.divide(
        train - np.outer(np.ones(train.shape[0]), mean),
        np.outer(np.ones(train.shape[0]), std))

val_n = np.divide(
        val - np.outer(np.ones(val.shape[0]), mean),
        np.outer(np.ones(val.shape[0]), std))

test_n = np.divide(
        test - np.outer(np.ones(test.shape[0]), mean),
        np.outer(np.ones(test.shape[0]), std))

with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        encoded_train_labels = tf.one_hot(train_labels, depth = n_classes).eval()
        encoded_val_labels = tf.one_hot(val_labels, depth = n_classes).eval()
        encoded_test_labels = tf.one_hot(test_labels, depth = n_classes).eval()
        
(d, N1, N2) = (0.5, 128, 64)
epochs = 30

model = tf.keras.models.Sequential([
              tf.keras.layers.Dense(N1, activation=tf.nn.relu),
              tf.keras.layers.Dropout(d),
              tf.keras.layers.Dense(N2, activation=tf.nn.relu),
              tf.keras.layers.Dropout(d),
              tf.keras.layers.Dense(n_classes, activation=tf.nn.softmax)
            ])

model.reset_states()

sgd = tf.keras.optimizers.SGD(lr=0.01, momentum=0.001, decay=0.001, nesterov=False)

model.compile(optimizer=sgd,
            loss='categorical_crossentropy',
            metrics=['accuracy'])

model.fit(train_n, encoded_train_labels, epochs = epochs)
(loss_m, acc_m) = model.evaluate(val_n, encoded_val_labels)

print(acc_m)

Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30
0.5280898869707343


In [77]:
from sklearn import multiclass
from sklearn.svm import SVC

clf = multiclass.OneVsRestClassifier(SVC(kernel='rbf', random_state = 0))
clf.fit(train_n, train_labels)
acc_SVC = clf.score(val_n, val_labels)

print(np.around(acc_SVC * 100, decimals = 2), "%")

52.81 %


# Fixed Season (Spring)

In [None]:
np.random.seed(0)

size2 = size // 12
    
split = (3 * size2) // 10    
shuffle = np.zeros(size2 * 3, dtype = int)
shuffle[0:size2] = 12 * np.random.permutation(size2) + 2 * np.ones(size2, dtype = int) # march
shuffle[size2: 2 * size2] = 12 * np.random.permutation(size2) + 3 * np.ones(size2, dtype = int) # april
shuffle[2 * size2: 3 * size2] = 12 * np.random.permutation(size2) + 4 * np.ones(size2, dtype = int) # may
    
train_ind = np.array(shuffle[0: 8 * split])
val_ind = np.array(shuffle[8 * split: 9 * split])
test_ind = np.array(shuffle[9 * split: size2])

train = np.array(data[train_ind])
train_labels = np.array(labels[train_ind])

val = np.array(data[val_ind])
val_labels = np.array(labels[val_ind])

test = np.array(data[test_ind])
test_labels = np.array(labels[test_ind])
    
mean = np.mean(train, axis = 0)
std = np.std(train, axis = 0)

train_n = np.divide(
        train - np.outer(np.ones(train.shape[0]), mean),
        np.outer(np.ones(train.shape[0]), std))

val_n = np.divide(
        val - np.outer(np.ones(val.shape[0]), mean),
        np.outer(np.ones(val.shape[0]), std))

test_n = np.divide(
        test - np.outer(np.ones(test.shape[0]), mean),
        np.outer(np.ones(test.shape[0]), std))

with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        encoded_train_labels = tf.one_hot(train_labels, depth = n_classes).eval()
        encoded_val_labels = tf.one_hot(val_labels, depth = n_classes).eval()
        encoded_test_labels = tf.one_hot(test_labels, depth = n_classes).eval()
        
(d, N1, N2, N3) = (0.3, 768, 768, 384)
epochs = 35

model = tf.keras.models.Sequential([
              tf.keras.layers.Dense(N1, activation=tf.nn.relu),
              tf.keras.layers.Dropout(d),
              tf.keras.layers.Dense(N2, activation=tf.nn.relu),
              tf.keras.layers.Dropout(d),
              tf.keras.layers.Dense(N3, activation=tf.nn.relu),
              tf.keras.layers.Dropout(d),
              tf.keras.layers.Dense(n_classes, activation=tf.nn.softmax)
            ])

model.reset_states()

model.compile(optimizer='adam',
            loss='categorical_crossentropy',
            metrics=['accuracy'])

model.fit(train_n, encoded_train_labels, epochs = epochs)
(loss_m, acc_m) = model.evaluate(val_n, encoded_val_labels)

print(acc_m)

Epoch 1/35
Epoch 2/35
Epoch 3/35
Epoch 4/35
Epoch 5/35
Epoch 6/35
Epoch 7/35
Epoch 8/35
Epoch 9/35
Epoch 10/35
Epoch 11/35
Epoch 12/35
Epoch 13/35
Epoch 14/35
Epoch 15/35
Epoch 16/35
Epoch 17/35
Epoch 18/35
Epoch 19/35
Epoch 20/35
Epoch 21/35
Epoch 22/35
Epoch 23/35
Epoch 24/35
Epoch 25/35
Epoch 26/35
Epoch 27/35
Epoch 28/35
Epoch 29/35
Epoch 30/35
Epoch 31/35
Epoch 32/35
Epoch 33/35
 224/1424 [===>..........................] - ETA: 0s - loss: 0.0266 - acc: 0.9955

In [84]:
from sklearn import multiclass
from sklearn.svm import SVC

clf = multiclass.OneVsRestClassifier(SVC(kernel='rbf', random_state = 0))
clf.fit(train_n, train_labels)
acc_SVC = clf.score(val_n, val_labels)

print(np.around(acc_SVC * 100, decimals = 2), "%")

41.57 %
