In [16]:
import glob
import matplotlib.pyplot as plt
import numpy as np
import pandas
import xarray
import cftime
import tensorflow as tf

# Data Import

In [17]:
datapath = 'nino34_monthly.nc'
# Open the file inside a context manager so the underlying file handle is
# released once the data has been read. Time decoding is disabled
# (decode_times=False), so the time coordinate stays as raw numbers.
with xarray.open_dataset(datapath, decode_times = False) as nino34_ds:
    # Copy the 'nino34' variable into an in-memory NumPy array while the file
    # is still open; everything downstream works on this plain array.
    nino34 = np.array(nino34_ds['nino34'].values)

In [18]:
def ONI(nino34, m = 3):
    """Return the trailing ``m``-sample running mean of ``nino34``.

    Element ``i`` of the result is the mean of ``nino34[i - m + 1 : i + 1]``.
    For the first ``m - 1`` positions the window is shortened to the samples
    that exist (``nino34[0 : i + 1]``), so the output has the same length as
    the input.

    Parameters
    ----------
    nino34 : array-like
        Series to smooth, indexed along its first axis. It is not modified.
    m : int, optional
        Window length in samples (default 3). Must be at least 1.

    Returns
    -------
    numpy.ndarray
        Smoothed copy of the input. Floating-point (or complex) input keeps
        its dtype; any other dtype is promoted to float64 so the means are not
        truncated when they are stored.

    Raises
    ------
    ValueError
        If ``m`` is smaller than 1 (every window would be empty).
    """
    if m < 1:
        raise ValueError("m must be at least 1")

    values = np.asarray(nino34)
    # Storing a mean back into an integer buffer would silently drop the
    # fractional part, so only inexact dtypes are kept as they are.
    if np.issubdtype(values.dtype, np.inexact):
        out_dtype = values.dtype
    else:
        out_dtype = np.float64
    oni = values.astype(out_dtype)  # astype returns a fresh array by default

    for i in range(values.shape[0]):
        start = max(0, i - m + 1)  # clip the window at the start of the series
        oni[i] = np.mean(values[start : i + 1])
    return oni

In [19]:
# Running mean of the raw series using ONI's default window (m = 3).
oni = ONI(nino34)

In [20]:
def climatology(nino34):
    """Average the series separately for each of the 12 positions in a year.

    Entry ``k`` of the result is the mean of ``nino34[k]``, ``nino34[k + 12]``,
    ``nino34[k + 24]``, ... taken over the complete 12-sample groups only;
    samples after the last complete group are ignored.

    Parameters
    ----------
    nino34 : numpy.ndarray
        Series indexed along its first axis.
        (Presumably monthly values, so that slot ``k`` is a calendar month;
        which month index 0 corresponds to depends on the data - TODO confirm.)

    Returns
    -------
    numpy.ndarray
        Float array of length 12 holding the per-slot means.
    """
    n_groups = nino34.shape[0] // 12
    span = 12 * n_groups  # samples covered by complete 12-sample groups
    clm = np.zeros(12)
    for month in range(12):
        # Positions of this slot in every complete group.
        picks = np.arange(month, span, 12)
        clm[month] = np.mean(nino34[picks])
    return clm

In [21]:
# 12-entry climatology: mean of the raw series at each position i % 12,
# computed over the complete 12-sample groups of the whole record.
clm = climatology(nino34)

In [22]:
def SST_anomaly(nino34, clm):
    """Subtract a 12-entry climatology from a series.

    Element ``i`` of the result is ``nino34[i] - clm[i % 12]``.

    Parameters
    ----------
    nino34 : array-like
        Series indexed along its first axis. It is not modified.
    clm : array-like
        Climatology values, indexed by ``i % 12``. It needs an entry for every
        slot that occurs, i.e. 12 entries for series of 12 or more samples.

    Returns
    -------
    numpy.ndarray
        Anomaly array with the same shape as ``nino34``. Floating-point (or
        complex) input keeps its dtype; any other dtype is promoted to float64
        so the fractional part of the anomaly is not truncated.
    """
    values = np.asarray(nino34)
    clm = np.asarray(clm)

    # An integer result buffer would silently drop the fractional part of the
    # anomalies, so only inexact dtypes are kept as they are.
    if np.issubdtype(values.dtype, np.inexact):
        out_dtype = values.dtype
    else:
        out_dtype = np.float64

    slot = np.arange(values.shape[0]) % 12
    # Reshape so the per-sample offset broadcasts over any trailing axes.
    offsets = clm[slot].reshape((-1,) + (1,) * (values.ndim - 1))
    return (values - offsets).astype(out_dtype)

In [23]:
# Anomaly series: raw value minus the climatology entry for its slot (i % 12).
anm = SST_anomaly(nino34, clm)
# Anomalies smoothed with ONI's default 3-sample trailing running mean;
# this is the series the event labels are derived from.
oanm = ONI(anm)

# Data Preparation

In [48]:
# Lead time of the persistence forecast, in samples.
T = 6
# Magnitude of the smoothed anomaly at or beyond which a sample counts as an event.
threshold = 0.5
# Data used for training/testing: the raw series without its first 12 * 50
# samples (copied, so it is independent of `nino34`).
signal = nino34[12 * 50:].copy()
# Number of data points in `signal`.
length = len(signal)

In [49]:
# Label each sample from the smoothed anomaly `oanm`:
#   2 = El Nino (oanm >= threshold), 0 = La Nina (oanm <= -threshold),
#   1 = no event.
# NOTE(review): the labels are taken from oanm[:length], i.e. the FIRST `length`
# samples of the full record, whereas `signal` (which defines `length`) is
# nino34[12 * 50:], the record with its first 600 samples removed. If the
# benchmark is meant to cover the same samples as `signal`, this should read
# oanm[12 * 50:] - confirm before comparing against a model trained on `signal`.
oanm_window = oanm[:length]
labels = np.where(oanm_window >= threshold, 2.0,
                  np.where(oanm_window <= -threshold, 0.0, 1.0))

# Persistence forecast: the label predicted for sample i + T is the label
# observed at sample i. The first T samples have no forecast and keep the
# sentinel value 3, which never equals a real label (0, 1 or 2).
persistance_labels = np.full(length, 3.0)
persistance_labels[T:] = labels[:max(length - T, 0)]

In [50]:
# Persistence accuracy: share of the length - T samples that have a forecast
# (index T onwards) whose forecast label equals the observed label. The first
# T entries hold the sentinel 3 and can never match, so leaving them out of
# the comparison gives the same ratio as counting over the full arrays.
acc = np.mean(persistance_labels[T:] == labels[T:])

In [51]:
# Show the persistence accuracy for lead time T.
print(acc)

0.5118154017236586


# Varying T

In [63]:
# Number of lead times to evaluate; the sweep below covers T = 0 .. max_t - 1.
max_t = 49

In [64]:
# One accuracy slot per lead time, indexed by T.
# Note: this rebinds `acc`, which previously held the single-T scalar result.
acc = np.zeros(max_t)

In [65]:
# The labels do not depend on the lead time, so they are computed once,
# outside the loop:
#   2 = El Nino (oanm >= threshold), 0 = La Nina (oanm <= -threshold),
#   1 = no event.
# NOTE(review): the labels are taken from oanm[:length], i.e. the FIRST `length`
# samples of the full record, whereas `signal` (which defines `length`) is
# nino34[12 * 50:]. If the benchmark should cover the same samples as `signal`,
# this should read oanm[12 * 50:] - confirm.
oanm_window = oanm[:length]
labels = np.where(oanm_window >= threshold, 2.0,
                  np.where(oanm_window <= -threshold, 0.0, 1.0))

for t in range(max_t):
    # Persistence forecast at lead t: sample i + t is predicted to carry the
    # label observed at sample i. The first t samples have no forecast and keep
    # the sentinel 3, which never equals a real label.
    persistance_labels = np.full(length, 3.0)
    persistance_labels[t:] = labels[:max(length - t, 0)]

    # Accuracy over the length - t samples that actually have a forecast.
    acc[t] = np.sum(persistance_labels == labels) / (length - t)
    print("Persistance benchmark for T =", t, ":", acc[t])

Persistance benchmark for T = 0 : 1.0
Persistance benchmark for T = 1 : 0.878455341019586
Persistance benchmark for T = 2 : 0.7718810780772437
Persistance benchmark for T = 3 : 0.6825065999722106
Persistance benchmark for T = 4 : 0.6093663146192329
Persistance benchmark for T = 5 : 0.5549687282835303
Persistance benchmark for T = 6 : 0.5118154017236586
Persistance benchmark for T = 7 : 0.4751842068677881
Persistance benchmark for T = 8 : 0.44911012235817577
Persistance benchmark for T = 9 : 0.4309553608677514
Persistance benchmark for T = 10 : 0.413769123783032
Persistance benchmark for T = 11 : 0.40422868270969536
Persistance benchmark for T = 12 : 0.3949638286032276
Persistance benchmark for T = 13 : 0.38527897592876026
Persistance benchmark for T = 14 : 0.3750347898691901
Persistance benchmark for T = 15 : 0.3642310368823939
Persistance benchmark for T = 16 : 0.35384187082405344
Persistance benchmark for T = 17 : 0.34679103438674647
Persistance benchmark for T = 18 : 0.3412698412698