# Nino calculations

In [1]:
# Convenient jupyter setup
%load_ext autoreload
%autoreload 2
%config IPCompleter.greedy=True

In [2]:
# Set up plotting settings.
# NOTE(review): "grade3" is a light jupyterthemes theme, not a dark one —
# confirm which theme is intended.
from jupyterthemes import jtplot
jtplot.style(theme="grade3", context="notebook", ticks=True, grid=False)
# Presumably applies the project's matplotlib defaults; called here with
# use_tex=False and dpi=150.
from src.plot_utils import ps_defaults
ps_defaults(use_tex=False, dpi=150)

## Introduction
Reference: <https://ncar.github.io/PySpark4Climate/tutorials/Oceanic-Ni%C3%B1o-Index/>

This notebook introduces the calculation of an El Niño index, using PySpark to parallelize tasks such as computing the monthly averages for a given grid chunk.

NOAA's operational definitions of El Niño and La Niña conditions are based upon the Oceanic Niño Index (ONI). The ONI is defined as the 3-month running mean of SST anomalies in the Niño 3.4 region (5°N–5°S, 120°–170°W).

The ONI is one measure of the El Niño-Southern Oscillation, and other indices can confirm whether features consistent with a coupled ocean-atmosphere phenomenon accompanied these periods.

## Computational Recipe
1. Compute the area-averaged total SST for the Niño 3.4 region.
2. Compute the monthly climatology (1854–2016) of the area-averaged total SST for the Niño 3.4 region, and subtract it from the area-averaged total SST time series to obtain anomalies.
3. Smooth the anomalies with a 3-month running mean.


In [6]:
import os
import numpy as np
import xarray as xr
import dask
from glob import glob
import pandas as pd
import matplotlib.pyplot as plt
from src.xr_utils import sel, open_dataset
from src.constants import OCEAN_DATA_PATH, OCEAN_OUTPUT_PATH

In [7]:
from src.data_loading.download import get_noaa_data

# Fetch the NOAA data; the recorded output of this cell shows
# NOAA_NCDC_ERSST_v3b_SST.nc being downloaded, unzipped and created under src/data.
get_noaa_data()

Dowloading /home/users/sithom/seager19/src/data/NOAA_NCDC_ERSST_v3b_SST.nc


184959it [00:03, 51931.00it/s]


'get_zip'  4.81467 s 

'un_zip'  0.72239 s 

'clean_up'  0.01031 s 

'get_and_unzip'  5.54827 s 

/home/users/sithom/seager19/src/data/NOAA_NCDC_ERSST_v3b_SST.nc created.


In [8]:
from src.constants import NOAA_DATA_PATH

In [10]:
xr.open_dataset(NOAA_DATA_PATH)

In [4]:
# def get_region_of_interest(dset: xr.Dataset) -> xr.Dataset:
#     return dset.sel(lat=slice(-6, 6), lon=slice(190, 240))

In [4]:
os.listdir(OCEAN_OUTPUT_PATH)

['om_spin.20y.restart',
 'om_run2f.nc',
 'om_spin.nc',
 'om_spin.save',
 'om_run2f.save',
 'om_diag.2y.restart',
 'om_diag.nc',
 'om_diag.save',
 'om_run2f_T1.nc',
 'om_spin_T1.nc',
 'om_spin_T2.nc',
 'om_diag_T2.nc',
 'om_run2f_T2.nc',
 'om_spin_T3.nc',
 'om_diag_T3.nc',
 'om_run2f_T3.nc']

In [5]:
open_dataset(OCEAN_OUTPUT_PATH / "om_run2f.nc").SST_SST

In [19]:
print(open_dataset(OCEAN_OUTPUT_PATH / "om_spin.nc"))

<xarray.Dataset>
Dimensions:       (GRID: 1, L_01: 1, L_02: 1, L_03: 1, T_01: 241, T_02: 241, T_03: 241, X_01: 161, X_02: 360, X_03: 360, Y_01: 360, Y_02: 161, Y_03: 161)
Coordinates: (12/13)
  * GRID          (GRID) float32 0.0
  * X_01          (X_01) float32 -80.0 -79.0 -78.0 -77.0 ... 77.0 78.0 79.0 80.0
  * Y_01          (Y_01) float32 0.0 1.0 2.0 3.0 4.0 ... 356.0 357.0 358.0 359.0
  * L_01          (L_01) float32 1.0
  * T_01          (T_01) object 1937-12-30 10:59:31.875000 ... 1957-12-28 16:...
  * X_02          (X_02) float32 0.0 1.0 2.0 3.0 4.0 ... 356.0 357.0 358.0 359.0
    ...            ...
  * L_02          (L_02) float32 0.0
  * T_02          (T_02) object 1937-12-30 10:59:31.875000 ... 1957-12-28 16:...
  * X_03          (X_03) float32 0.0 1.0 2.0 3.0 4.0 ... 356.0 357.0 358.0 359.0
  * Y_03          (Y_03) float32 -80.0 -79.0 -78.0 -77.0 ... 77.0 78.0 79.0 80.0
  * L_03          (L_03) float32 1.0
  * T_03          (T_03) object 1937-12-30 10:59:31.875000 ... 1957-12

In [5]:
def global_mean_sst(dset: xr.Dataset) -> xr.DataArray:
    """Return the cos(latitude)-weighted mean of ``dset.sst``.

    The field is first averaged along ``lon``; the resulting zonal means are
    then combined along ``lat`` using ``cos(lat)`` as the weight.

    Args:
        dset: Dataset with an ``sst`` variable that has ``lat`` and ``lon``
            dimensions. ``lat`` is assumed to be in degrees — TODO confirm
            against the input files.

    Returns:
        ``sst`` with ``lon`` and ``lat`` reduced away; any other dimensions
        of ``sst`` (e.g. time) are kept.
    """
    # Zonal mean: average temperature along each latitude circle.
    mean_sst_lat = dset.sst.mean(dim="lon")

    # np.cos expects radians, so convert the latitudes before building the
    # weights; using degrees directly gives wrong (even negative) weights.
    weights = np.cos(np.deg2rad(dset.lat))

    # Weighted mean of the zonal means over latitude.
    num = (weights * mean_sst_lat).sum(dim="lat")
    denom = weights.sum()

    return num / denom

In [None]:
# NOTE(review): `reader`, `sc` and `paths` are not defined or imported in the
# cells visible here, and `get_region_of_interest` only appears as
# commented-out code, so this cell cannot run on a fresh kernel as written —
# confirm where these names are meant to come from.
# Read the files in `paths` (presumably into a PySpark RDD — verify).
rdd = reader.nc_multi_read(sc, paths, data_splitting_mode="slice")
# Apply the region selection to every element and cache the result.
region_of_interest = rdd.map(get_region_of_interest).cache()
# Latitude-weighted mean SST for every element.
mean_global_temp = region_of_interest.map(global_mean_sst)
# Gather the per-element results into a local sequence.
mean_sst = mean_global_temp.collect()
# Concatenate the first 1956 results along "time".
# NOTE(review): 1956 is presumably 163 years x 12 months (1854-2016) — confirm,
# and consider a named constant.
a = xr.concat(mean_sst[:1956], dim="time")

In [None]:
# Wrap the concatenated series in a Dataset under the variable name "mean_sst".
ds = a.to_dataset(name="mean_sst")
# Monthly climatology: mean over time for each calendar month.
climatology = ds.groupby('time.month').mean('time')
# NOTE: a bare expression that is not the last line of a cell is not displayed.
climatology
# Anomalies: subtract each calendar month's climatological mean from the series.
anomalies = ds.groupby('time.month') - climatology
# NOTE(review): the 3-month running mean described in the recipe is not applied
# in the cells visible here.
anomalies