# Analysis

In [1]:
# Convenient Jupyter setup.
# Load IPython's autoreload extension and select mode 2, so that imported modules
# (e.g. the local `src` package used below) are reloaded before each cell executes.
%load_ext autoreload
%autoreload 2
# Switch the tab-completer to its "greedy" mode.
%config IPCompleter.greedy=True

In [2]:
# Set up plotting style: apply a jupyterthemes matplotlib style first, then the
# project's own defaults from `src.plot_settings`.
# NOTE(review): "grade3" appears to be one of jupyterthemes' light themes, so the
# earlier "dark mode" wording looks stale — confirm which look is intended.
from jupyterthemes import jtplot
jtplot.style(theme="grade3", context="notebook", ticks=True, grid=False)
from src.plot_settings import ps_defaults
# Called after jtplot.style; presumably adjusts settings applied above — verify.
ps_defaults(use_tex=False, dpi=150)

## Introduction
Reference: [PySpark4Climate tutorial — Oceanic Niño Index](https://ncar.github.io/PySpark4Climate/tutorials/Oceanic-Ni%C3%B1o-Index/)

This notebook introduces the calculation of the Oceanic Niño Index (ONI), using PySpark to parallelize tasks such as computing monthly averages for a given grid chunk.

NOAA's operational definitions of El Niño and La Niña conditions are based upon the Oceanic Niño Index (ONI). The ONI is defined as the 3-month running mean of SST anomalies in the Niño 3.4 region (5°N–5°S, 120°–170°W).

The ONI is one measure of the El Niño-Southern Oscillation, and other indices can confirm whether features consistent with a coupled ocean-atmosphere phenomenon accompanied these periods.

## Computational Recipe
1. Compute the area-averaged total SST for the Niño 3.4 region.
2. Compute the monthly climatology (1854–2016) of the area-averaged total SST for the Niño 3.4 region, and subtract it from the area-averaged total SST time series to obtain anomalies.
3. Smooth the anomalies with a 3-month running mean.


In [3]:
import numpy as np
import xarray as xr
import dask
from glob import glob
import pandas as pd
import matplotlib.pyplot as plt

In [4]:
# def get_region_of_interest(dset: xr.Dataset) -> xr.Dataset:
#     return dset.sel(lat=slice(-6, 6), lon=slice(190, 240))

In [None]:
# FIXME(review): this import statement is incomplete (no names follow `import`), so the
# cell raises a SyntaxError. Presumably it was meant to pull a helper such as the
# commented-out `get_region_of_interest` from `src.xr_utils` — confirm and complete.
from src.xr_utils import 

In [5]:
def global_mean_sst(dset: xr.Dataset) -> xr.DataArray:
    """Return the area-weighted mean SST over the lat/lon extent of ``dset``.

    Despite the name, the average is taken over whatever region ``dset``
    covers (the whole globe only if ``dset`` is global).

    The SST is first averaged along ``lon`` and the resulting zonal means are
    then combined with cos(latitude) weights, which account for grid cells
    shrinking towards the poles on a regular lat/lon grid.

    Parameters
    ----------
    dset : xr.Dataset
        Dataset with an ``sst`` variable on ``lat`` and ``lon`` dimensions.
        ``lat`` is assumed to be in degrees (as in the region selection used
        elsewhere in this notebook) — confirm for other inputs.

    Returns
    -------
    xr.DataArray
        ``sst`` with the ``lat`` and ``lon`` dimensions averaged out; any
        other dimensions (e.g. ``time``) are preserved.
    """
    # Zonal mean: average SST along longitude for every latitude.
    zonal_mean = dset.sst.mean(dim="lon")

    # np.cos expects radians, so convert the latitude coordinate from degrees first.
    weights = np.cos(np.deg2rad(dset.lat))

    # Normalise only by latitudes that actually carry data: `.sum` skips NaN
    # zonal means in the numerator, so their weights must not be counted either.
    valid_weights = weights.where(zonal_mean.notnull())

    weighted_sum = (weights * zonal_mean).sum(dim="lat")
    weight_total = valid_weights.sum(dim="lat")

    return weighted_sum / weight_total

In [None]:
# NOTE(review): `reader`, `sc`, `paths` and `get_region_of_interest` are not defined in
# the cells visible above — they must be provided before this cell can run.

# Number of records to keep. 1854-2016 inclusive is 163 years, i.e. 1956 monthly
# fields — this assumes each collected element is one monthly field (confirm
# against the reader's "slice" splitting mode).
FIRST_YEAR, LAST_YEAR = 1854, 2016
N_MONTHS = (LAST_YEAR - FIRST_YEAR + 1) * 12  # == 1956

# Read the input files into an RDD, cut each element down to the region of
# interest, and cache that intermediate result.
rdd = reader.nc_multi_read(sc, paths, data_splitting_mode="slice")
region_of_interest = rdd.map(get_region_of_interest).cache()

# Area-weighted mean SST per element, gathered back onto the driver.
mean_global_temp = region_of_interest.map(global_mean_sst)
mean_sst = mean_global_temp.collect()

# Stitch the per-element means into one series along `time`.
a = xr.concat(mean_sst[:N_MONTHS], dim="time")

In [None]:
# Wrap the mean-SST series in a Dataset so the variable has an explicit name.
ds = a.to_dataset(name="mean_sst")

# Monthly climatology: group by calendar month and average over time.
climatology = ds.groupby("time.month").mean("time")
# Only a cell's last expression is rendered automatically, so show this
# intermediate result explicitly (IPython provides `display` as a builtin).
display(climatology)

# Anomalies: subtract each calendar month's climatology from the matching time steps.
anomalies = ds.groupby("time.month") - climatology
anomalies