# Calculate the Indian Ocean Dipole (IOD) index


In [6]:
%matplotlib inline
from matplotlib import pyplot as plt

In [7]:
import os
import sys
import pathlib

import numpy as np
import pandas as pd
from datetime import (datetime, timedelta)
from dateutil import parser

In [8]:
import xarray as xr

In [9]:
# Home directory of the current user; the input data path is built relative to it.
HOME = pathlib.Path.home()

In [10]:
# Directory holding the monthly ERSST v5 NetCDF files.
dpath = HOME / "research/CPP/indices/data/ERSSTV5"

In [11]:
# Collect every NetCDF file in the ERSST directory (glob order is arbitrary).
lfiles_ersst = [*dpath.glob("*.nc")]

In [12]:
# Order the files by path, which orders them by the YYYYMM stamp in the file name.
lfiles_ersst = sorted(lfiles_ersst)

In [13]:
lfiles_ersst[0]

PosixPath('/home/nicolasf/research/CPP/indices/data/ERSSTV5/ersst.v5.185401.nc')

In [14]:
lfiles_ersst[-1]

PosixPath('/home/nicolasf/research/CPP/indices/data/ERSSTV5/ersst.v5.202003.nc')

### Keep only the files from 1950 onwards

In [15]:
# First year of the analysis period: earlier files are discarded and the
# reconstructed monthly time axis starts in January of this year.
start_year = 1950

In [16]:
# NOTE(review): this is the number of monthly files from 1854 through the END of
# start_year (1164 for 1950). The index of the first start_year file is
# (start_year - 1854) * 12 (1152 for 1950), which is what the slice below uses.
(start_year - 1854 + 1) * 12

1164

In [17]:
# Keep only the files whose YYYYMM stamp (last dotted field of the stem, e.g.
# "ersst.v5.195001") falls in start_year or later. Filtering on the file name
# rather than on a positional offset from 1854 makes this cell idempotent
# (re-running it no longer slices the already-trimmed list a second time) and
# does not rely on the archive being gap-free from 185401.
lfiles_ersst = [
    fname for fname in lfiles_ersst
    if int(fname.stem.rsplit('.', 1)[-1][:4]) >= start_year
]

In [39]:
# Re-sort after trimming so the chronological order is guaranteed before loading.
lfiles_ersst = sorted(lfiles_ersst)

In [40]:
lfiles_ersst[0]

PosixPath('/home/nicolasf/research/CPP/indices/data/ERSSTV5/ersst.v5.195001.nc')

In [41]:
lfiles_ersst[-1]

PosixPath('/home/nicolasf/research/CPP/indices/data/ERSSTV5/ersst.v5.202003.nc')

In [61]:
# Open every monthly file and collect the datasets in `d` for concatenation.
# Times are left undecoded here; a monthly time axis is assigned after the
# datasets have been concatenated.
d = []
for fname in lfiles_ersst:
    dset = xr.open_dataset(fname, decode_times=False)
    # Remove length-1 dimensions; they remain as scalar coordinates.
    dset = dset.squeeze()
    if 'lev' in dset.coords:
        # drop_vars replaces the deprecated Dataset.drop for removing a coordinate.
        dset = dset.drop_vars('lev')
    d.append(dset)

In [62]:
# Stack the per-file datasets along 'time' into a single dataset
# (this rebinds `dset`, which was the loop variable above).
dset = xr.concat(d, dim='time')

In [63]:
# Replace the undecoded time values with month-start timestamps: one per
# concatenated time step, beginning in January of start_year.
# NOTE(review): this assumes the loaded files are consecutive months with no gaps.
n_months = len(dset['time'])
month_starts = pd.date_range(start=f'{start_year}-01-01', periods=n_months, freq='MS')
dset['time'] = ('time', month_starts)

In [64]:
def demean(x, clim_start='1981', clim_end='2010'):
    """
    Subtract from `x` its time-mean over a baseline period.

    Parameters
    ----------
    x : object with a 'time' coordinate
        Must support `.sel(time=slice(...))`, `.mean('time')` and subtraction
        (e.g. the per-month groups produced by `groupby('time.month')`).
    clim_start, clim_end : str, optional
        Labels bounding the baseline period, passed to `slice` for the time
        selection. The defaults reproduce the 1981-2010 baseline.

    Returns
    -------
    `x` minus the mean of `x` over the baseline period.
    """
    climatology = x.sel(time=slice(clim_start, clim_end)).mean('time')
    return x - climatology

In [65]:
# Monthly SST anomalies: for each calendar month, subtract that month's baseline
# mean (see demean). GroupBy.map is the current name of the operation; `apply`
# is only kept by xarray as a backward-compatible alias.
sst_anoms = dset['sst'].groupby('time.month').map(demean)

In [66]:
# Box-averaged anomaly series for the two regions used by the index:
#   east: lon 90..110, lat -10..0      west: lon 50..70, lat -10..10
# NOTE(review): averaging over 'lat' and then over 'lon' is not the same as one
# mean over both dimensions if any grid cells are missing (e.g. land), and no
# latitude weighting is applied — confirm this is intended.
east_box = sst_anoms.sel(lon=slice(90, 110), lat=slice(-10, 0))
west_box = sst_anoms.sel(lon=slice(50, 70), lat=slice(-10, 10))
EAST = east_box.mean('lat').mean('lon')
WEST = west_box.mean('lat').mean('lon')

In [68]:
# Save the time stamps as a plain array: EAST and WEST are converted to bare
# NumPy arrays in the following cells, and this array becomes the DataFrame index.
time = EAST['time'].values

In [69]:
from scipy.stats import zscore

In [70]:
# Standardise the eastern series over the whole record (zero mean, unit variance).
# NOTE: this rebinds EAST from an xarray object to a NumPy array, so the cell
# cannot be re-run without first re-running the cell that builds EAST.
EAST = zscore(EAST.values.ravel())

In [71]:
# Standardise the western series over the whole record (zero mean, unit variance).
# NOTE: this rebinds WEST from an xarray object to a NumPy array, so the cell
# cannot be re-run without first re-running the cell that builds WEST.
WEST = zscore(WEST.values.ravel())

In [72]:
# Put the two standardised series side by side, indexed by the saved time stamps.
DMI = pd.DataFrame(dict(east=EAST, west=WEST), index=time)

In [73]:
# Dipole mode index: standardised western series minus standardised eastern series.
DMI = DMI.assign(dmi=DMI['west'] - DMI['east'])

In [74]:
DMI

Unnamed: 0,east,west,dmi
1950-01-01,-1.265869,-1.271288,-0.005419
1950-02-01,-0.363345,-1.560536,-1.197191
1950-03-01,-0.556284,-1.260499,-0.704215
1950-04-01,-0.863498,-1.202111,-0.338613
1950-05-01,-1.195080,-1.383939,-0.188860
...,...,...,...
2019-11-01,-1.221019,2.671642,3.892661
2019-12-01,0.883495,2.133334,1.249839
2020-01-01,1.927272,2.443030,0.515758
2020-02-01,2.163413,1.652016,-0.511397


In [75]:
# Output directory for the index file.
# NOTE(review): absolute, machine-specific path — adjust before running elsewhere.
opath = pathlib.Path("/media/nicolasf/END19101/data/observational_indices/IOD")

In [76]:
# Create the output directory (and any missing parents). exist_ok=True makes this
# a no-op when the directory is already there, without a separate exists() check,
# and still raises if the path exists but is not a directory.
opath.mkdir(parents=True, exist_ok=True)

In [78]:
# Keep only the index column, as a one-column DataFrame.
DMI = DMI[['dmi']]

In [79]:
# Rename the single remaining column; 'IOD' is the header written to the CSV.
DMI.columns = ['IOD']

In [80]:
# Name the index so it is written to the CSV under the header 'date'.
DMI.index.name = 'date'

In [83]:
# Move each month-start date forward to the last day of its month.
# MonthEnd(0) leaves dates that are already a month end unchanged.
DMI.index = DMI.index + pd.offsets.MonthEnd(0)

### Calculate the seasonal (3-month running mean) anomalies

In [84]:
# Trailing 3-month mean, labelled by the last month of each window. With
# min_periods=3 the first two rows have no full window and become NaN.
# NOTE: re-running this cell smooths the already-smoothed series again.
three_month_windows = DMI.rolling(window=3, min_periods=3)
DMI = three_month_windows.mean()

In [85]:
# Discard the first two rows, which have no complete 3-month window.
DMI = DMI.iloc[2:]

In [86]:
# Write the seasonal index to IOD.csv in the output directory.
DMI.to_csv(opath / "IOD.csv")