In [None]:
import xarray as xr
import numpy as np
import pandas as pd

import os
# Precision label; not referenced by the cells visible in this notebook.
precision = 'float32'
# Directory the index text files are read from and the HDF output is written to.
DATA_DIR = '/opt/nautilus_dataset/cdf_files/indices/'

# Inclusive bounds of the daily calendar that every index is mapped onto.
first_date = '1979-01-01'
last_date = '2021-07-25'

# Column-less frame with one row per calendar day; the index is named 'date'.
df = pd.DataFrame(index=pd.date_range(start=first_date, end=last_date, name='date'))

In [None]:
def get_tabular_data(index):
    """Load ``<DATA_DIR>/<index>.txt`` as a table indexed by year.

    The file is read with ``pd.read_csv`` using its defaults, so the first
    line is consumed as a header and every remaining line arrives as a single
    string. Each string is split on whitespace: the first token becomes the
    integer year, the rest become floats.

    Parameters
    ----------
    index : str
        File name without the ``.txt`` extension.

    Returns
    -------
    pandas.DataFrame
        Frame indexed by ``year``; the value columns keep their positional
        integer labels (1, 2, ...).
    """
    path = os.path.join(DATA_DIR, index + '.txt')
    lines = pd.read_csv(path).values

    rows = []
    for line in lines:
        # Each ``line`` is a one-element array holding the whole text row.
        fields = [float(token) for token in line.item().split()]
        rows.append([int(fields[0]), *fields[1:]])

    table = pd.DataFrame(rows)
    return table.set_index(0).rename_axis('year')

### Indices: MEIv2, NAO, AO, ONI, NINO34

In [None]:
# Broadcast each monthly index onto the daily frame: every day in a given
# (year, month) receives that month's value from the per-index table.
for scalar in ['MEIv2', 'NAO', 'AO', 'ONI', 'NINO34']:
    df_scalar = get_tabular_data(scalar)
    df[scalar] = 0.0
    for year in df.index.year.unique():
        # The year mask does not depend on the month, so compute it once.
        in_year = df.index.year == year
        for month in df.index.month.unique():
            # .item() raises unless exactly one table row matches the year,
            # so a missing or duplicated year fails loudly.
            value = df_scalar.loc[df_scalar.index == year, month].values.item()
            # Single .loc write instead of chained df[scalar][mask] = ...,
            # which assigns into a temporary and is a no-op under pandas
            # Copy-on-Write.
            df.loc[in_year & (df.index.month == month), scalar] = value

### MJO

In [None]:
# Parse the daily MJO file and attach RMM1/RMM2/phase/amplitude to `df`.
index_file = os.path.join(DATA_DIR, 'MJO.txt')
# Default read_csv: the first line is consumed as a header and every other
# line arrives as one whitespace-separated string.
raw_data = pd.read_csv(index_file).values
data = []
for line in raw_data:
    # The last token of each row is dropped; the rest are numeric fields.
    elems = [float(el) for el in line.item().split()[:-1]]
    elems[0] = int(elems[0])
    data.append(elems)

cols = ['year','month','day','RMM1','RMM2','phase','amplitude']
df_mjo = pd.DataFrame(data).rename(columns=dict(enumerate(cols)))
df_mjo = df_mjo[(df_mjo['year']>=1979) & (df_mjo['year']<=2021)].reset_index(drop=True)

# Align by calendar date rather than by row position: positional slicing
# silently shifts values if the file has gaps or does not start on df's first
# day, and raises if the file is shorter than df. Dates absent from the file
# become NaN.
mjo_dates = pd.to_datetime(df_mjo[['year','month','day']].astype(int))
mjo_by_date = (
    df_mjo[['RMM1','RMM2','phase','amplitude']]
    .set_index(pd.DatetimeIndex(mjo_dates))
    .reindex(df.index)
)
df[['MJO_rmm1','MJO_rmm2','MJO_phase','MJO_amp']] = mjo_by_date.to_numpy()

In [None]:
# Store the assembled frame as indices.h5 inside DATA_DIR under the HDF key 'df'.
output_path = os.path.join(DATA_DIR, 'indices.h5')
df.to_hdf(output_path, key='df')