# Calculate predictors

#### ERSST Predictors (6)
* SIOD_E
* SIOD_W
* SST_Med
* TSA
* TNA
* SST_mdr

#### SATA Predictors (4)
* lnh
* lsh
* onh
* osh

#### 2-D (year x month) textfiles (10)
* SOI (2)
* AMO
* NAO
* PDO
* NP
* Ninos (4)

### Import packages

In [1]:
import numpy as np
import pandas as pd
import xarray as xr

# Temporal Range for all indices:

Months: AMJ\
Years: 1901 - 2017

In [2]:
# Season window (inclusive calendar months) and year span (inclusive)
# shared by every index computed below.
month_s, month_e = 4, 6  # April through June
year_s, year_e = 1901, 2017

# ERSST Predictors (6)

### Define function to select subset, calculate mean, and save to new file

This function creates one seasonal value for each year that is regionally averaged.\
It uses raw data - not anomalies or detrended.

Can handle Lon = 0:359 if the area crosses the 180° meridian\
or Lon = -180:179 if the area crosses the 0° meridian

Missing values are already **nan**

In [4]:
def index_from_ersst(
    name, month_s, month_e, year_s, year_e, lat_s, lat_e, lon_s, lon_e
):
    """Build a regionally averaged, seasonal-mean SST index from the ERSST file.

    Reads the ``sst`` variable from ``data/da_my_ersst.nc``, restricts it to the
    requested months, years and lat/lon box, averages over time, latitude and
    longitude within each year, writes the result to
    ``data/da_pred_{name}.nc`` and returns it.

    Parameters
    ----------
    name : str
        Name given to the resulting DataArray; also used in the output file name.
    month_s, month_e : int
        First and last calendar month of the season (both inclusive).
    year_s, year_e : int
        First and last year to keep (both inclusive).
    lat_s, lat_e : number
        Latitude bounds, used as a label slice on the ``lat`` coordinate.
        NOTE(review): a label slice only selects data when the bounds follow
        the ordering of the file's ``lat`` axis - confirm it is ascending.
    lon_s, lon_e : number
        Longitude bounds, used as a label slice on the ``lon`` coordinate.
        If ``lon_s`` is negative the grid is first rearranged and relabelled
        to the -180..180 convention.

    Returns
    -------
    xarray.DataArray
        One value per year (dimension ``year``).
    """
    ersst = xr.open_dataset("data/da_my_ersst.nc").sst
    # squeeze() removes length-1 dimensions; the leftover "lev" coordinate is
    # then dropped. drop_vars() replaces the deprecated DataArray.drop().
    ersst = ersst.rename(name).squeeze().drop_vars("lev")

    if lon_s < 0:
        # Negative start longitude: put the >= 180 half in front of the < 180
        # half, then relabel every longitude into the -180..180 range so the
        # label slice below can run across 0 degrees.
        lon = ersst.lon
        ersst = xr.concat(
            [ersst.sel(lon=lon[lon >= 180]), ersst.sel(lon=lon[lon < 180])],
            dim="lon",
        )
        ersst = ersst.assign_coords({"lon": (((ersst.lon + 180) % 360) - 180)})
        print("data has been reshaped")

    # Boolean mask over the time axis: months inside the season AND years
    # inside the requested span.
    in_window = (
        (ersst["time.month"] >= month_s)
        & (ersst["time.month"] <= month_e)
        & (ersst["time.year"] >= year_s)
        & (ersst["time.year"] <= year_e)
    )

    # Cut out the box, then collapse time/lat/lon to a single mean per year.
    ersst = (
        ersst.sel(
            time=in_window,
            lat=slice(lat_s, lat_e),
            lon=slice(lon_s, lon_e),
        )
        .groupby("time.year")
        .mean(("time", "lat", "lon"))
    )
    ersst.to_netcdf(f"data/da_pred_{name}.nc")
    return ersst

### SIOD_E - (Eastern Subtropical Indian Ocean)

In [5]:
# Averaging box: 28S-18S, 90E-100E.
name = "siod_e"
lat_s, lat_e = -28, -18
lon_s, lon_e = 90, 100

siod_e = index_from_ersst(
    name=name,
    month_s=month_s,
    month_e=month_e,
    year_s=year_s,
    year_e=year_e,
    lat_s=lat_s,
    lat_e=lat_e,
    lon_s=lon_s,
    lon_e=lon_e,
)

### SIOD_W - (Western Subtropical Indian Ocean)

In [6]:
# Averaging box: 37S-27S, 55E-65E.
name = "siod_w"
lat_s, lat_e = -37, -27
lon_s, lon_e = 55, 65

siod_w = index_from_ersst(
    name=name,
    month_s=month_s,
    month_e=month_e,
    year_s=year_s,
    year_e=year_e,
    lat_s=lat_s,
    lat_e=lat_e,
    lon_s=lon_s,
    lon_e=lon_e,
)

### SST_Med - (Mediterranean Sea)

In [7]:
# Averaging box: 30N-45N, 0E-25E.
name = "sst_med"
lat_s, lat_e = 30, 45
lon_s, lon_e = 0, 25

sst_med = index_from_ersst(
    name=name,
    month_s=month_s,
    month_e=month_e,
    year_s=year_s,
    year_e=year_e,
    lat_s=lat_s,
    lat_e=lat_e,
    lon_s=lon_s,
    lon_e=lon_e,
)

### TSA - (Tropical South Atlantic)

In [8]:
# Averaging box: 20S-0, 30W-10E. The negative start longitude makes
# index_from_ersst switch the grid to the -180..180 convention.
name = "tsa"
lat_s, lat_e = -20, 0
lon_s, lon_e = -30, 10

tsa = index_from_ersst(
    name=name,
    month_s=month_s,
    month_e=month_e,
    year_s=year_s,
    year_e=year_e,
    lat_s=lat_s,
    lat_e=lat_e,
    lon_s=lon_s,
    lon_e=lon_e,
)

data has been reshaped


### TNA - (Tropical North Atlantic)

In [9]:
# Averaging box: 5N-25N, 55W-15W. The negative start longitude makes
# index_from_ersst switch the grid to the -180..180 convention.
name = "tna"
lat_s, lat_e = 5, 25
lon_s, lon_e = -55, -15

tna = index_from_ersst(
    name=name,
    month_s=month_s,
    month_e=month_e,
    year_s=year_s,
    year_e=year_e,
    lat_s=lat_s,
    lat_e=lat_e,
    lon_s=lon_s,
    lon_e=lon_e,
)

data has been reshaped


### SST_MDR - (Hurricane main development region)

In [10]:
# Averaging box: 10N-20N, 85W-20W. The negative start longitude makes
# index_from_ersst switch the grid to the -180..180 convention.
name = "sst_mdr"
lat_s, lat_e = 10, 20
lon_s, lon_e = -85, -20

sst_mdr = index_from_ersst(
    name=name,
    month_s=month_s,
    month_e=month_e,
    year_s=year_s,
    year_e=year_e,
    lat_s=lat_s,
    lat_e=lat_e,
    lon_s=lon_s,
    lon_e=lon_e,
)

data has been reshaped


# SATA Predictors (4)
#### Surface Air Temperature Anomaly:
* Land Northern Hemisphere (LNH)
* Land Southern Hemisphere (LSH)
* Ocean Northern Hemisphere (ONH)
* Ocean Southern Hemisphere (OSH)

This data comes in global monthly means.\
Here the corresponding seasonal means are computed.

Load the four SATA files and merge them together in one xarray ds and **set missing values to NAN**

In [11]:
# Helper: read one SATA csv file into a pandas DataFrame and rename its
# index and value column.
def load_sata(name):
    """Load ``data/da_o_sata_{name}.csv`` as a single-column DataFrame.

    The column header is taken from the fifth line of the file (``header=4``).
    The ``Value`` column is renamed to ``sata_{name}`` and the ``Year`` column
    is renamed to ``time`` and used as the index.
    """
    frame = pd.read_csv(f"data/da_o_sata_{name}.csv", header=4)
    frame = frame.rename(columns={"Value": f"sata_{name}", "Year": "time"})
    return frame.set_index("time")


# Load the four series (land/ocean x northern/southern hemisphere), join them
# column-wise on their shared index and convert the table to an xarray Dataset.
sata = pd.concat(
    [load_sata(region) for region in ("lnh", "lsh", "onh", "osh")], axis=1
).to_xarray()


# Mask the missing-value flag (-999) as NaN.
sata = sata.where(sata != -999)

Select temporal range, compute seasonal means and save data to new file

In [12]:
# Replace the raw time index with month-start timestamps so that the
# "time.month" / "time.year" accessors can be used below. The hard-coded
# range (Jan 1880 - Oct 2020) must have exactly as many entries as the
# loaded series.
sata = sata.assign_coords(
    time=("time", pd.date_range("1880-1-1", "2020-10-1", freq="MS"))
)

# Keep only the season months inside the requested year span, then average
# the remaining months of each year into one seasonal value.
in_season = (sata["time.month"] >= month_s) & (sata["time.month"] <= month_e)
in_period = (sata["time.year"] >= year_s) & (sata["time.year"] <= year_e)
sata = sata.sel(time=in_season & in_period).groupby("time.year").mean("time")

# Save data to new file. The path previously read "datada_pred_sata_all.nc"
# (missing "/"), which wrote the file outside the data/ directory used by
# every other predictor file.
sata.to_netcdf("data/da_pred_sata_all.nc")


# 2-D (year x month) text data (10)

- SOI (2)
- AMO
- NAO
- ninos (4)
- PDO
- NP

## Workflow for each data file:

#### 1. Load the two text files into one Xarray Data Set and set coordinate axes:
#### 2. Set missing values to **NAN**
#### 3. Apply Select and Save function

In [13]:
# Select the time range, compute the seasonal mean and save the predictor file.
def sel_seasonal_pred(var, file_name):
    """Reduce a (year x month) object to one seasonal mean per year and save it.

    Keeps the months ``month_s..month_e`` and the years ``year_s..year_e``
    (module-level settings, both ranges inclusive), averages over ``month``,
    writes the result to ``data/da_pred_{file_name}.nc`` and returns it.
    """
    months = var.month
    years = var.year

    season_months = months[(months >= month_s) & (months <= month_e)]
    wanted_years = years[(years >= year_s) & (years <= year_e)]

    var = var.sel(month=season_months, year=wanted_years).mean("month")

    # Save data to new file
    var.to_netcdf(f"data/da_pred_{file_name}.nc")
    return var

# SOI (Southern Oscillation Index) (2)
## Darwin and Tahiti SLP

In [14]:
# Load the Darwin and Tahiti tables into one Dataset. The first column of
# each file is cut off (presumably the year column - the year axis is
# supplied explicitly as 1855..2020 instead).
soi = xr.Dataset(
    data_vars={
        var_name: (("year", "month"), np.loadtxt(path)[:, 1:])
        for var_name, path in {
            "slp_darwin": "data/da_o_soi_dar.txt",
            "slp_tahiti": "data/da_o_soi_tah.txt",
        }.items()
    },
    coords={"year": np.arange(1855, 2021), "month": np.arange(1, 13)},
    attrs={"unit": "hPa"},
)

# Mask the missing-value flag (-990) as NaN, then select the temporal range,
# compute the seasonal means and save the result to a new file.
soi = sel_seasonal_pred(soi.where(soi != -990), "soi_all")

# AMO (Atlantic Multidecadal Oscillation) (1)

In [15]:
# Parse the file once and close it again (it used to be opened twice without
# ever being closed). The first line is skipped and the last 4 lines are
# dropped because they hold text rather than numbers.
with open("data/da_o_amo_detrend.txt") as _amo_file:
    _amo_table = np.loadtxt(_amo_file.readlines()[:-4], skiprows=1)

# Column 0 holds the years; the remaining 12 columns are the monthly values.
amo = xr.DataArray(
    data=_amo_table[:, 1:],
    dims=("year", "month"),
    name="amo",
    coords={
        "year": _amo_table[:, 0],
        "month": np.arange(1, 13),
    },
    attrs={"unit": "degree C"},
)

# set missing values (-99.99) to nan
amo = amo.where(amo != -99.99)

# Use function to: select temporal range, compute seasonal means and save data to new file
amo = sel_seasonal_pred(amo, "amo")

# NAO (North Atlantic Oscillation)(1)

In [16]:
# Parse the file once and close it again (it used to be opened twice without
# ever being closed). The first line is skipped and the last 6 lines are
# dropped because they hold text rather than numbers.
with open("data/da_o_nao.txt") as _nao_file:
    _nao_table = np.loadtxt(_nao_file.readlines()[:-6], skiprows=1)

# Column 0 holds the years; the remaining 12 columns are the monthly values.
nao = xr.DataArray(
    data=_nao_table[:, 1:],
    dims=("year", "month"),
    name="nao",
    coords={
        "year": _nao_table[:, 0],
        "month": np.arange(1, 13),
    },
    attrs={"unit": "index"},
)

# set missing values (-99.99) to nan
nao = nao.where(nao != -99.99)

# Use function to: select temporal range, compute seasonal means and save data to new file
nao = sel_seasonal_pred(nao, "nao")

# NINO (4)
## 1.2 , 3 , 3.4 and 4

In [17]:
def _load_nino_table(path):
    """Parse one Nino text file, skipping its first line and last 5 lines."""
    # The context manager closes the file; previously each open() handle
    # was left dangling.
    with open(path) as handle:
        return np.loadtxt(handle.readlines()[:-5], skiprows=1)


# Parse every file exactly once (the nino12 file used to be read a second
# time just to obtain the year column).
_nino_tables = {
    "nino12": _load_nino_table("data/da_o_nino12.txt"),
    "nino3": _load_nino_table("data/da_o_nino3.txt"),
    "nino34": _load_nino_table("data/da_o_nino34.txt"),
    "nino4": _load_nino_table("data/da_o_nino4.txt"),
}

# load all 4 files into one xarray; column 0 holds the years, the remaining
# 12 columns are the monthly values. The year axis is taken from nino12.
nino = xr.Dataset(
    data_vars={
        var_name: (("year", "month"), table[:, 1:])
        for var_name, table in _nino_tables.items()
    },
    coords={
        "year": _nino_tables["nino12"][:, 0],
        "month": np.arange(1, 13),
    },
    attrs={"unit": "SST"},
)

# set missing values (-99.99) to nan
nino = nino.where(nino != -99.99)

# Use function to: select temporal range, compute seasonal means and save data to new file
nino = sel_seasonal_pred(nino, "nino_all")

# PDO (Pacific Decadal Oscillation)(1)

In [18]:
# Parse the file once and close it again (it used to be opened twice without
# ever being closed). The first 32 lines are skipped and the last 12 lines
# are dropped because they hold text rather than numbers.
with open("data/da_my_pdo_jisao.txt") as _pdo_file:
    _pdo_table = np.loadtxt(_pdo_file.readlines()[:-12], skiprows=32)

# Column 0 holds the years; the remaining 12 columns are the monthly values.
pdo = xr.DataArray(
    data=_pdo_table[:, 1:],
    dims=("year", "month"),
    name="pdo",
    coords={
        "year": _pdo_table[:, 0],
        "month": np.arange(1, 13),
    },
    attrs={"unit": "index"},
)

# set missing values (-99.99) to nan
pdo = pdo.where(pdo != -99.99)

# Use function to: select temporal range, compute seasonal means and save data to new file
pdo = sel_seasonal_pred(pdo, "pdo")

# NP (North Pacific Pattern)(1)

In [19]:
# load file into xarray
np = xr.DataArray(
    data=np.loadtxt(open('data/da_o_np.txt').readlines()[:-6], skiprows=1)[:,1:],  # use nested open to skip bottom lines string text, and first column (years)
    dims=("year", "month"),
    name="np",
    coords={
        "year": np.loadtxt(open('data/da_o_np.txt').readlines()[:-6], skiprows=1)[:,0],
        "month": np.arange(1, 13),
    },
    attrs={"unit": "index"},
)





#set missing values (-999.00) to nan
np = np.where(np != -999.00)





#Use function to: Select temporal range, compute seasonal means and save data to new file
np = sel_seasonal_pred(np, 'np')

# Merge and save all Variables

In [24]:
xr.merge([
    siod_e, 
    siod_w,
    sst_med, 
    tsa, 
    tna, 
    sst_mdr, 
    sata, 
    soi, 
    amo, 
    nao, 
    pdo, 
    np, 
    nino
]).to_netcdf("data/da_pred_all.nc")