# Calculate predictors

#### Import packages

In [1]:
import numpy as np
import pandas as pd
import xarray as xr

# Temporal Range for all indices:

Months: AMJ\
Years: 1901 - 2017

In [2]:
# Season (first/last calendar month, inclusive) and year span (inclusive)
# shared by every predictor computed below.
month_s, month_e = 4, 6  # April .. June
year_s, year_e = 1901, 2017

# ERSST Predictors (6)

### Define function to select subset, calculate mean, and save to new file

This function creates one seasonal value for each year that is regionally averaged.\
It uses raw data - not anomalies or detrended.

Can handle Lon = 0:359 if the area crosses the 180° meridian\
or Lon = -180:179 if the area crosses the 0° meridian

In [3]:
def index_from_ersst(
    name, month_s, month_e, year_s, year_e, lat_s, lat_e, lon_s, lon_e
):
    """Compute a yearly, box-averaged seasonal SST index and save it.

    Reads the ``sst`` variable from ``da_my_ersst.nc``, keeps the time steps
    whose month lies in ``[month_s, month_e]`` and whose year lies in
    ``[year_s, year_e]``, restricts the field to the requested lat/lon box,
    and averages over time, lat and lon within each year. The result is
    written to ``da_pred_{name}.nc`` and returned.

    Parameters
    ----------
    name : str
        Suffix of the output file name (``da_pred_{name}.nc``).
    month_s, month_e : int
        First and last month of the season (both inclusive). Because the
        filter is ``month >= month_s`` and ``month <= month_e``, a season
        wrapping the year end (``month_s > month_e``) selects nothing.
    year_s, year_e : int
        First and last year (both inclusive).
    lat_s, lat_e : number
        Latitude bounds, passed as ``slice(lat_s, lat_e)``.
        NOTE(review): presumably requires ``lat`` to be stored in
        ascending order -- confirm against the input file.
    lon_s, lon_e : number
        Longitude bounds, passed as ``slice(lon_s, lon_e)``. A negative
        ``lon_s`` switches the longitude axis to negative-west labels
        before selecting (see below).

    Returns
    -------
    The grouped mean, i.e. one value per year.

    Notes
    -----
    The spatial average is a plain mean over the selected grid cells; no
    area (latitude) weighting is applied.
    """
    sst = xr.open_dataset("da_my_ersst.nc").sst

    # A negative western bound means the caller uses negative-west
    # longitudes: move the lon >= 180 part in front of the lon < 180 part
    # and relabel it so that values >= 180 become negative.
    if lon_s < 0:
        lon = sst.lon
        upper_half = sst.sel(lon=lon[lon >= 180])
        lower_half = sst.sel(lon=lon[lon < 180])
        sst = xr.concat([upper_half, lower_half], dim="lon")
        sst = sst.assign_coords({"lon": ((sst.lon + 180) % 360) - 180})
        print("data has been reshaped")

    # Boolean masks along time for the requested season and year span.
    month = sst["time.month"]
    year = sst["time.year"]
    in_season = (month >= month_s) & (month <= month_e)
    in_period = (year >= year_s) & (year <= year_e)

    box = sst.sel(
        time=in_season & in_period,
        lat=slice(lat_s, lat_e),
        lon=slice(lon_s, lon_e),
    )

    # One value per year: average over all selected months and grid cells.
    index = box.groupby("time.year").mean(("time", "lat", "lon"))
    index.to_netcdf(f"da_pred_{name}.nc")
    return index

### SIOD_E - (Eastern Subtropical Indian Ocean)

In [4]:
# Output name and latitude/longitude bounds of the SIOD_E box.
name = "siod_e"
lat_s, lat_e = -28, -18
lon_s, lon_e = 90, 100

siod_e = index_from_ersst(
    name=name,
    month_s=month_s,
    month_e=month_e,
    year_s=year_s,
    year_e=year_e,
    lat_s=lat_s,
    lat_e=lat_e,
    lon_s=lon_s,
    lon_e=lon_e,
)

### SIOD_W - (Western Subtropical Indian Ocean)

In [5]:
# Output name and latitude/longitude bounds of the SIOD_W box.
name = "siod_w"
lat_s, lat_e = -37, -27
lon_s, lon_e = 55, 65

siod_w = index_from_ersst(
    name=name,
    month_s=month_s,
    month_e=month_e,
    year_s=year_s,
    year_e=year_e,
    lat_s=lat_s,
    lat_e=lat_e,
    lon_s=lon_s,
    lon_e=lon_e,
)

### SST_Med - (Mediterranean Sea)

In [6]:
# Output name and latitude/longitude bounds of the Mediterranean box.
name = "sst_med"
lat_s, lat_e = 30, 45
lon_s, lon_e = 0, 25

sst_med = index_from_ersst(
    name=name,
    month_s=month_s,
    month_e=month_e,
    year_s=year_s,
    year_e=year_e,
    lat_s=lat_s,
    lat_e=lat_e,
    lon_s=lon_s,
    lon_e=lon_e,
)

### TSA - (Tropical South Atlantic)

In [7]:
# Output name and latitude/longitude bounds of the TSA box.
# lon_s is negative, so index_from_ersst relabels the longitude axis first.
name = "tsa"
lat_s, lat_e = -20, 0
lon_s, lon_e = -30, 10

tsa = index_from_ersst(
    name=name,
    month_s=month_s,
    month_e=month_e,
    year_s=year_s,
    year_e=year_e,
    lat_s=lat_s,
    lat_e=lat_e,
    lon_s=lon_s,
    lon_e=lon_e,
)

data has been reshaped


### TNA - (Tropical North Atlantic)

In [8]:
# Output name and latitude/longitude bounds of the TNA box.
# lon_s is negative, so index_from_ersst relabels the longitude axis first.
name = "tna"
lat_s, lat_e = 5, 25
lon_s, lon_e = -55, -15

tna = index_from_ersst(
    name=name,
    month_s=month_s,
    month_e=month_e,
    year_s=year_s,
    year_e=year_e,
    lat_s=lat_s,
    lat_e=lat_e,
    lon_s=lon_s,
    lon_e=lon_e,
)

data has been reshaped


### SST_MDR - (Hurricane main development region)

In [9]:
# Output name and latitude/longitude bounds of the SST_MDR box.
# lon_s is negative, so index_from_ersst relabels the longitude axis first.
name = "sst_mdr"
lat_s = 10
lat_e = 20
lon_s = -85
lon_e = -20

# Keep the result under its own name: binding it to `tna` would overwrite
# the index computed for the "tna" region.
sst_mdr = index_from_ersst(
    name, month_s, month_e, year_s, year_e, lat_s, lat_e, lon_s, lon_e
)

data has been reshaped


# SATA Predictors (4)
This data comes in global monthly means.\
Here the corresponding seasonal means are computed

Load the four sata files and merge them together in one xarray ds

In [10]:
# function to load csv files as pandas df and rename index and variable
def load_sata(name):
    """Read ``da_o_sata_{name}.csv`` into a DataFrame indexed by ``time``.

    The column names are taken from row 4 of the file (``header=4``). The
    "Value" column is renamed to ``sata_{name}`` and the "Year" column to
    "time", which then becomes the index.
    """
    frame = pd.read_csv(f"da_o_sata_{name}.csv", header=4)
    frame = frame.rename(columns={"Value": f"sata_{name}", "Year": "time"})
    return frame.set_index("time")

# Load the four series, join them column-wise on their shared "time" index
# and convert the combined table to an xarray object.
sata = pd.concat(
    [load_sata(region) for region in ("lnh", "lsh", "onh", "osh")],
    axis=1,
).to_xarray()

Select temporal range, compute seasonal means and save data to new file

In [11]:
# Swap the time labels for month-start timestamps so that month/year can be
# read from the coordinate.
# NOTE(review): assumes the loaded series cover exactly 1880-01 .. 2020-10,
# one row per month -- confirm against the CSV files.
sata = sata.assign_coords(
    time=("time", pd.date_range(start="1880-1-1", end="2020-10-1", freq="MS"))
)

# Keep only the season months within the requested years, then average the
# remaining months of each year into one value.
sata = (
    sata.sel(
        time=(
            (sata.time.dt.month >= month_s)
            & (sata.time.dt.month <= month_e)
            & (sata.time.dt.year >= year_s)
            & (sata.time.dt.year <= year_e)
        )
    )
    .groupby("time.year")
    .mean("time")
)

# Write the seasonal means to disk.
sata.to_netcdf("da_pred_sata_all.nc")

# SOI (Southern Oscillation Index)
## Darwin and Tahiti SLP