# Calculate predictors

- ERSST Predictors (6)
    * SIOD_E
    * SIOD_W
    * SST_Med
    * TSA
    * TNA
    * SST_mdr

- SATA Predictors (4)
    * lnh
    * lsh
    * onh
    * osh

- 2-D (year x month) textfiles (10)
    * SOI (2)
    * AMO
    * NAO
    * PDO
    * NP
    * Ninos (4)

#### Import packages

In [1]:
import numpy as np
import pandas as pd
import xarray as xr

# Temporal Range for all indices:

Months: AMJ\
Years: 1901 - 2017

In [2]:
# Season window (inclusive calendar months) and year range (inclusive)
# used by every index computed in this notebook.
month_s, month_e = 4, 6  # April through June (AMJ)
year_s, year_e = 1901, 2017

# ERSST Predictors (6)

### Define function to select subset, calculate mean, and save to new file

This function creates one seasonal value for each year that is regionally averaged.\
It uses raw data - not anomalies or detrended.

Can handle Lon = 0:359 if the area crosses the 180° meridian\
or Lon = -180:179 if the area crosses the 0° meridian

Missing values are already nan

In [3]:
def index_from_ersst(
    name, month_s, month_e, year_s, year_e, lat_s, lat_e, lon_s, lon_e
):
    """Compute a regionally averaged seasonal SST index, one value per year.

    Reads the ``sst`` variable from ``da_my_ersst.nc``, keeps the months
    ``month_s``..``month_e`` of the years ``year_s``..``year_e`` inside the
    box ``lat_s``..``lat_e`` / ``lon_s``..``lon_e``, then takes a plain
    (unweighted) mean over time, latitude and longitude for each year.
    The result is written to ``da_pred_{name}.nc`` and returned.

    Parameters
    ----------
    name : str
        Suffix of the output file name (``da_pred_{name}.nc``).
    month_s, month_e : int
        First and last calendar month of the season (both included).
    year_s, year_e : int
        First and last year (both included).
    lat_s, lat_e : float
        Latitude bounds passed to a label slice, in that order.
        NOTE(review): presumably the file's ``lat`` axis is ascending;
        confirm, otherwise the slice selects nothing.
    lon_s, lon_e : float
        Longitude bounds passed to a label slice, in that order. A
        negative ``lon_s`` switches the grid from 0..360 to -180..180
        before selecting.

    Returns
    -------
    xarray.DataArray
        The per-year regional mean, indexed by ``year``.
    """
    # Open through a context manager so the netCDF file handle is released
    # after every call instead of being left open.
    with xr.open_dataset("da_my_ersst.nc") as ds:
        ersst = ds.sst

        if lon_s < 0:
            # Negative start longitude: move the eastern half (lon >= 180)
            # in front of the western half and relabel to -180..180 so the
            # requested box is contiguous and ascending.
            lon = ersst.lon
            ersst = xr.concat(
                [ersst.sel(lon=lon[lon >= 180]), ersst.sel(lon=lon[lon < 180])],
                dim="lon",
            )
            ersst = ersst.assign_coords(
                {"lon": (((ersst.lon + 180) % 360) - 180)}
            )
            print("data has been reshaped")

        # Boolean mask over the time axis: season months within the year range.
        in_window = (
            (ersst["time.month"] >= month_s)
            & (ersst["time.month"] <= month_e)
            & (ersst["time.year"] >= year_s)
            & (ersst["time.year"] <= year_e)
        )
        ersst = (
            ersst.sel(
                time=in_window,
                lat=slice(lat_s, lat_e),
                lon=slice(lon_s, lon_e),
            )
            .groupby("time.year")
            .mean(("time", "lat", "lon"))
        )
        # Make sure the values are in memory before the file is closed.
        ersst = ersst.load()

    ersst.to_netcdf(f"da_pred_{name}.nc")
    return ersst

### SIOD_E - (Eastern Subtropical Indian Ocean)

In [4]:
# Eastern subtropical Indian Ocean box: 28°S-18°S, 90°E-100°E.
name = "siod_e"
lat_s, lat_e = -28, -18
lon_s, lon_e = 90, 100

siod_e = index_from_ersst(
    name=name,
    month_s=month_s,
    month_e=month_e,
    year_s=year_s,
    year_e=year_e,
    lat_s=lat_s,
    lat_e=lat_e,
    lon_s=lon_s,
    lon_e=lon_e,
)

### SIOD_W - (Western Subtropical Indian Ocean)

In [5]:
# Western subtropical Indian Ocean box: 37°S-27°S, 55°E-65°E.
name = "siod_w"
lat_s, lat_e = -37, -27
lon_s, lon_e = 55, 65

siod_w = index_from_ersst(
    name=name,
    month_s=month_s,
    month_e=month_e,
    year_s=year_s,
    year_e=year_e,
    lat_s=lat_s,
    lat_e=lat_e,
    lon_s=lon_s,
    lon_e=lon_e,
)

### SST_Med - (Mediterranean Sea)

In [6]:
# Mediterranean Sea box: 30°N-45°N, 0°-25°E.
name = "sst_med"
lat_s, lat_e = 30, 45
lon_s, lon_e = 0, 25

sst_med = index_from_ersst(
    name=name,
    month_s=month_s,
    month_e=month_e,
    year_s=year_s,
    year_e=year_e,
    lat_s=lat_s,
    lat_e=lat_e,
    lon_s=lon_s,
    lon_e=lon_e,
)

### TSA - (Tropical South Atlantic)

In [7]:
# Tropical South Atlantic box: 20°S-0°, 30°W-10°E.
# The negative start longitude makes index_from_ersst relabel the grid
# to -180..180 before selecting.
name = "tsa"
lat_s, lat_e = -20, 0
lon_s, lon_e = -30, 10

tsa = index_from_ersst(
    name=name,
    month_s=month_s,
    month_e=month_e,
    year_s=year_s,
    year_e=year_e,
    lat_s=lat_s,
    lat_e=lat_e,
    lon_s=lon_s,
    lon_e=lon_e,
)

data has been reshaped


### TNA - (Tropical North Atlantic)

In [8]:
# Tropical North Atlantic box: 5°N-25°N, 55°W-15°W.
# The negative start longitude makes index_from_ersst relabel the grid
# to -180..180 before selecting.
name = "tna"
lat_s, lat_e = 5, 25
lon_s, lon_e = -55, -15

tna = index_from_ersst(
    name=name,
    month_s=month_s,
    month_e=month_e,
    year_s=year_s,
    year_e=year_e,
    lat_s=lat_s,
    lat_e=lat_e,
    lon_s=lon_s,
    lon_e=lon_e,
)

data has been reshaped


### SST_MDR - (Hurricane main development region)

In [9]:
# Hurricane main development region box: 10°N-20°N, 85°W-20°W.
# The negative start longitude makes index_from_ersst relabel the grid
# to -180..180 before selecting.
name = "sst_mdr"
lat_s = 10
lat_e = 20
lon_s = -85
lon_e = -20

# Bind the result to its own name; it was previously assigned to `tna`,
# which silently replaced the Tropical North Atlantic index in memory.
sst_mdr = index_from_ersst(
    name, month_s, month_e, year_s, year_e, lat_s, lat_e, lon_s, lon_e
)

data has been reshaped


# SATA Predictors (4)
This data comes as global monthly means.\
Here the corresponding seasonal means are computed.

Load the four sata files and merge them together in one xarray ds and set missing values to NAN

In [10]:
def load_sata(name):
    """Read one SATA CSV file into a DataFrame indexed by ``time``.

    Loads ``da_o_sata_{name}.csv`` using row 4 (zero-based, as counted by
    pandas) as the header row, renames the ``Value`` column to
    ``sata_{name}`` and the ``Year`` column to ``time``, and makes ``time``
    the index.
    """
    frame = pd.read_csv(f"da_o_sata_{name}.csv", header=4)
    renamed = frame.rename(columns={"Value": f"sata_{name}", "Year": "time"})
    return renamed.set_index("time")


# Load the four SATA series, join them column-wise on their shared time
# index, and convert the combined table to an xarray Dataset.
sata = pd.concat(
    [load_sata(region) for region in ("lnh", "lsh", "onh", "osh")], axis=1
).to_xarray()


# -999 marks missing values; mask those entries as NaN.
sata = sata.where(sata != -999)

Select temporal range, compute seasonal means and save data to new file

In [12]:
# Swap the raw time index for month-start timestamps so the month and year
# can be queried directly.
# NOTE(review): the hard-coded range assumes one row per month from
# 1880-01 through 2020-10 in the CSV files - confirm when the data change.
monthly_stamps = pd.date_range("1880-1-1", "2020-10-1", freq="MS")
sata = sata.assign_coords(time=("time", monthly_stamps))

# Season months within the requested years, averaged per year.
in_season = (sata["time.month"] >= month_s) & (sata["time.month"] <= month_e)
in_years = (sata["time.year"] >= year_s) & (sata["time.year"] <= year_e)
sata = sata.sel(time=in_season & in_years).groupby("time.year").mean("time")

# Write the seasonal means to a new file.
sata.to_netcdf("da_pred_sata_all.nc")

# 2-D (year x month) text data (10)

- SOI (2)
- AMO
- NAO
- ninos (4)
- PDO
- NP

### For each data file:
1. Import Data to Xarray 

# SOI (Southern Oscillation Index) (2)
## Darwin and Tahiti SLP

Load the two text files into one Xarray Data Set and set coordinate axes:

In [13]:
# Build one Dataset from the two station files. Each file is read as a 2-D
# table whose first column is dropped; the remaining columns are labelled
# as months 1-12 and the rows as years 1855-2020.
soi = xr.Dataset(
    data_vars={
        var_name: (("year", "month"), np.loadtxt(path)[:, 1:])
        for var_name, path in {
            "slp_darwin": "da_o_soi_dar.txt",
            "slp_tahiti": "da_o_soi_tah.txt",
        }.items()
    },
    coords={"year": np.arange(1855, 2021), "month": np.arange(1, 13)},
    attrs={"unit": "hPa"},
)

# -990 marks missing values; mask those entries as NaN.
soi = soi.where(soi != -990)

Select temporal range, compute seasonal means and save data to new file:

In [14]:
# Keep the season's months and the requested years, then average the
# remaining months of each year.
months = soi.month
years = soi.year
season_months = months[(months >= month_s) & (months <= month_e)]
target_years = years[(years >= year_s) & (years <= year_e)]
soi = soi.sel(month=season_months, year=target_years).mean("month")

# Write the seasonal means to a new file.
soi.to_netcdf("da_pred_soi_all.nc")

# AMO (Atlantic Multidecadal Oscillation) (1)

Load the text file into an xarray DataArray and set coordinate axes:

In [15]:
# Read and parse the file once, and close it afterwards (it was previously
# opened twice and never closed). The last four lines are trailing text and
# the first line is skipped as a header.
with open("da_o_amo_detrend.txt") as amo_file:
    amo_table = np.loadtxt(amo_file.readlines()[:-4], skiprows=1)

# Column 0 holds the years; the remaining columns are labelled as months 1-12.
amo = xr.DataArray(
    data=amo_table[:, 1:],
    dims=("year", "month"),
    name="amo",
    coords={
        "year": amo_table[:, 0],
        "month": np.arange(1, 13),
    },
    attrs={"unit": "degree C"},
)

# -99.99 marks missing values; mask those entries as NaN.
amo = amo.where(amo != -99.99)

Select temporal range, compute seasonal means and save data to new file:

In [16]:
# Keep the season's months and the requested years, then average the
# remaining months of each year.
months = amo.month
years = amo.year
season_months = months[(months >= month_s) & (months <= month_e)]
target_years = years[(years >= year_s) & (years <= year_e)]
amo = amo.sel(month=season_months, year=target_years).mean("month")

# Write the seasonal means to a new file.
amo.to_netcdf("da_pred_amo.nc")

In [17]:
amo

In [68]:
# Scratch check: open the ERSST file and display its `sst` variable.
a = xr.open_dataset('da_my_ersst.nc').sst
a

In [69]:
# Take one positional slice for inspection.
# NOTE(review): which axis is time/lat/lon depends on the file's dimension
# order, which is not visible here - confirm before relying on it.
b = a[1600, 0, :, 5]

In [70]:
b

In [73]:
# Scratch check: rebind `a` to the `anom` variable of the global temperature file.
a = xr.open_dataset('da_o_globaltempv5.nc').anom

In [74]:
a

In [75]:
# Take the same positional slice from the anomaly data for inspection.
# NOTE(review): axis meaning depends on the file's dimension order - confirm.
b = a[1600, 0, :, 5]

In [76]:
b

In [81]:
# Load the Sahel precipitation table, skipping the first 8 lines of the file.
rf = np.loadtxt("da_o_sahelprecip19012017.txt", skiprows=8,)

In [89]:
# List the positions of NaN entries in rf; the recorded output is empty,
# i.e. none were found.
np.argwhere(np.isnan(rf))


array([], shape=(0, 2), dtype=int64)

In [30]:
# Small one-dimensional test array containing a single NaN.
x = xr.DataArray([0, 7, 1, np.nan, 2], dims=["x"])

In [31]:
# Mean of the test array, to see how the NaN entry is treated.
x.mean()