In [1]:
import numpy as np
import pandas as pd
import netCDF4 as nc
from datetime import date, timedelta
import os
import xarray as xr

In [3]:
# Load the formation lookup table; 'formation_datetime' is parsed into
# datetimes so date arithmetic can be done on it later.
df_lookup = pd.read_csv(
	"../data/processed/predictors/formation.csv",
	parse_dates=["formation_datetime"],
)

In [14]:
def get_sst_by_date_offset(ts, lat, lon, days_earlier):
	"""Look up the surface temperature nearest a point, some days before a timestamp.

	The calendar date of ``ts`` is shifted back by ``days_earlier`` days, and
	the value nearest to (``lat``, ``lon``, shifted date) is read from the
	file covering that date: the per-year ``OISSTv2hi`` file (variable
	``sst``) when the shifted date falls in 1982 or later, otherwise the
	single ``20CRV3`` file (variable ``skt``).

	Parameters
	----------
	ts : pandas.Timestamp
		Reference time (anything exposing ``.date()``); the time of day is
		discarded.
	lat : float
		Latitude passed to the nearest-neighbour selection.
	lon : float
		Longitude in degrees. Either the -180..180 or the 0..360 convention
		is accepted; the value is shifted to match the range of the opened
		file's ``lon`` coordinate before the selection.
	days_earlier : int
		Number of days to subtract from the date of ``ts``.

	Returns
	-------
	float
		The selected value. ``sst`` values have 273.15 added (°C -> K);
		``skt`` values are returned unchanged (presumably already in K --
		confirm against the 20CRV3 file metadata).
	"""
	form_date = ts.date()  # Timestamp -> datetime.date, dropping the time of day
	lookup_date = form_date - timedelta(days=days_earlier)

	# The file is chosen from the *shifted* date, so a lookup that crosses a
	# year boundary reads the earlier year's file.
	if lookup_date.year >= 1982:
		path = "OISSTv2hi/sst.day.mean.{year}.nc".format(year=lookup_date.year)
		var_name = 'sst'
		convert = True
	else:
		path = "20CRV3/sst.1851-1981.nc"
		var_name = 'skt'
		convert = False

	with xr.open_dataset(os.path.join("../data", "raw", "sst", path)) as ds:
		# Bring lon into the grid's own convention. Without this, a negative
		# longitude looked up on a 0..360 grid with method='nearest' silently
		# snaps to the grid edge instead of the intended meridian.
		lon_axis = ds['lon']
		lon_min = float(lon_axis.min())
		lon_max = float(lon_axis.max())
		if lon < 0 and lon_max > 180:
			lon = lon % 360
		elif lon > 180 and lon_min < 0:
			lon = (lon + 180) % 360 - 180

		sst = ds[var_name].sel(
			lat=lat,
			lon=lon,
			# Explicit datetime label (midnight of lookup_date) rather than
			# relying on implicit coercion of a bare datetime.date.
			time=pd.Timestamp(lookup_date),
			method='nearest'
		).item()

	if convert:
		sst += 273.15  # convert from °C to K

	return sst

In [13]:
# Offsets, in days before the formation date, at which the temperature is sampled.
lead_times = [0, 3, 7, 14]
for lead in lead_times:
	# One new column per offset. apply() runs immediately, so the lambda
	# reads the current value of `lead` on every iteration.
	df_lookup[f'sst_minus_{lead}_days'] = df_lookup.apply(
		lambda storm: get_sst_by_date_offset(
			storm['formation_datetime'],
			storm['formation_lat'],
			storm['formation_lon'],
			lead,
		),
		axis='columns',
	)

In [17]:
# Drop the formation inputs so only the remaining columns (including the
# sst_minus_*_days predictors) are carried into `results`.
results = df_lookup.drop(
	columns=['formation_lat', 'formation_lon', 'formation_datetime'],
)
results.head()

Unnamed: 0,code,sst_minus_0_days,sst_minus_3_days,sst_minus_7_days,sst_minus_14_days
0,AL011851,300.822784,300.668274,300.664642,300.507751
1,AL011852,301.461853,301.511688,301.491577,301.323486
2,AL011853,300.384827,300.278046,300.207855,300.172699
3,AL011854,301.57254,301.760437,301.408539,300.834229
4,AL011855,300.633118,300.985199,300.907898,301.544464


In [18]:
# Write the predictor table to disk; index=False leaves the row index out of the file.
results.to_csv(
	"../data/processed/predictors/sst.csv",
	index=False,
)