In [19]:
import numpy as np
import pandas as pd
import netCDF4 as nc
from datetime import date, timedelta
import os
import xarray as xr

In [20]:
# Per-resolution reference dates, keyed by temporal resolution.
# NOTE(review): presumably the epoch that each file's time axis counts from — confirm against the netCDF metadata.
base_dates = dict(
	day=date(1800, 1, 1),
	week=date(1800, 1, 1),
	month=date(1891, 1, 1),
)

# First date covered by each product (trailing comments give the last date in the local copy).
start_dates = dict(
	day=date(1981, 9, 1),    # to 11/9/25
	week=date(1981, 9, 6),   # to 11/2/25
	month=date(1850, 1, 1),  # to 10/1/25
)

# File locations for each product.
# Only the daily product is split per year and therefore carries a {year} placeholder.
data_file_paths = dict(
	day="OISSTv2hi/sst.day.mean.{year}.nc",
	week="OISSTv2hi/sst.week.mean.nc",
	month="COBE2/sst.mon.mean.nc",
)

In [21]:
def get_sst_by_date_offset(ts, lat, lon, days_earlier):
	"""Return the SST at the grid cell nearest (lat, lon), `days_earlier` days before `ts`.

	The daily product is used when the *target* date (``ts`` minus the offset)
	falls on or after ``start_dates['day']``; otherwise the monthly product is
	used and the lookup is made for the first day of the target month.

	Parameters
	----------
	ts : timestamp-like object exposing ``.date()`` (e.g. ``pandas.Timestamp``)
		Reference moment; only its calendar date is used.
	lat, lon : float
		Coordinates passed straight to ``.sel(..., method='nearest')``.
		NOTE(review): the notebook's data uses 0-360 longitudes; this is
		assumed to match the grid convention of the SST files — confirm.
	days_earlier : int or float
		Number of days to step back from ``ts`` (forwarded to ``timedelta(days=...)``).

	Returns
	-------
	Scalar value produced by ``.item()`` on the selected ``sst`` cell.
	NOTE(review): may be NaN if the nearest cell holds no data — not checked here.

	Errors raised by ``xr.open_dataset`` (e.g. a missing file) propagate unchanged.
	"""
	form_date = ts.date() # Convert from Timestamp to datetime.date
	target_date = form_date - timedelta(days = days_earlier)

	# Decide on the product from the date actually being looked up, not from the
	# formation date: a dataset has to cover the *target* day, and the formation
	# year alone mis-routes 1981 dates that precede the start of the daily record.
	if target_date >= start_dates['day']:
		temporal_res = 'day'
		lookup_date = target_date
	else:
		temporal_res = 'month'
		lookup_date = date(target_date.year, target_date.month, 1)

	# Daily files are split per calendar year, so the file must be chosen by the
	# target date's year; otherwise an offset that crosses New Year opens the
	# wrong file and 'nearest' silently snaps to that file's first time step.
	# (The monthly path has no {year} placeholder, so format() leaves it as is.)
	path = data_file_paths[temporal_res].format(year=target_date.year)

	with xr.open_dataset(os.path.join("data","sst",path)) as ds:
		sst = ds['sst'].sel(
			lat = lat,
			lon = lon,
			time = lookup_date,
			method = 'nearest'
		).item()

	return sst

In [22]:
# Load the cleaned HURDAT2 table, parsing both datetime columns up front.
df = pd.read_csv(
	"data/hurdat2_cleaned.csv",
	parse_dates=["observation_datetime", "formation_datetime"],
)

In [23]:
# One row per storm code: keep only the columns needed for the SST lookup and
# retain the first row encountered for each code.
df_lookup = (
	df[['code', 'formation_datetime', 'lat', 'lon']]
	.drop_duplicates(subset=['code'], keep='first')
)
df_lookup.head()

Unnamed: 0,code,formation_datetime,lat,lon
0,AL011851,1851-06-25 00:00:00,28.0,265.2
14,AL021851,1851-07-05 12:00:00,22.2,262.4
15,AL031851,1851-07-10 12:00:00,12.0,300.0
16,AL041851,1851-08-16 00:00:00,13.4,312.0
65,AL051851,1851-09-13 00:00:00,32.5,286.5


In [25]:
# Offsets (in days before formation) at which to sample the SST.
lead_times = [0, 3, 7, 14]

# Add one column per offset; each row triggers its own dataset lookup.
for lead in lead_times:
	df_lookup[f'sst_minus_{lead}_days'] = df_lookup.apply(
		lambda row: get_sst_by_date_offset(
			row['formation_datetime'],
			row['lat'],
			row['lon'],
			lead,
		),
		axis=1,
	)

In [26]:
# The lookup inputs are no longer needed once the SST columns exist; remove
# them from df_lookup itself (in place) so only `code` and the SST columns remain.
df_lookup.drop(columns=['lat', 'lon', 'formation_datetime'], inplace=True)
df_lookup.head()

Unnamed: 0,code,sst_minus_0_days,sst_minus_3_days,sst_minus_7_days,sst_minus_14_days
0,AL011851,27.493002,27.493002,27.493002,27.493002
14,AL021851,28.438002,28.438002,27.738001,27.738001
15,AL031851,27.893002,27.893002,27.893002,27.575001
16,AL041851,27.195002,27.195002,27.195002,27.195002
65,AL051851,26.835001,26.835001,26.835001,27.370001


In [27]:
# Write the per-storm SST table to disk without the DataFrame index.
df_lookup.to_csv(
	"data/sst.csv",
	index=False,
)