In [1]:
import numpy as np
import pandas as pd
import netCDF4 as nc
from datetime import datetime, date, timedelta
import os
import xarray as xr

In [2]:
# Load the cleaned HURDAT2 table; both datetime columns are parsed on read.
df = pd.read_csv(
	"data/hurdat2_cleaned.csv",
	parse_dates=["observation_datetime", "formation_datetime"],
)

In [3]:
# Keep only the first row seen for each storm code, with the columns needed
# to look up humidity (formation time and a lat/lon position).
df_lookup = (
	df[['code', 'formation_datetime', 'lat', 'lon']]
	.drop_duplicates(subset=['code'], keep='first')
)

In [4]:
# File-name template for the relative-humidity NetCDF files under data/rhum/.
file_path = "rhum.{year}.nc"


def _rhum_file_key(year):
	"""Return the value substituted for ``{year}`` in ``file_path`` for a calendar year."""
	if year >= 2023:
		return year
	if year >= 1979:
		return "1979-2022"
	if year >= 1900:
		return "1900-1978"
	return "1851-1899"


def get_rhum_by_date_offset(ts, lat, lon, days_earlier):
	"""Look up the ``rhum`` value near a storm position some days before ``ts``.

	Parameters
	----------
	ts : object with a ``.date()`` method (e.g. ``pandas.Timestamp``)
		Reference (formation) timestamp. Only its calendar date is used;
		the time of day is discarded.
	lat, lon : float
		Position passed to the dataset's ``lat`` / ``lon`` coordinates.
		The displayed data uses 0-360 longitudes -- presumably the files do
		too; verify against the dataset.
	days_earlier : int
		Number of days to subtract from the date of ``ts``.

	Returns
	-------
	float
		The single ``rhum`` value at level 600 selected with
		``method='nearest'`` on every coordinate.

	Notes
	-----
	The file is chosen from the year of the *target* date (``ts`` minus
	``days_earlier``), not from the year of ``ts``. Otherwise a lookup that
	crosses a file boundary (e.g. early January 2023 minus 14 days) would
	open a file that does not contain the requested day.

	NOTE(review): nearest-neighbour selection never fails for a date outside
	the file's time range; it silently returns the closest available time
	step. Identical values across all lead times for a storm are a hint that
	this is happening -- confirm the time coverage of the files.
	"""
	target_date = ts.date() - timedelta(days=days_earlier)
	path = file_path.format(year=_rhum_file_key(target_date.year))

	# The context manager closes the file handle after the scalar is extracted.
	with xr.open_dataset(os.path.join("data", "rhum", path)) as ds:
		rhum = ds['rhum'].sel(
			lat=lat,
			lon=lon,
			level=600,
			time=target_date,
			method='nearest',
		).item()

	return rhum


In [5]:
# Lead times, in days before formation, at which humidity is looked up.
lead_times = [0, 3, 7, 14]
for lead in lead_times:
	# One new column per lead time; each row triggers one dataset lookup.
	df_lookup[f'rhum_minus_{lead}_days'] = df_lookup.apply(
		lambda storm, offset: get_rhum_by_date_offset(
			storm['formation_datetime'], storm['lat'], storm['lon'], offset
		),
		axis='columns',
		args=(lead,),
	)
df_lookup.head()

Unnamed: 0,code,formation_datetime,lat,lon,rhum_minus_0_days,rhum_minus_3_days,rhum_minus_7_days,rhum_minus_14_days
0,AL011851,1851-06-25 00:00:00,28.0,265.2,58.062843,58.062843,58.062843,58.062843
14,AL021851,1851-07-05 12:00:00,22.2,262.4,74.068085,78.490578,55.125404,37.810688
15,AL031851,1851-07-10 12:00:00,12.0,300.0,65.337807,44.401619,43.306999,32.944099
16,AL041851,1851-08-16 00:00:00,13.4,312.0,68.846222,48.393429,51.26202,58.588402
65,AL051851,1851-09-13 00:00:00,32.5,286.5,40.366375,42.911156,71.011948,44.569759


In [6]:
# Remove the lookup inputs so only the storm code and humidity columns remain.
# NOTE: this mutates df_lookup in place, so re-running the cell without
# re-running the earlier cells raises KeyError.
df_lookup.drop(columns=['lat', 'lon', 'formation_datetime'], inplace=True)
df_lookup.head()

Unnamed: 0,code,rhum_minus_0_days,rhum_minus_3_days,rhum_minus_7_days,rhum_minus_14_days
0,AL011851,58.062843,58.062843,58.062843,58.062843
14,AL021851,74.068085,78.490578,55.125404,37.810688
15,AL031851,65.337807,44.401619,43.306999,32.944099
16,AL041851,68.846222,48.393429,51.26202,58.588402
65,AL051851,40.366375,42.911156,71.011948,44.569759


In [7]:
# Write the per-storm humidity table without the DataFrame index.
df_lookup.to_csv(path_or_buf='data/rhum.csv', index=False)