In [84]:
import numpy as np
import pandas as pd
import netCDF4 as nc
from datetime import datetime, date, timedelta
import os
import xarray as xr

In [85]:
df = pd.read_csv("data/hurdat2_cleaned.csv", parse_dates=["observation_datetime","formation_datetime"])

In [86]:
df_lookup = df.loc[:,['code','formation_datetime', 'lat', 'lon']].drop_duplicates(keep='first', subset=['code'])

In [88]:
path_template = "{direction}.{year}.nc"
def get_vws_by_date_offset(ts, lat, lon, days_earlier):
	form_date = ts.date() # Convert from Timestamp to datetime.date
	target_date = form_date - timedelta(days = days_earlier)
	target_prev_sunday = target_date - timedelta(days = (target_date.weekday() + 1) % 7)
	target_first_of_month = date(target_date.year, target_date.month, 1)

	if form_date.year >= 2023:
		f = form_date.year
	else: # elif form_date.year >= 1979:
		f = "1979-2022"

	uwnd_path = path_template.format(direction='uwnd',year=f)
	vwnd_path = path_template.format(direction='vwnd',year=f)

	with xr.open_dataset(os.path.join("data","uwnd",uwnd_path)) as ds:
		uwnd_200 = ds['uwnd'].sel(
			lat = lat,
			lon = lon,
			level = 200,
			time = target_date,
			method = 'nearest'
		).item()

		uwnd_850 = ds['uwnd'].sel(
			lat = lat,
			lon = lon,
			level = 850,
			time = target_date,
			method = 'nearest'
		).item()

	with xr.open_dataset(os.path.join("data","vwnd",vwnd_path)) as ds:
		vwnd_200 = ds['vwnd'].sel(
			lat = lat,
			lon = lon,
			level = 200,
			time = target_date,
			method = 'nearest'
		).item()

		vwnd_850 = ds['vwnd'].sel(
			lat = lat,
			lon = lon,
			level = 850,
			time = target_date,
			method = 'nearest'
		).item()

	delta_u = uwnd_200 - uwnd_850
	delta_v = vwnd_200 - vwnd_850
	vws = np.sqrt(delta_u**2 + delta_v**2)
	return vws

In [91]:
lead_times = [0, 3, 7, 14]
for lead in lead_times:
	df_lookup[f'vws_minus_{lead}_days'] = df_lookup.apply(axis='columns', func = lambda row: get_vws_by_date_offset(row['formation_datetime'], row['lat'], row['lon'], lead))
df_lookup.head()

Unnamed: 0,code,formation_datetime,lat,lon,vws_minus_0_days,vws_minus_3_days,vws_minus_7_days,vws_minus_14_days
0,AL011851,1851-06-25 00:00:00,28.0,265.2,35.908143,35.908143,35.908143,35.908143
14,AL021851,1851-07-05 12:00:00,22.2,262.4,29.99821,29.99821,29.99821,29.99821
15,AL031851,1851-07-10 12:00:00,12.0,300.0,38.835399,38.835399,38.835399,38.835399
16,AL041851,1851-08-16 00:00:00,13.4,312.0,39.611651,39.611651,39.611651,39.611651
65,AL051851,1851-09-13 00:00:00,32.5,286.5,39.99749,39.99749,39.99749,39.99749


In [92]:
df_lookup.drop(inplace=True, axis='columns', labels=['lat','lon','formation_datetime'])
df_lookup.head()

Unnamed: 0,code,vws_minus_0_days,vws_minus_3_days,vws_minus_7_days,vws_minus_14_days
0,AL011851,35.908143,35.908143,35.908143,35.908143
14,AL021851,29.99821,29.99821,29.99821,29.99821
15,AL031851,38.835399,38.835399,38.835399,38.835399
16,AL041851,39.611651,39.611651,39.611651,39.611651
65,AL051851,39.99749,39.99749,39.99749,39.99749


In [93]:
df_lookup.to_csv('data/vws.csv', index=False)