In [10]:
import numpy as np
import pandas as pd
import netCDF4 as nc
from datetime import datetime, date, timedelta
import os
import xarray as xr

In [11]:
# Load the formation lookup table; `formation_datetime` is parsed into
# timestamps at read time so later cells can do date arithmetic on it.
formation_csv = '../data/processed/predictors/formation.csv'
df_lookup = pd.read_csv(formation_csv, parse_dates=['formation_datetime'])

In [12]:
path_template = "{direction}.{year}.nc"


def _reanalysis_file_key(year):
	"""Return the `year` token of the wind file that covers calendar `year`.

	Years from 2023 on map to themselves (one file per year); earlier years
	map to one of the two multi-year files, "1979-2022" or "1851-1978".
	"""
	if year >= 2023:
		return year
	if year >= 1979:
		return "1979-2022"
	return "1851-1978"


def _wind_at_levels(component, file_key, lat, lon, when, levels=(200, 850)):
	"""Read one wind component at each of `levels`, nearest to (lat, lon, when).

	`component` is both the sub-directory / file prefix and the variable name
	inside the dataset ('uwnd' or 'vwnd'). The file is opened once and closed
	before returning. Returns a tuple of scalars, one per entry in `levels`.
	"""
	file_name = path_template.format(direction=component, year=file_key)
	nc_path = os.path.join("../data", "raw", component, file_name)
	with xr.open_dataset(nc_path) as ds:
		# tuple() drains the generator while the dataset is still open.
		return tuple(
			ds[component].sel(
				lat = lat,
				lon = lon,
				level = level,
				time = when,
				method = 'nearest'
			).item()
			for level in levels
		)


def get_vws_by_date_offset(ts, lat, lon, days_earlier):
	"""Wind-difference magnitude between levels 200 and 850, `days_earlier` days before `ts`.

	Parameters
	----------
	ts : timestamp-like exposing `.date()` (e.g. `pandas.Timestamp`)
		Reference (formation) time. Only the calendar date is used; the
		time of day is discarded.
	lat, lon : float
		Location; the nearest grid point in the dataset is used.
	days_earlier : int
		Number of days to step back from the reference date.

	Returns
	-------
	float
		sqrt(du**2 + dv**2), where du and dv are the level-200 minus
		level-850 differences of 'uwnd' and 'vwnd' respectively.
		NOTE(review): levels are presumably pressure levels in hPa and the
		result in the wind variables' native units - confirm against the files.

	Raises
	------
	Whatever `xarray.open_dataset` raises when the file covering the target
	date does not exist.
	"""
	form_date = ts.date() # Convert from Timestamp to datetime.date
	target_date = form_date - timedelta(days = days_earlier)

	# Choose the file from the *target* date, not the formation date: a lead
	# time that crosses a file boundary (e.g. early January 1979 minus 14 days)
	# must read from the earlier file. Otherwise method='nearest' silently
	# snaps to the first timestep of the wrong file.
	file_key = _reanalysis_file_key(target_date.year)

	uwnd_200, uwnd_850 = _wind_at_levels('uwnd', file_key, lat, lon, target_date)
	vwnd_200, vwnd_850 = _wind_at_levels('vwnd', file_key, lat, lon, target_date)

	delta_u = uwnd_200 - uwnd_850
	delta_v = vwnd_200 - vwnd_850
	return np.sqrt(delta_u**2 + delta_v**2)

In [13]:
lead_times = [0, 3, 7, 14]


def _vws_for_row(row, days_earlier):
	"""Evaluate get_vws_by_date_offset for one lookup row at the given lead."""
	return get_vws_by_date_offset(
		row['formation_datetime'],
		row['formation_lat'],
		row['formation_lon'],
		days_earlier,
	)


# One new column per lead time, named after the number of days stepped back.
for lead in lead_times:
	column_name = f'vws_minus_{lead}_days'
	df_lookup[column_name] = df_lookup.apply(_vws_for_row, axis='columns', args=(lead,))
df_lookup.head()

Unnamed: 0,code,formation_datetime,formation_lat,formation_lon,vws_minus_0_days,vws_minus_3_days,vws_minus_7_days,vws_minus_14_days
0,AL011851,1851-06-25 00:00:00,28.0,265.2,9.729565,21.045244,18.073967,17.749613
1,AL011852,1852-08-19 00:00:00,20.5,292.9,7.38649,6.904296,10.023103,13.101295
2,AL011853,1853-08-05 12:00:00,32.5,291.0,10.547558,5.77583,7.587143,6.447095
3,AL011854,1854-06-25 00:00:00,26.0,267.5,19.812634,17.166405,21.029163,16.541707
4,AL011855,1855-08-06 12:00:00,22.2,262.0,2.935428,3.440043,0.690577,2.675961


In [14]:
# Drop the formation position/time inputs, leaving the remaining columns
# (including the vws_minus_*_days predictors) for export.
formation_input_cols = ['formation_lat', 'formation_lon', 'formation_datetime']
results = df_lookup.drop(columns=formation_input_cols)
results.head()

Unnamed: 0,code,vws_minus_0_days,vws_minus_3_days,vws_minus_7_days,vws_minus_14_days
0,AL011851,9.729565,21.045244,18.073967,17.749613
1,AL011852,7.38649,6.904296,10.023103,13.101295
2,AL011853,10.547558,5.77583,7.587143,6.447095
3,AL011854,19.812634,17.166405,21.029163,16.541707
4,AL011855,2.935428,3.440043,0.690577,2.675961


In [15]:
# Write the predictor table to disk without the positional index.
vws_csv = '../data/processed/predictors/vws.csv'
results.to_csv(vws_csv, index=False)