In [1]:
import numpy as np
import pandas as pd
import netCDF4 as nc
from datetime import datetime, date, timedelta
import os
import xarray as xr

In [10]:
# Load the formation table; 'formation_datetime' is parsed into Timestamps so
# that later cells can call .date() on each value.
df_lookup = pd.read_csv(
	'../data/processed/predictors/formation.csv',
	parse_dates=['formation_datetime'],
)

In [13]:
file_path = "rhum.{year}.nc"
def get_rhum_by_date_offset(ts, lat, lon, level, days_earlier):
	"""Return the 'rhum' value nearest to a point, `days_earlier` days before `ts`.

	Parameters
	----------
	ts : pandas.Timestamp or datetime.datetime
		Reference (formation) time. Only its calendar date is used; the
		time-of-day component is discarded.
	lat, lon : float
		Coordinates of the point, in the same convention as the dataset's
		'lat' / 'lon' coordinates (nearest grid point is selected).
	level : number
		Value of the dataset's 'level' coordinate to sample (nearest match).
	days_earlier : int
		Number of days to step back from the date of `ts`.

	Returns
	-------
	float
		The single 'rhum' value at the nearest (lat, lon, level, time) grid point.

	Notes
	-----
	The dataset file is opened on every call, so applying this row by row
	over a large table re-reads the same files many times.
	"""
	form_date = ts.date() # Convert from Timestamp to datetime.date
	target_date = form_date - timedelta(days = days_earlier)

	# Choose the file from the year of the date actually being looked up, not
	# the formation year: stepping back across a file boundary would otherwise
	# open a file that does not contain the target date, and the 'nearest'
	# lookup below would silently return a value from the wrong date.
	target_year = target_date.year
	if target_year >= 2023:
		f = target_year
	elif target_year >= 1979:
		f = "1979-2022"
	elif target_year >= 1900:
		f = "1900-1978"
	elif target_year >= 1852:
		f = "1851-1899"
	else:
		# 1851 itself, plus any target that falls before it; judging by the
		# file names this is the earliest file, so 'nearest' resolves such
		# targets to the first available time step.
		f = "1851"

	path = file_path.format(year=f)

	with xr.open_dataset(os.path.join("../data", "raw", "rhum", path)) as ds:
		rhum = ds['rhum'].sel(
			lat = lat,
			lon = lon,
			level = level,  # honour the caller's level instead of a fixed 600
			time = target_date,
			method = 'nearest'
		).item()

	return rhum


In [14]:
# Day offsets (before formation) and 'level' values to sample.
lead_times = [0, 3, 7, 14]
levels = [600]

for lead in lead_times:
	# NOTE: the column name encodes only the lead time, not the level, so
	# with more than one entry in `levels` later levels overwrite earlier ones.
	column_name = f'rhum_minus_{lead}_days'
	for level in levels:
		df_lookup[column_name] = df_lookup.apply(
			lambda row, level=level, lead=lead: get_rhum_by_date_offset(
				row['formation_datetime'],
				row['formation_lat'],
				row['formation_lon'],
				level,
				lead,
			),
			axis=1,
		)

df_lookup.head()

Unnamed: 0,code,formation_datetime,formation_lat,formation_lon,rhum_minus_0_days,rhum_minus_3_days,rhum_minus_7_days,rhum_minus_14_days
0,AL011851,1851-06-25 00:00:00,28.0,265.2,58.062843,20.147112,40.271477,25.254938
1,AL011852,1852-08-19 00:00:00,20.5,292.9,79.717346,32.160221,37.715549,43.546444
2,AL011853,1853-08-05 12:00:00,32.5,291.0,69.818024,52.336311,38.80954,48.559959
3,AL011854,1854-06-25 00:00:00,26.0,267.5,65.964127,53.569069,60.588303,64.112236
4,AL011855,1855-08-06 12:00:00,22.2,262.0,71.772766,74.630409,75.832008,60.650848


In [16]:
# Keep only the storm code and the derived rhum columns; the formation
# position/time columns were needed only for the lookup.
results = df_lookup.drop(
	columns=['formation_lat', 'formation_lon', 'formation_datetime'],
)
results.head()

Unnamed: 0,code,rhum_minus_0_days,rhum_minus_3_days,rhum_minus_7_days,rhum_minus_14_days
0,AL011851,58.062843,20.147112,40.271477,25.254938
1,AL011852,79.717346,32.160221,37.715549,43.546444
2,AL011853,69.818024,52.336311,38.80954,48.559959
3,AL011854,65.964127,53.569069,60.588303,64.112236
4,AL011855,71.772766,74.630409,75.832008,60.650848


In [17]:
# Write the predictor table without the DataFrame index.
results.to_csv(
	'../data/processed/rhum.csv',
	index=False,
)