In [87]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

In [88]:
KM_PER_NM = 1.852  # kilometres per nautical mile
def calculate_ellipse_area(r_ne, r_se, r_sw, r_nw):
	"""Approximate the area (km^2) of a wind field as an ellipse.

	The four arguments are the quadrant radii (NE, SE, SW, NW), expressed in
	the unit that KM_PER_NM converts to kilometres (nautical miles, going by
	the constant's name).

	Opposite quadrants are paired: NE + SW spans one full axis of the ellipse
	and NW + SE spans the other. Each full axis is halved to obtain the
	semi-axis, so the result is ``pi * semi_a * semi_b``. With four equal
	radii ``r`` this reduces to the circle area ``pi * (KM_PER_NM * r) ** 2``.

	Returns:
		The area in square kilometres, or ``np.nan`` when any radius is NaN
		or when the computed area is exactly zero.
	"""
	if any(np.isnan(x) for x in (r_ne, r_se, r_sw, r_nw)):
		return np.nan
	# The sum of two opposite radii is a full axis (a diameter); the ellipse
	# area formula needs semi-axes, hence the division by two.
	semi_a = KM_PER_NM * (r_ne + r_sw) / 2
	semi_b = KM_PER_NM * (r_nw + r_se) / 2
	area = np.pi * semi_a * semi_b
	# A zero area is reported as missing rather than as a real measurement.
	return np.nan if area == 0 else area

In [89]:
def calculate_circle_area(r):
	"""Return the area (km^2) of a circle of radius ``r``.

	``r`` is scaled by KM_PER_NM before squaring. A radius that is NaN or
	exactly zero yields ``np.nan`` instead of an area.
	"""
	radius_is_usable = not (np.isnan(r) or r == 0)
	if not radius_is_usable:
		return np.nan
	radius_km = KM_PER_NM * r
	return np.pi * radius_km ** 2

In [90]:
# Load the input table that the size calculations below read their radii from.
df = pd.read_csv(filepath_or_buffer = "data/hurdat2_cleaned.csv")

In [91]:
# Wind-speed thresholds (kt) for which per-quadrant radius columns are read.
speeds = [34, 50, 64]
for v in speeds:
	# Column order matches calculate_ellipse_area's signature: NE, SE, SW, NW.
	radius_cols = [f'r_{v}kt_{quadrant}' for quadrant in ('ne', 'se', 'sw', 'nw')]
	df[f'size_{v}kt_sqkm'] = df.apply(
		lambda row: calculate_ellipse_area(*(row[col] for col in radius_cols)),
		axis = 1,
	)

# The 'r_max_sus' column is a single radius, so it is sized as a circle.
df['size_max_sqkm'] = df.apply(
	lambda row: calculate_circle_area(row['r_max_sus']),
	axis = 1,
)

In [92]:
# For every distinct 'code', keep the largest value seen in each size column,
# then turn 'code' back into an ordinary column.
size_columns = ['size_34kt_sqkm', 'size_50kt_sqkm', 'size_64kt_sqkm', 'size_max_sqkm']
gb = (
	df.groupby('code')
	.agg({column: 'max' for column in size_columns})
	.reset_index()
)

In [93]:
# Persist the per-code maxima; the positional index is not written out.
gb.to_csv(path_or_buf = 'data/size.csv', index = False)