# Interactive Hurricane Frequencies Plot

This script generates simulated hurricane frequency data for a user-specified number of years into the future and answers the question: how large would the slope of the hurricane frequency function have to be for the increase to be statistically detectable by the end year?


For example, if the year 2054 is chosen: what would the slope of the hurricane frequency function have to be for us to detect, when asking the question in 2054, that the frequency of hurricanes has been increasing?

### Methods

Normal-distribution modeling of yearly hurricane counts and p-value analysis of the fitted linear-regression slope.

In [103]:
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from scipy.stats import linregress
import numpy as np
import ipywidgets as widgets
from ipywidgets import interactive

In [104]:
def line(mean_hurricanes, slope, years, model_cutoff_year):
    """
    Evaluate the linear hurricane-count model at the given years.

    The model equals ``mean_hurricanes`` at ``model_cutoff_year`` and changes
    by ``slope`` for every year away from that reference year.

    Parameters:
    mean_hurricanes (float): Model value at the reference year
    slope (float or int): Change in the number of hurricanes per year
    years (numpy array): Years at which the model is evaluated
    model_cutoff_year (int): Reference year at which the model equals mean_hurricanes

    Returns:
    numpy array: Modelled number of hurricanes, one value per entry of years
    """
    # Express each year as an offset from the reference year first, so the
    # intercept of the line is simply the observed mean.
    years_since_cutoff = years - model_cutoff_year
    trend = slope * years_since_cutoff
    return mean_hurricanes + trend

In [105]:
def simulate_hurricanes(slope, std, mean_hurricanes, first_sim_year=2024, time_range=30, rng=None):
    """
    Simulates yearly hurricane counts scattered around a linear trend.

    For each simulated year the expected count is taken from ``line`` (anchored
    at ``mean_hurricanes`` in ``first_sim_year``), and the simulated count is a
    normal draw around that value, rounded to a whole number.

    Parameters:
    slope (float or int): Desired slope for the model (hurricanes per year).
    std (float or int): Standard deviation of the observed frequency distribution of hurricanes.
    mean_hurricanes (float): Mean number of hurricanes in observed data; the model value at first_sim_year.
    first_sim_year (int): The first year for simulation, defaults to 2024.
    time_range (int): Number of years to simulate, defaults to 30.
    rng (numpy.random.Generator or numpy.random.RandomState, optional): Source of
        random draws. When omitted, NumPy's global random state is used, so
        existing callers (and ``np.random.seed``) behave as before.

    Returns:
    sim (pandas DataFrame): Simulated data with nullable Int32 columns
        'Year' and 'n_hurricanes', one row per simulated year.

    Raises:
    ValueError: If time_range is negative.
    """
    if time_range < 0:
        raise ValueError(f"time_range must be non-negative, got {time_range}")

    # Consecutive integer years; built directly as integers instead of going
    # through a float linspace and casting back.
    x = np.arange(first_sim_year, first_sim_year + time_range, dtype=int)
    y = line(mean_hurricanes=mean_hurricanes, slope=slope, years=x, model_cutoff_year=first_sim_year)

    # Both the np.random module and Generator/RandomState objects expose
    # normal(loc, scale), so either can serve as the sampler.
    sampler = np.random if rng is None else rng
    # NOTE(review): a normal draw can fall below zero when the expected value
    # is within a few standard deviations of zero; counts are not clipped here.
    simulated_n = sampler.normal(y, scale=std).round()

    sim = pd.DataFrame({'Year': x, 'n_hurricanes': simulated_n}, dtype='Int32')
    return sim

In [106]:
def linear_fit(x, y, alpha=0.05):
    """
    Fit a line to the provided x and y values and print the slope, p-value, and the decision based on the p-value.

    The slope is reported as significant only when the p-value is strictly
    below ``alpha``. A p-value equal to ``alpha`` or an undefined (NaN)
    p-value is therefore reported as not significant.

    Parameters:
    x (array-like): Independent variable values
    y (array-like): Dependent variable values, same length as x
    alpha (float): Significance level for the decision, defaults to 0.05

    Returns:
    tuple: (slope, intercept, r_value, p_value, std_err) as produced by
        scipy.stats.linregress
    """
    # Perform linear regression using scipy's linregress function
    slope, intercept, r_value, p_value, std_err = linregress(x, y)

    # Print results to evaluate if the slope is consistent with no slope
    print(f"Slope: {slope}")
    print(f"P-value: {p_value}")

    # Decision based on p-value. Testing "p < alpha" (rather than "p > alpha"
    # for the opposite branch) keeps a NaN p-value from being announced as a
    # significant result, because every comparison with NaN is False.
    if p_value < alpha:
        print("The slope is significantly different from zero.")
    else:
        print("The slope is not significantly different from zero (consistent with no slope).")

    return slope, intercept, r_value, p_value, std_err

In [107]:
def plot_data(df, first_sim_year):
    """
    Draws observed and simulated hurricane counts together with a fitted line.

    Rows whose 'Year' is before first_sim_year are drawn as original data and
    rows from first_sim_year onwards as simulated data. One regression line is
    fitted to all rows through linear_fit (which also prints the fit summary),
    and the plot title states whether the fitted slope has p < 0.05.

    Parameters:
    df (pd.DataFrame): Data containing 'Year' and 'n_hurricanes'
    first_sim_year (int): First simulated year; separates original from simulated rows
    """
    _, ax = plt.subplots()

    years = df['Year']
    point_groups = (
        (df[years < first_sim_year], 'blue', 'Original Data'),
        (df[years >= first_sim_year], 'orange', 'Simulated Data'),
    )
    for subset, colour, legend_label in point_groups:
        ax.scatter(subset['Year'], subset['n_hurricanes'], color=colour, label=legend_label)

    # Fit across observed and simulated years together and overlay the line
    fit_slope, fit_intercept, _, p_value, _ = linear_fit(years, df['n_hurricanes'])
    fitted_counts = fit_slope * years + fit_intercept
    ax.plot(years, fitted_counts, color='red', label='Fit Line')

    # A significant slope is highlighted with a red title
    if p_value < 0.05:
        conclusion = "Slope significantly different from 0 (p < 0.05)"
        title_style = {'color': 'red'}
    else:
        conclusion = "Slope not significantly different from 0 (p >= 0.05)"
        title_style = {}
    ax.set_title(f"Hurricane Simulation - {conclusion}", **title_style)

    ax.set_xlabel('Year')
    ax.set_ylabel('Number of Hurricanes')
    ax.legend()

    plt.show()

In [108]:
def run_simulation(observed_df, slope, std, mean_hurricanes, first_sim_year=2024, time_range=30):
    """
    Simulates future hurricane counts, appends them to the observed data and plots the result.

    Parameters:
    observed_df (pandas DataFrame): data containing observed hurricanes ('Year' and 'n_hurricanes').
    slope (float or int): desired slope for the model.
    std (float or int): standard deviation passed to the simulation.
    mean_hurricanes (float): mean number of hurricanes passed to the simulation.
    first_sim_year (int): first simulated year, defaults to 2024.
    time_range (int): number of years simulated, defaults to 30.

    Returns:
    None
    """
    sim_settings = {
        'slope': slope,
        'std': std,
        'mean_hurricanes': mean_hurricanes,
        'first_sim_year': first_sim_year,
        'time_range': time_range,
    }
    future_counts = simulate_hurricanes(**sim_settings)

    # Stack the simulated years below the observed ones with a fresh index
    combined = pd.concat([observed_df, future_counts], ignore_index=True)
    plot_data(combined, first_sim_year)

In [109]:
def interactive_plot(observed_df, slope, std, mean_hurricanes, first_sim_year=2024, time_range=30):
    """
    Callback that re-runs the simulation and redraws the plot for the given settings.

    All arguments are forwarded unchanged to run_simulation; nothing is returned.
    """
    run_simulation(
        observed_df=observed_df,
        slope=slope,
        std=std,
        mean_hurricanes=mean_hurricanes,
        first_sim_year=first_sim_year,
        time_range=time_range,
    )

In [110]:
# Load the storm records from the local parquet file.
df = pd.read_parquet('data/hurdat2.parquet')

# Count the distinct CycloneID values recorded in each year.
n_cyclones = (
    df.groupby('Year')['CycloneID']
    .nunique()
    .reset_index()
    .rename(columns={'CycloneID': 'n_hurricanes'})
)

# Keep only the years from 1972 onwards.
n_cyclones_filtered = n_cyclones.loc[n_cyclones['Year'] >= 1972, ['Year', 'n_hurricanes']]

# Mean and standard deviation of the yearly counts; these parameterise the simulation.
mean_hurricanes = n_cyclones_filtered['n_hurricanes'].mean()
one_std = n_cyclones_filtered['n_hurricanes'].std()

In [111]:
# Build the input widgets. Free-text numeric fields are used instead of
# bounded sliders (the earlier alternative was a FloatSlider over 0-1 in steps
# of 0.001 and an IntSlider over 5-100), so entries are not limited to a range.
slope_slider = widgets.FloatText(description='Slope:', value=0)
time_range_slider = widgets.IntText(description='Time Range (years):', value=30)

# Arguments wrapped in widgets.fixed are passed through on every redraw
# without getting a control of their own.
interactive_plot_widget = interactive(
    interactive_plot,
    observed_df=widgets.fixed(n_cyclones_filtered),
    slope=slope_slider,
    std=widgets.fixed(one_std),
    mean_hurricanes=widgets.fixed(mean_hurricanes),
    first_sim_year=widgets.fixed(2024),
    time_range=time_range_slider,
)
display(interactive_plot_widget)

interactive(children=(FloatText(value=0.0, description='Slope:'), IntText(value=30, description='Time Range (y…