_Add back in: 1) opening geopolar SST (It gets used to provide coordinates to the stats arrays) 2) Read the `event_statistics` dict from the pickle that gets made during MHW calculation._

## Calculate Average Stats by pixel

Define functions for each of the MHW stats aggregations.

In [18]:
def no_occurrences(time_arr):
    '''
    returns the number of MHW events detected over the full time series
    '''
    return len(time_arr['time_start'])

In [19]:
def avg_duration(time_arr):
    '''
    returns the average duration of a MHW event over the full time series
    '''
    return np.mean(time_arr['duration'])

In [20]:
def avg_max_intensity(time_arr):
    '''
    returns the average value for the maximum intensity, `intensity_max`, 
    averaged over the full time series
    '''
    return np.mean(time_arr['intensity_max'])

In [25]:
def avg_mean_intensity(time_arr):
    '''
    returns the average value for the mean intensity, `intensity_mean`, 
    averaged over the full time series
    '''
    return np.mean(time_arr['intensity_mean'])

In [21]:
def avg_no_annual_events(time_arr):
    '''
    counts the number of mhw in each year and returns the average number
    over MHW events per year
    '''
    # getting the years from the start date
    f = lambda x: x.year
    years = np.vectorize(f, otypes=[int])(time_arr['date_start'])
    
    # count the number of events for every year from 2003 to 2021
    # don't include 2002 or 2022 because we don't have the full time series
    year_counts = np.empty(len(range(2003, 2022)))
    for i, yr in enumerate(range(2003, 2022)):
        year_counts[i] = np.count_nonzero(years == yr)
    
    # find the average number of events per year
    return np.mean(year_counts)

In [26]:
def avg_rate_onset(time_arr):
    '''
    returns the rate of onset of MHW, averaged over all MHW in the time 
    series (`rate_onset`)
    '''
    return np.mean(time_arr['rate_onset'])

In [27]:
def avg_rate_decline(time_arr):
    '''
    returns the rate of decline of MHW, averaged over all MHW in the time series 
    (`rate_decline`)
    '''
    return np.mean(time_arr['rate_decline'])

In [28]:
def avg_cum_intensity(time_arr):
    '''
    calculate the cumulative intensity in each year and returns the average over
    all the years
    '''
    # getting the years from the start date
    f = lambda x: x.year
    years = np.vectorize(f, otypes=[int])(time_arr['date_start'])
    
    # create a pandas dataframe to zip the years and the cumultive intensities
    df = pd.DataFrame({'year': years, 
                       'cumulative_intensity': time_arr['intensity_cumulative']})
    # groupby to take the mean of each year
    df = df.groupby('year').mean()
    # reindex to make sure that years without marine heatwaves are still present
    df = df.reindex(range(2003, 2022), fill_value=0)
    
    # find the average number of events per year
    return df['cumulative_intensity'].mean()

Run the functions on each of the pixels in the dataset

In [29]:
def apply_function(function, mhw_event_statistics, matching_arr):
    """
    Apply the given function in a vectorized manner (over all the pixels)
    to the MHW event data from mhw.detect(). Reformat and output as a
    dataarray.
    :function: a function which takes a 1D array that is to be applied to all
    the pixels
    :avg_stats_arr: the 0th output of the mhw.detect() function, in an array
    form for all of the pixels
    :matching_arr: a dataarray with lat and lon that has coordinate dimensions
    that matching the pixel locations of avg_stats_arr
    """
    stats_output = np.vectorize(function)(mhw_event_statistics)
    return xr.DataArray(stats_output, 
                          coords=[matching_arr.lat.values, 
                                  matching_arr.lon.values], 
                          dims=['latitude', 'longitude'])

In [30]:
no_occ_da = apply_function(no_occurrences, event_statstics, geopolar)
avg_dur_da = apply_function(avg_duration, event_statstics, geopolar)
avg_max_int_da = apply_function(avg_max_intensity, event_statstics, geopolar)
avg_mean_int_da = apply_function(avg_mean_intensity, event_statstics, geopolar)

avg_rate_on_da = apply_function(avg_rate_onset, event_statstics, geopolar)
avg_rate_off_da = apply_function(avg_rate_decline, event_statstics, geopolar)

avg_annual_events_da = apply_function(avg_no_annual_events, event_statstics, geopolar)
avg_cum_intensity_da = apply_function(avg_cum_intensity, event_statstics, geopolar)

  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  outputs = ufunc(*inputs)
  outputs = ufunc(*inputs)
  outputs = ufunc(*inputs)
  outputs = ufunc(*inputs)
  outputs = ufunc(*inputs)


## Save each statistic into individual netcdf files

In [31]:
output_dir = './data/processed/mhw_stats_outputs/'
if not os.path.exists(output_dir):
    os.makedirs(output_dir)

Creating a land mask

In [32]:
mask = geopolar.isel(time=0).drop('time').rename(
    {'lat': 'latitude', 'lon': 'longitude'}
)

Number of Occurrences

In [39]:
# mask out land
no_occ_da = no_occ_da.where(~mask.isnull())

# Add metadata
no_occ_da = no_occ_da.rename('total_no_occurrences').assign_attrs(
    {'source': 'NOAA Geopolar', 'unit': '# of MHW', 'description': 
     "The total number of MHWs reported from EJO's MHW code for each pixel. " \
     "Passing along EJO's `n_events` metric.", 'processing_date': 
     datetime.strftime(date.today(), '%Y-%m-%d')}
)

# Save
no_occ_da.to_dataset().to_netcdf(
    os.path.join(output_dir, 'no_occurrences_CBay_geopolar.nc')
)

Average Duration

In [40]:
# Add metadata
avg_dur_da = avg_dur_da.rename('avg_duration').assign_attrs(
    {'source': 'NOAA Geopolar', 'unit': '# of days', 'description': "The 'duration' " \
     "statistic reported from EJO's MHW code for each pixel, averaged over all of " \
     "the MHW identified in that pixel", 'processing_date': 
     datetime.strftime(date.today(), '%Y-%m-%d')
    }
)

# Save
avg_dur_da.to_dataset().to_netcdf(
    os.path.join(output_dir, 'average_mhw_duration_CBay_geopolar.nc')
)

Average Max Intensity

In [41]:
# Add metadata
avg_max_int_da = avg_max_int_da.rename('avg_max_intensity').assign_attrs(
    {'source': 'NOAA Geopolar', 'unit': 'deg C', 
     'description': "returns the average value for the maximum intensity, " \
     "`intensity_max`, averaged over the full time series", 'processing_date': 
     datetime.strftime(date.today(), '%Y-%m-%d')}
)

# Save
avg_max_int_da.to_dataset().to_netcdf(
    os.path.join(output_dir, 'average_max_intensity_CBay_geopolar.nc')
)

Average Mean Intensity

In [36]:
# Add metadata
avg_mean_int_da = avg_mean_int_da.rename('avg_mean_intensity').assign_attrs(
    {'source': 'NOAA Geopolar', 'unit': 'deg C', 
     'description': "returns the average value for the mean intensity, " \
     "`intensity_mean`, averaged over the full time series", 'processing_date': 
     datetime.strftime(date.today(), '%Y-%m-%d')}
)

# Save
avg_mean_int_da.to_dataset().to_netcdf(
    os.path.join(output_dir, 'average_mean_intensity_CBay_geopolar.nc')
)

Average rate of onset

In [42]:
# Add metadata
avg_rate_on_da = avg_rate_on_da.rename('avg_onset_rate').assign_attrs(
    {'source': 'NOAA Geopolar', 'unit': 'deg C / day', 'description': "returns " \
     "the rate of onset of MHW, averaged over all MHW in the time series " \
     "(`rate_onset`)", 'processing_date':  datetime.strftime(date.today(), '%Y-%m-%d')}
)

# Save
avg_rate_on_da.to_dataset().to_netcdf(
    os.path.join(output_dir, 'average_rate_onset_CBay_geopolar.nc')
)

Average rate of decline

In [43]:
# Add metadata
avg_rate_off_da = avg_rate_off_da.rename('avg_decline_rate').assign_attrs(
    {'source': 'NOAA Geopolar', 'unit': 'deg C / day', 'description': "returns " \
    "the rate of deline of MHW, averaged over all MHW in the time series " \
    "(`rate_decline`)", 'processing_date': datetime.strftime(date.today(), '%Y-%m-%d')}
)

# Save
avg_rate_off_da.to_dataset().to_netcdf(
    os.path.join(output_dir, 'average_rate_decline_CBay_geopolar.nc')
)

Average number of annual events

In [46]:
# Mask land
avg_annual_events_da = avg_annual_events_da.where(~mask.isnull())

# Add metadata
avg_annual_events_da = avg_annual_events_da.rename('avg_no_annual_events').assign_attrs(
    {'source': 'NOAA Geopolar', 'unit': '# MHW', 'description': "counts the number " \
     "of mhw in each year and returns the average number of MHW events per year", 
     'processing_date': datetime.strftime(date.today(), '%Y-%m-%d')}
)

# Save
avg_annual_events_da.to_dataset().to_netcdf(
    os.path.join(output_dir, 'average_no_annual_events_CBay_geopolar.nc')
)

Average cumulative intensity

In [35]:
# Mask land
avg_cum_intensity_da = avg_cum_intensity_da.where(~mask.isnull())

# Add metadata
avg_cum_intensity_da = avg_cum_intensity_da.rename('avg_cum_intensity').assign_attrs(
    {'source': 'NOAA Geopolar', 'unit': 'deg C * day', 'description': "calculate the " \
     "cumulative intensity of all the mhw for each year and returns the average over " \
     "all the years", 'processing_date': datetime.strftime(date.today(), '%Y-%m-%d')}
)

# Save
avg_cum_intensity_da.to_dataset().to_netcdf(
    os.path.join(output_dir, 'average_cum_intensity_CBay_geopolar.nc')
)