In [None]:
import xarray as xr
import numpy as np
import matplotlib.pyplot as plt
import cv2 as cv
import cartopy as cp
from matplotlib.patches import FancyArrowPatch
from scipy.ndimage import map_coordinates
import metpy.calc as mpcalc
from geographiclib.geodesic import Geodesic
from utils_datetime import *
from utils_filter import *

from matplotlib.backends.backend_pdf import PdfPages


In [None]:
# Open the gridded outlooks and keep only the entries whose time label
# compares >= '200203300000'.
grid_outlooks = xr.open_dataset('data/outlooks/grid_outlooks.nc')
recent_times = grid_outlooks['time'] >= '200203300000'
grid_outlooks = grid_outlooks.sel(time=recent_times)

In [None]:
# Creates new, recentered datasets by keeping the data (get rid of shifts and div)
# but changing the x and y coords so that (0, 0) is at the max or weighted center of the outlook (or of the PPH?). The highest risk is treated as the "center" of what will happen, even if weaker storms are focused in one direction.
# Then calculate things like the shifts within each quadrant.

# Note: the grids will not be exactly aligned when each one is centered over a different point, but they are probably close enough.


# finding x and y to make center for each date:
# group the outlook probabilities by their 'time' label so that every group
# can be reduced to a single centre point.
grouped = grid_outlooks['prob'].groupby('time')

# Step 1: Find all points with the maximum prob for each day and compute mean coordinates
def find_mean_coords(group):
    """Pick the grid point used as the centre (origin) for one time group.

    Parameters
    ----------
    group : xarray.DataArray
        Probabilities for a single ``time`` group, carrying ``x`` and ``y``
        coordinates.

    Returns
    -------
    xarray.Dataset
        Two scalar variables, ``nearest_x`` and ``nearest_y``. They are the
        rounded means of the ``x`` / ``y`` coordinate labels that remain after
        keeping only the cells equal to the group's maximum. When the group
        has no usable maximum (all zero or all NaN) the means are taken over
        every ``x`` / ``y`` label of the group instead.
    """
    max_prob = group.max()
    # An all-NaN group yields a NaN maximum: the equality test below would
    # then match nothing and round(nan) would raise, so treat it exactly like
    # the "no outlook" (all-zero) case.
    no_outlook = bool(max_prob == 0) or bool(max_prob.isnull())
    if no_outlook:
        mean_x = group['x'].mean().item()
        mean_y = group['y'].mean().item()
    else:
        # Keep only the cells that hold the maximum; drop=True trims the
        # coordinate labels that contain no such cell.
        max_points = group.where(group == max_prob, drop=True)
        mean_x = max_points['x'].mean().item()
        mean_y = max_points['y'].mean().item()
    # Round to the nearest integers
    nearest_x = round(mean_x)
    nearest_y = round(mean_y)
    return xr.Dataset({'nearest_x': nearest_x, 'nearest_y': nearest_y})

# Apply the function to each time group; the result holds one
# (nearest_x, nearest_y) pair per time.
center_coords = grouped.map(find_mean_coords)

# The raw outlooks and the groupby object are not needed past this point.
del grid_outlooks, grouped

# Keep this as the final expression: only the last expression of a cell is
# rendered, so a bare name placed before `del` was never displayed.
center_coords

In [None]:
# Pull the per-time centre coordinates out of center_coords
nearest_x, nearest_y = center_coords['nearest_x'], center_coords['nearest_y']


# Re-center displacements
def recenter_displacements(displacements, nearest_x, nearest_y):
    """Return `displacements` with its x/y coordinates shifted to a new origin.

    `nearest_x` is subtracted from the 'x' coordinate and `nearest_y` from the
    'y' coordinate, so the point (nearest_x, nearest_y) becomes (0, 0). Only
    'x' and 'y' are reassigned here; nothing else (e.g. lat/lon) is updated.
    """
    origin = {'x': nearest_x, 'y': nearest_y}
    shifted_coords = {axis: displacements[axis] - offset for axis, offset in origin.items()}
    return displacements.assign_coords(shifted_coords)
# Initialize an empty list to store the recentered displacements



# Recentre every time step of each dataset on that day's outlook centre and
# write the result to ~/recentered_data/<name>_recentered.nc.
for dataset_location in ['data/pph/labelled_pph', 'data/displacement/displacements', 'data/outlooks/grid_outlooks']:
    print(dataset_location)
    # The context manager closes the source file once the recentred copy has
    # been written, instead of leaving one open handle per dataset.
    with xr.open_dataset(dataset_location + '.nc') as source:
        ds = source.sel(time = source['time'] >= '200203300000')

        recentered_list = []
        for t in range(len(ds.time)):
            # Current time slice of the dataset
            displacement_slice = ds.isel(time=t)

            # Look the centre up by time label rather than by position: the
            # centres were derived from the outlook file, and a positional
            # lookup would silently pair the wrong day if this dataset's time
            # axis differed from it. A missing label now raises a KeyError.
            time_label = displacement_slice['time'].item()
            nearest_x_t = nearest_x.sel(time=time_label)
            nearest_y_t = nearest_y.sel(time=time_label)

            recentered_list.append(
                recenter_displacements(displacement_slice, nearest_x_t, nearest_y_t)
            )

        print('combining')
        # Each slice now has its own x/y labels; concatenating along time
        # combines them back into a single xarray object.
        recentered = xr.concat(recentered_list, dim='time')
        # Ensure proper ordering of dimensions
        #recentered = recentered.transpose('time', 'y', 'x', 'hazard')
        recentered.to_netcdf('~/recentered_data/' + dataset_location.split('/')[-1] + '_recentered.nc')
    del recentered, recentered_list


In [None]:
# Open the recentered PPH, outlook and displacement files.
# Plan: do something like how shifts and divergence are calculated, but within each quadrant.
#test_time = ['201104270000', '201905310000']
pph_recentered, outlooks_recentered, displacements_recentered = (
    xr.open_dataset(path)
    for path in (
        '~/recentered_data/labelled_pph_recentered.nc',
        '~/recentered_data/grid_outlooks_recentered.nc',
        '~/recentered_data/displacements_recentered.nc',
    )
)

In [None]:
# One row per hazard: (hazard name, PPH variable name, outlook label).
_hazard_table = [
    ('Wind',       'p_perfect_wind',     'Day 1 Wind'),
    ('Hail',       'p_perfect_hail',     'Day 1 Hail'),
    ('Tornado',    'p_perfect_tor',      'Day 1 Tornado'),
    ('All Hazard', 'p_perfect_totalsvr', 'Day 1'),
]

# Hazard names in processing order.
hazard_types = [row[0] for row in _hazard_table]

# hazard name -> variable name in the PPH dataset
pph_key_dict = {name: pph_var for name, pph_var, _ in _hazard_table}

# hazard name -> label on the outlook dataset's 'outlook' coordinate
outlook_key_dict = {name: outlook_label for name, _, outlook_label in _hazard_table}

In [None]:


# Per-side summary variables: for each half of the recentred grid (north,
# east, south, west of the origin) store a weighted mean e_flow, n_flow and
# divergence per (time, hazard). All start at 0.0.
sides = ['n', 'e', 's', 'w']
stat_names = ['e_shift', 'n_shift', 'total_div']
summary_shape = (len(displacements_recentered['time']), len(hazard_types))
displacements_recentered = displacements_recentered.assign({
    # np.full is called once per variable so no two variables share a buffer
    stat + '_' + side: (('time', 'hazard'), np.full(summary_shape, 0.0))
    for side in sides
    for stat in stat_names
})


def _side_mask(field, side):
    """Boolean mask for the half of `field`'s grid on the given side of the origin.

    Both opposite halves include the row/column at coordinate 0.
    """
    if side == 'n':
        return field.y >= 0
    if side == 'e':
        return field.x >= 0
    if side == 's':
        return field.y <= 0
    if side == 'w':
        return field.x <= 0
    raise ValueError('unknown side: ' + repr(side))


for hazard in hazard_types:
    print(hazard)

    hazard_dataset = displacements_recentered.sel(hazard = hazard)
    for date in displacements_recentered['time']:
        # Everything in this part is independent of the side, so it is read
        # and computed once per (hazard, date) instead of once per side.
        hazard_time_dataset = hazard_dataset.sel(time = date)
        e_flow = hazard_time_dataset['e_flow'].fillna(0).values
        n_flow = hazard_time_dataset['n_flow'].fillna(0).values
        x_flow = hazard_time_dataset['x_flow'].fillna(0).values
        y_flow = hazard_time_dataset['y_flow'].fillna(0).values
        # d(x_flow)/d(axis 1) + d(y_flow)/d(axis 0), in grid-index units
        div = np.gradient(x_flow, axis=1) + np.gradient(y_flow, axis=0)

        outlook_prob = outlooks_recentered.sel(time = date, outlook = outlook_key_dict[hazard])['prob'].load()
        pph_prob = None  # only read from disk if some side needs the fallback

        for side in sides:
            mask = _side_mask(outlook_prob, side)
            weights = outlook_prob.where(mask, 0).fillna(0).data

            if weights.max() == 0: # no outlook, so weight at pph
                if pph_prob is None:
                    pph_prob = pph_recentered.sel(time = date)[pph_key_dict[hazard]].load()
                weights = pph_prob.where(mask, 0).fillna(0).data
            if weights.max() == 0:
                # NOTE(review): with neither outlook nor PPH on this side the
                # average is unweighted over the WHOLE grid, not just this
                # half -- confirm that this is the intended fallback.
                weights = None

            target = dict(time = date, hazard = hazard)
            displacements_recentered['e_shift_' + side].loc[target] = np.average(e_flow, weights = weights)
            displacements_recentered['n_shift_' + side].loc[target] = np.average(n_flow, weights = weights)
            displacements_recentered['total_div_' + side].loc[target] = np.average(div, weights = weights)

In [None]:
# displacements_recentered was opened lazily from this very path, so writing
# straight onto it would overwrite a file that is still being read. Write the
# result to a temporary file first and then swap it into place.
import os

recentered_path = os.path.expanduser('~/recentered_data/displacements_recentered.nc')
temporary_path = recentered_path + '.tmp'
displacements_recentered.to_netcdf(temporary_path)
os.replace(temporary_path, recentered_path)

# Exploring data

In [None]:
# Reopen the recentered displacement, PPH and outlook files for exploration.
ds, pph, grid_outlooks = (
    xr.open_dataset(path)
    for path in (
        '~/recentered_data/displacements_recentered.nc',
        '~/recentered_data/labelled_pph_recentered.nc',
        '~/recentered_data/grid_outlooks_recentered.nc',
    )
)

In [None]:
# Periods to composite: 'all', or '<first year>_<last year>'.
time_periods = ['all', '2002_2006', '2007_2011', '2012_2016', '2017_2023']
dpi = 1000
# When True the composites use only days whose MAX_CAT is MDT or HIGH.
mdt = False

# Hazard names in plotting order (one subplot each).
hazard_types = ['All Hazard', 'Wind', 'Hail', 'Tornado']

# One row per hazard: (hazard name, PPH variable name, outlook label).
_hazard_table = [
    ('Wind',       'p_perfect_wind',     'Day 1 Wind'),
    ('Hail',       'p_perfect_hail',     'Day 1 Hail'),
    ('Tornado',    'p_perfect_tor',      'Day 1 Tornado'),
    ('All Hazard', 'p_perfect_totalsvr', 'Day 1'),
]

# hazard name -> variable name in the PPH dataset
pph_key_dict = {name: pph_var for name, pph_var, _ in _hazard_table}

# hazard name -> label on the outlook dataset's 'outlook' coordinate
outlook_key_dict = {name: outlook_label for name, _, outlook_label in _hazard_table}

In [None]:
# Shared settings for the two composite figures written per time period.
composite_window = slice(-20, 20)   # x/y label range kept around the recentred origin
outlook_levels = [0.02, 0.05, 0.10, 0.15, 0.30, 0.45, 0.60, 1.00]
outlook_colors = ['#008b00', '#8b4726', '#ffc800', '#ff0000', '#ff00ff', '#912cee', '#104e8b']
pph_variables = ['p_perfect_hail', 'p_perfect_wind', 'p_perfect_tor', 'p_perfect_totalsvr']
panel_labels = ['a', 'b', 'c', 'd']
file_prefix = '' if mdt else 'allcat_'


def _label_panel(fig, ax, filled, hazard, label):
    """Give one subplot its hazard title, bold panel letter and colorbar."""
    ax.set_title(hazard)
    ax.text(0.02, 1.05, label, transform=ax.transAxes, fontsize=12, fontweight='bold', va='top', ha='left')
    fig.colorbar(filled, ax=ax, orientation='vertical', pad=0.01, aspect=50, fraction=0.1)


for time_period in time_periods:
    print(time_period)
    if time_period != 'all':
        year1, year2 = time_period.split('_')[:2]
        first, last = year1 + '01010000', year2 + '12312359'
        this_ds = ds.sel(time = (ds['time'] >= first) & (ds['time'] <= last))
        this_pph = pph.sel(time = (pph['time'] >= first) & (pph['time'] <= last))
        this_grid_outlooks = grid_outlooks.sel(time = (grid_outlooks['time'] >= first) & (grid_outlooks['time'] <= last))
    else:
        year1 = '2002'
        year2 = '2023'
        this_ds = ds
        this_pph = pph
        this_grid_outlooks = grid_outlooks

    if mdt:
        # MAX_CAT lives on the PPH dataset; the same boolean mask is applied
        # to all three datasets, which relies on them sharing one time axis.
        is_mdt_day = this_pph['MAX_CAT'].isin(['MDT', 'HIGH'])
        this_ds = this_ds.sel(time = is_mdt_day)
        this_grid_outlooks = this_grid_outlooks.sel(time = is_mdt_day)
        this_pph = this_pph.sel(time = is_mdt_day)

    # Cut out the plotting window BEFORE averaging over time, and only for the
    # subset that is actually plotted. The time mean is taken cell by cell, so
    # the windowed composite is unchanged, but the full grid is no longer
    # averaged twice (all days and MDT+ days) just to keep one small window.
    window = dict(x = composite_window, y = composite_window)
    mean_displacements_slice = this_ds.sel(**window).mean(dim = 'time')
    mean_outlooks_slice = this_grid_outlooks['prob'].sel(**window).mean(dim = 'time')
    mean_pph_slice = this_pph[pph_variables].sel(**window).mean(dim = 'time')

    # plotting composite displacement
    print('composite displacement')
    x, y = np.meshgrid(mean_displacements_slice['x'], mean_displacements_slice['y'])
    output_pdf = 'plots/results/colocated/' + file_prefix + 'composite_displacements_' + time_period + '.pdf'
    with PdfPages(output_pdf) as pdf:
        fig, axes = plt.subplots(2, 2, figsize=(12, 10))
        axes = axes.flatten()
        fig.suptitle(('Displacement Composites for MDT+ Days ' if mdt else ' Displacement Composite ') + year1 + '-' + year2)

        for i, hazard in enumerate(hazard_types):
            print(hazard)

            # transpose('y', 'x') pins the array orientation to the one the
            # meshgrid expects; the window is square, so a swapped array
            # would otherwise be plotted transposed without any error.
            hazard_flow = mean_displacements_slice.sel(hazard=hazard)
            x_flow = hazard_flow['x_flow'].transpose('y', 'x')
            y_flow = hazard_flow['y_flow'].transpose('y', 'x')
            outlook_mean = mean_outlooks_slice.sel(outlook=outlook_key_dict[hazard]).transpose('y', 'x')

            # The mean outlook is offset by +0.01 before contouring.
            c = axes[i].contourf(x, y, outlook_mean + 0.01,
                                 levels=outlook_levels,
                                 alpha=0.5,
                                 colors=outlook_colors)
            axes[i].quiver(x, y, x_flow, y_flow, scale=1, scale_units='xy')
            _label_panel(fig, axes[i], c, hazard, panel_labels[i])

        plt.tight_layout()
        pdf.savefig(fig)
        plt.close(fig)

    print(f"PDF saved at {output_pdf}")

    # plotting composite of pph-outlook
    print('composite difference')
    x, y = np.meshgrid(mean_pph_slice['x'], mean_pph_slice['y'])
    output_pdf = 'plots/results/colocated/' + file_prefix + 'composite_pph_outlooks_' + time_period + '.pdf'
    with PdfPages(output_pdf) as pdf:
        fig, axes = plt.subplots(2, 2, figsize=(12, 10))
        axes = axes.flatten()
        fig.suptitle(('PPH - Outlook Composites on MDT+ Days ' if mdt else 'PPH - Outlook Composites ') + year1 + '-' + year2)

        for i, hazard in enumerate(hazard_types):
            print(hazard)

            outlook_mean = mean_outlooks_slice.sel(outlook=outlook_key_dict[hazard])
            # PPH is divided by 100 before the outlook is subtracted
            # (presumably PPH is stored in percent -- TODO confirm).
            difference = mean_pph_slice[pph_key_dict[hazard]] / 100 - outlook_mean

            # extend='both' has to be given to contourf: a colorbar built from
            # a contour set takes its extension from the contour set, so
            # passing it only to colorbar() left differences beyond +/-0.12
            # unfilled.
            c = axes[i].contourf(
                x, y, difference.transpose('y', 'x'),
                levels=np.linspace(-0.12, 0.12, 13), cmap='bwr', extend='both'
            )

            # Add outlook contours
            axes[i].contour(
                x, y, outlook_mean.transpose('y', 'x') + 0.01,
                levels=outlook_levels,
                colors='black', linestyles='dashed'
            )

            _label_panel(fig, axes[i], c, hazard, panel_labels[i])

        # Adjust layout and save to PDF
        plt.tight_layout()
        pdf.savefig(fig)
        plt.close(fig)

    print(f"PDF saved at {output_pdf}")

Shift Distributions

In [None]:
# Load the record for the single time label '201104270000' into memory and
# display it for inspection.
ds.sel(time = '201104270000').load()

In [None]:
hazard_types = ['Wind', 'Hail', 'Tornado', 'All Hazard']

# Summary variables to plot: the whole-domain values first, then the
# n / e / s / w variants.
vs = [
    'e_shift',   'n_shift',   'total_div',
    'e_shift_n', 'n_shift_n', 'total_div_n',
    'e_shift_e', 'n_shift_e', 'total_div_e',
    'e_shift_s', 'n_shift_s', 'total_div_s',
    'e_shift_w', 'n_shift_w', 'total_div_w',
]

# One histogram per (hazard, variable), each with a vertical line at zero.
for hazard in hazard_types:
    for v in vs:
        ds_hazard = ds.sel(hazard = hazard)[v]
        is_divergence = 'div' in v
        # Divergence is plotted as stored; shifts are divided by 1000 first.
        values = ds_hazard if is_divergence else ds_hazard/1000
        limits = (-8, 8) if is_divergence else (-800, 800)
        counts, _, _ = plt.hist(values, bins = 40, range = limits)
        plt.plot([0, 0], [0, counts.max()], color='k', linestyle='-')
        # NOTE(review): the title says "MDT+ days" but `ds` is not filtered by
        # category in this cell -- confirm the wording.
        plt.title(hazard + ' ' + v + ' distribution on MDT+ days since 2002')
        plt.show()

            

    

In [None]:
# Select the series explicitly instead of reusing whatever `ds_hazard` the
# previous cell's loop happened to leave behind (its last iteration was
# hazard 'All Hazard', variable 'total_div_w'), so this cell no longer depends
# on that loop having run to completion.
plt.hist(ds.sel(hazard = 'All Hazard')['total_div_w'], bins = 40);