In [1]:
import numpy as np
import geopandas as gp
import shapely as sp
import matplotlib.pyplot as plt
import contextily as cx
import cartopy as cp
from datetime import datetime as dt
import datetime
from datetime import timedelta
import xarray as xr
import pandas as pd
import re
import os
import cartopy.crs as ccrs
import utils
from utils_filter import *
from utils_datetime import *
from utils_plotting import *

Binomial significance test of whether storm reports (evaluated grid point by grid point) are significantly different from the outlooks. This gives us values for each day, which we can then break down by hazard, risk level, significance threshold (p-value), region (could make a composite map), and moderate-risk days.

In [21]:
# Inputs handed to read_datasets: the data folder and the `moderate` flag.
# NOTE(review): `moderate` presumably selects the moderate-risk subset — confirm in read_datasets.
data_location = 'data'
moderate = False

# Load the three datasets used throughout the notebook.
loaded_datasets = read_datasets(data_location, moderate)
outlooks, pph, reports = loaded_datasets

# Keep a second reference to each freshly loaded object so the originals stay
# reachable after later cells rebind `outlooks`, `pph`, or `reports`.
# These are aliases, not copies.
outlooks_save = outlooks
pph_save = pph
reports_save = reports

reading outlooks 1
reading outlooks 2


  outlooks = outlooks.append(gp.read_file(data_location + '/outlooks/' + mod_string + '_outlooks_2.shp'))


reading pph
reading storm reports


In [34]:
# TODO: get grid? See where it's needed

hazards = ['wind', 'hail', 'tor']
thresholds = [.02, .05, .1, .15, .25, .3, .35, .45, .6]

# Keep only the tornado, hail, and wind outlooks. This is done *before* the
# verification dataset is built so that its date coordinate is identical on the
# first run and on any re-run of this cell (the filter itself is idempotent).
# NOTE(review): assumes the loaded outlooks are already day-1 only — confirm.
outlooks = outlooks[outlooks['CATEGORY'].isin(['HAIL', 'WIND', 'TORNADO'])]

# One entry per distinct outlook date, in order of first appearance. The raw
# DATE column repeats a date for every outlook row issued that day, which would
# give duplicate coordinate labels and redundant loop iterations.
outlook_dates = outlooks['DATE'].drop_duplicates()

# Initialize dataset with date, hazard type, and threshold as coordinates,
# p-value and location as data. The dimension names must match the coordinate
# names exactly; otherwise the variables live on separate, coordinate-less
# dimensions and cannot be selected by hazard or threshold.
verification_dims = ('date', 'hazard', 'threshold')
verification_shape = (len(outlook_dates), len(hazards), len(thresholds))
outlook_verifications = xr.Dataset(
    coords=dict(
        date=outlook_dates.to_numpy(),
        hazard=hazards,
        threshold=thresholds
    ),
    attrs=dict(description="p value for if storm reports differed significantly from CO by date, hazard type, and threshold"),
)
outlook_verifications = outlook_verifications.assign(
    # p-values are numeric: start from NaN (float dtype) rather than None (object dtype).
    p=(verification_dims, np.full(verification_shape, np.nan)),
    # Locations are filled in later with arbitrary objects, so keep an object array.
    # Access as outlook_verifications['loc'] (attribute access hits Dataset.loc).
    loc=(verification_dims, np.full(verification_shape, None)),
)

# Visit each distinct date once and pull out that day's outlooks.
for date in outlook_dates:
    outlooks_date = outlooks[outlooks['DATE'] == date]
    


#for each date:
#   filter outlooks, reports to only be from that date
#   for each hazard type and threshold
#       choose row of outlooks corresponding to that hazard type and threshold, subset of reports of that hazard type
#       for each grid point
#           if grid point is in shape
#               add 1 to count of number of grid points in shape
#               add 1 to true count if there is a hazard nearby
#       perform significance test, store p-value in dataset
#       store center of polygon in dataset
    
# could be faster:
#for each date:
#   filter outlooks, reports to only be from that date
#   for each hazard type
#       subset reports and outlooks to be only of hazard type
#       iterate through reports and turn nearest point true
#       for each threshold:
    #       choose row of outlooks corresponding to that threshold
    #       for each grid point
    #           if grid point is in shape
    #               add 1 to count of number of grid points in shape
    #               add 1 to true count if there is a hazard nearby
    #       perform significance test, store p-value in dataset
    #       store center of polygon in dataset

# at each significance level (.05, .01)
# For each hazard type: Map: center of each polygon, shape is threshold, color is frequency of rejection. Or split by threshold as well
# For each hazard type: Table summarizing frequency of rejection by region and overall for each threshold

In [36]:
# Show the storm reports table as this cell's output to inspect its columns and values.
# NOTE(review): in the saved output, rows whose BEGIN_DATE_TIME is in 1950
# (e.g. 28-APR-50) carry DATE 2050-04-28, so the two-digit year looks like it is
# being parsed into the wrong century — confirm in the loading code.
# The geometry column also appears blank in this rendering — verify.
reports

Unnamed: 0,field_1,STATE,EVENT_TYPE,CZ_TYPE,CZ_NAME,WFO,BEGIN_DATE_TIME,CZ_TIMEZONE,END_DATE_TIME,INJURIES_DIRECT,...,BEGIN_LOCATION,END_RANGE,END_AZIMUTH,END_LOCATION,BEGIN_LAT,BEGIN_LON,END_LAT,END_LON,DATE,geometry
0,0,OKLAHOMA,Tornado,C,WASHITA,,28-APR-50 14:45:00,CST,28-APR-50 14:45:00,0,...,,0,,,35.12,-99.20,35.17,-99.20,2050-04-28,
1,1,TEXAS,Tornado,C,COMANCHE,,29-APR-50 15:30:00,CST,29-APR-50 15:30:00,0,...,,0,,,31.90,-98.60,31.73,-98.60,2050-04-29,
2,2,PENNSYLVANIA,Tornado,C,LEHIGH,,05-JUL-50 18:00:00,CST,05-JUL-50 18:00:00,2,...,,0,,,40.58,-75.70,40.65,-75.47,2050-07-05,
3,3,PENNSYLVANIA,Tornado,C,DAUPHIN,,05-JUL-50 18:30:00,CST,05-JUL-50 18:30:00,0,...,,0,,,40.60,-76.75,,,2050-07-05,
4,4,PENNSYLVANIA,Tornado,C,CRAWFORD,,24-JUL-50 14:40:00,CST,24-JUL-50 14:40:00,0,...,,0,,,41.63,-79.68,,,2050-07-24,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1054186,72123,ARIZONA,Thunderstorm Wind,C,PIMA,TWC,31-JUL-23 18:40:00,MST-7,31-JUL-23 18:40:00,0,...,(TUS)TUCSON INTL ARP,5,WSW,(TUS)TUCSON INTL ARP,32.0903,-111.0226,32.0903,-111.0226,2023-07-31,
1054187,72124,ARIZONA,Thunderstorm Wind,C,PIMA,TWC,31-JUL-23 19:35:00,MST-7,31-JUL-23 19:35:00,0,...,TUSCON MARANA NW AIRPORT,1,S,TUSCON MARANA NW AIRPORT,32.4,-111.22,32.4,-111.22,2023-07-31,
1054188,72125,GEORGIA,Thunderstorm Wind,C,DOUGLAS,FFC,06-AUG-23 13:25:00,EST-5,06-AUG-23 13:27:00,0,...,CHAPEL HILL,2,NNW,CHAPEL HILL,33.7,-84.73,33.7,-84.73,2023-08-06,
1054189,72132,MINNESOTA,Tornado,C,NORMAN,FGF,24-JUN-23 15:10:00,CST-6,24-JUN-23 15:14:00,0,...,SHELLY,4,ENE,SHELLY,47.49,-96.73,47.4991,-96.7571,2023-06-24,
