The purpose of this notebook is to manually generate PSV (pipe-separated values) simulation files for desired cases.

In [1]:
# Environment sanity check: ask the shell which `python` executable it resolves.
!which python

/Users/josephpicca/opt/anaconda3/envs/impacts-parallel-run/bin/python


In [None]:
import datetime
import gzip
import pathlib
from os import environ as E

#import geojson
import numpy as np
import pyproj
from scipy import stats

import dclasses as dc
import pygrib as pg

In [3]:
import io
import pandas as pd

In [3]:
### Run configuration (hard-coded here; nothing is parsed from a command line) ###
# Outlook GRIB2 file. The timestamp at the end of its name is parsed further
# down to build the output filename.
ndfd_file = pathlib.Path(
    '../data/output/examples/20110427/ndfd/torn_day1_grib2_1630_20110427162927'
)

# .npz archive path for the forecast grids.
pp_file = pathlib.Path('./test.npz')
# Scale factor applied to each category's grid-cell count in the count simulation.
ndfd_area = 25
# Number of simulations (columns of the `counts` array).
nsims = 10_000
# Normal distribution handed to dc.simulate for tornado direction.
tornado_direction_distribution = stats.norm(loc=50, scale=15)
# Month numbers checked against the forecast date when deciding `usesig`.
coolseason = [1, 2, 3, 4, 11, 12]

In [4]:
# Directory handed to dc.ImpactGrids when the impact grids are built.
impacts_data_root = pathlib.Path('../scripts/impacts-data/pas-input-data/')

In [5]:
# Absolute path of the directory that receives the gzipped PSV output.
outdir = pathlib.Path('../scripts/impacts-data/', "output").resolve()
# parents=True: also create any missing intermediate directories, so a fresh
# checkout without ../scripts/impacts-data does not fail with FileNotFoundError.
outdir.mkdir(parents=True, exist_ok=True)

In [6]:
# Read grib file
def read_ndfd_grib_file(grbfile):
    """Return the values of message 1 of an SPC Outlook NDFD GRIB2 file.

    Parameters
    ----------
    grbfile : pathlib.Path
        Path of the GRIB2 file; it is opened through ``grbfile.as_posix()``.

    Returns
    -------
    The message's ``values``. When that object provides ``filled`` (as a
    masked array does), masked entries are replaced by -1; otherwise the
    values are returned as they are.
    """
    with pg.open(grbfile.as_posix()) as grib:
        message_values = grib[1].values
        try:
            return message_values.filled(-1)
        except AttributeError:
            # No `filled` method: nothing is masked, hand the data back as is.
            return message_values

In [7]:
# The text after the last underscore of the file name is the issue timestamp.
date_in_name = ndfd_file.name.rsplit("_", 1)[-1]
dt = datetime.datetime.strptime(date_in_name, "%Y%m%d%H%M%S")
# Name the output file after that same timestamp.
run_stamp = dt.strftime('%Y%m%d%H%M%S')
outfile = outdir / f"{run_stamp}_ts.psv.gz"

In [8]:
# GRIB-based alternative, kept for reference:
#torn = read_ndfd_grib_file(ndfd_file)
# Load through the configured `pp_file` instead of a second hard-coded copy of
# the same path, so changing the configuration actually changes the input.
fcsts = np.load(pp_file)
# Multiply 100 to convert from decimal to percent
torn = fcsts['all_fcst']*100

In [9]:
# Derived from the percent grid by dc.make_continuous; passed as `cprobs` to
# dc.weighted_choice further down. NOTE(review): exact semantics live in dclasses.
continuous_torn = dc.make_continuous(torn)

In [10]:
# GRIB-based alternative, kept for reference:
#sigtorn = read_ndfd_grib_file(ndfd_file.with_name(ndfd_file.name.replace("torn", "sigtorn"))).astype(int)

# Same x100 scaling as applied to `torn`.
sigtorn = fcsts['sig_fcst']*100

In [11]:
# Rebuild the grid from the loaded forecast before modifying it in place.
# Without this, re-running the cell would apply the "+= 1" promotion a second
# time to an already promoted array and silently change the sig levels.
sigtorn = fcsts['sig_fcst']*100
# Collapse every positive value to level 1.
sigtorn[sigtorn > 0] = 1
# When the outlook reaches 30% and any sig area exists, raise the level by one
# wherever the tornado probability is at least 15%.
if (torn.max() >= 30) and (sigtorn.max() > 0):
    sigtorn[torn >= 15] += 1
# Flattened grid, indexed later by the sampled grid indices.
sigtorn_1d = sigtorn.ravel()
# Use the sig rating distributions in cool-season months or when any sig area exists.
usesig = bool((dt.month in coolseason) or (sigtorn.max() > 0))

In [12]:
### Run Tornado Count Simulation ###
print(f"Running {nsims:,d} Tornado Count Simulations")
tornado_dists = dc.TornadoDistributions()
# One (distribution, grid mask) pair per probability category, in row order.
_count_specs = (
    (tornado_dists.f02, torn == 2),
    (tornado_dists.f05, torn == 5),
    (tornado_dists.f10, torn == 10),
    (tornado_dists.f15, torn == 15),
    (tornado_dists.f30, torn >= 30),
)
# Rows are categories, columns are simulations.
counts = np.zeros((5, nsims), dtype=int)
for _row, (_dist, _mask) in enumerate(_count_specs):
    # Draw nsims values, scale by ndfd_area and the number of grid cells in
    # the category, then truncate to whole tornado counts.
    counts[_row, :] = (_dist.rvs(nsims) * ndfd_area * _mask.sum()).astype(int)

Running 10,000 Tornado Count Simulations


In [13]:
### Setup Impact Simulation ###
# Built from impacts_data_root; this object is passed to every dc.simulate call.
igrids = dc.ImpactGrids(impacts_data_root)

In [14]:
# Total count per probability category, summed over all simulations.
scounts = counts.sum(axis=1)
# Category values in the same order as the rows of `counts`.
_category_probs = (2, 5, 10, 15, 30)
inds02, inds05, inds10, inds15, inds30 = [
    dc.weighted_choice(prob=_prob, probs=torn, cprobs=continuous_torn, size=_total)
    for _prob, _total in zip(_category_probs, scounts)
]
# Single flat collection of the sampled indices, categories in ascending order.
inds = dc.flatten_list([inds02, inds05, inds10, inds15, inds30])

In [15]:
# Sig level at every sampled index, looked up once and compared three times.
_sig_at_inds = sigtorn_1d[inds]
non_sig_inds = _sig_at_inds == 0
single_sig_inds = _sig_at_inds == 1
double_sig_inds = _sig_at_inds == 2

In [16]:
if usesig:
    # Fold every non-sig entry into the single-sig mask (in place), then empty
    # the non-sig mask so no tornado is drawn from the non-sig distribution.
    single_sig_inds |= non_sig_inds
    non_sig_inds.fill(False)

In [17]:
# Handle Locations
# Select the sampled indices belonging to each significance class.
non_sig_loc_inds, single_sig_loc_inds, double_sig_loc_inds = [
    inds[_class_mask]
    for _class_mask in (non_sig_inds, single_sig_inds, double_sig_inds)
]

In [18]:
# Handle Ratings
_mags = [0, 1, 2, 3, 4, 5]
# Draw one rating per tornado in each class, using that class's probability
# vector; draws happen in the order non-sig, single-sig, double-sig.
non_sig_ratings, single_sig_ratings, double_sig_ratings = [
    np.random.choice(_mags, size=_class_mask.sum(), replace=True, p=_rating_probs)
    for _class_mask, _rating_probs in (
        (non_sig_inds, tornado_dists.r_nonsig),
        (single_sig_inds, tornado_dists.r_singlesig),
        (double_sig_inds, tornado_dists.r_doublesig),
    )
]

In [19]:
# Handle Distances
# One dc.get_distances call per class, in the order non-sig, single-sig, double-sig.
non_sig_distances, single_sig_distances, double_sig_distances = [
    dc.get_distances(_class_ratings, tornado_dists)
    for _class_ratings in (non_sig_ratings, single_sig_ratings, double_sig_ratings)
]

In [20]:
%%time

#print("Running simulations...")
#print("    Non Sig...")
non_sig = dc.simulate(non_sig_loc_inds, non_sig_distances,
                        non_sig_ratings, tornado_direction_distribution, igrids)
#print("    Single Sig...")
single_sig = dc.simulate(single_sig_loc_inds, single_sig_distances,
                            single_sig_ratings, tornado_direction_distribution, igrids)
#print("    Double Sig...")
double_sig = dc.simulate(double_sig_loc_inds, double_sig_distances,
                            double_sig_ratings, tornado_direction_distribution, igrids)

CPU times: user 2min 16s, sys: 4.35 s, total: 2min 20s
Wall time: 2min 20s


In [21]:
# Pool the three classes, shuffle in place, then cut the pool at the running
# totals of the per-simulation counts so each simulation gets its own slice.
simulated_tornadoes = dc.flatten_list([non_sig, single_sig, double_sig])
np.random.shuffle(simulated_tornadoes)
_per_sim_totals = counts.sum(axis=0)
_split_points = _per_sim_totals.cumsum()
# The final piece returned by np.split (after the last cut) is dropped.
_sims = np.split(simulated_tornadoes, _split_points)[:-1]
# Realizations are numbered from 1.
realizations = dc.Realizations(
    [
        dc.SyntheticTornadoRealization(_sim, _number)
        for _number, _sim in enumerate(_sims, start=1)
    ]
)

### Ensuring we can read in the psv and that the data check out

In [22]:
# Parse the pipe-separated text straight from memory, without touching disk.
_psv_buffer = io.StringIO(realizations.as_psv)
df = pd.read_csv(_psv_buffer, sep='|')

In [23]:
# Distribution of total population per simulation. The column is selected
# before aggregating so only `population` is summed; summing the whole frame
# first is wasted work and can misbehave on non-numeric columns.
df.groupby('sim')['population'].sum().describe(percentiles=[0.1,0.5,0.9])

count    1.000000e+04
mean     4.410158e+05
std      2.708835e+05
min      1.542400e+04
10%      1.725712e+05
50%      3.800800e+05
90%      7.769200e+05
max      3.092832e+06
Name: population, dtype: float64

In [24]:
# Write the PSV text, encoded to bytes, into the gzip-compressed output file.
with gzip.GzipFile(filename=outfile, mode="w") as gz_out:
    gz_out.write(realizations.as_psv.encode())