The purpose of this notebook is to create example json files (similar to however a backend API would be serving data) that contain data from the individual tornado simulations. These can be used to drive histograms, etc on the next version of the IMPACTS dashboard.

In [6]:
import pathlib
import glob
import os

import argparse
import geojson
import pandas as pd
import json
import random
import numpy as np

from datetime import datetime, timedelta

In [51]:
jsonResponse = []

In [52]:
# Read in example psv
file = '../data/output/examples/20210428124600.psv.gz'
df = pd.read_csv(file, sep="|")

### Bin by rating

In [53]:
bin_labels = ['0-1','2-3','4-5']
# lower bound is not inclusive, so we shift the thresholds one to the left
cut_bins = [-1,1,3,5]

# Add the hour bins to the original dataframe
df['ratingbin'] = pd.cut(df['rating'], bins=cut_bins, labels=bin_labels)
df[['rating','ratingbin','time']]

Unnamed: 0,rating,ratingbin,time
0,1,0-1,22
1,1,0-1,21
2,2,2-3,2
3,0,0-1,22
4,1,0-1,23
...,...,...,...
89374,0,0-1,23
89375,1,0-1,22
89376,1,0-1,1
89377,1,0-1,3


In [54]:
weak = df[df['ratingbin'] == '0-1']
sig = df[df['ratingbin'] == '2-3']
vio = df[df['ratingbin'] == '4-5']

In [55]:
weakSum = weak.groupby('sim').sum().loc[:,('population','hospitals','mobilehomes','psubstations')]
sigSum = sig.groupby('sim').sum().loc[:,('population','hospitals','mobilehomes','psubstations')]
vioSum = vio.groupby('sim').sum().loc[:,('population','hospitals','mobilehomes','psubstations')]

weakResponse = {'Level':'0-1'}
sigResponse = {'Level':'2-3'}
vioResponse = {'Level':'4-5'}

weakResponse['Sims'] = weakSum.reset_index().values.tolist()
sigResponse['Sims'] = sigSum.reset_index().values.tolist()
vioResponse['Sims'] = vioSum.reset_index().values.tolist()

In [56]:
jsonResponse.append(weakResponse)
jsonResponse.append(sigResponse)
jsonResponse.append(vioResponse)

In [39]:
with open('../data/output/examples/jsonResponse_sims.json','w') as fp:
    json.dump(jsonResponse, fp)

In [57]:
jsonResponse

[{'Level': '0-1',
  'Sims': [[1, 2128, 0, 0, 0],
   [2, 10800, 1, 450, 4],
   [3, 115280, 2, 425, 6],
   [4, 7808, 0, 0, 3],
   [5, 1760, 0, 0, 2],
   [6, 4992, 0, 75, 1],
   [7, 2016, 0, 0, 2],
   [8, 496, 0, 0, 0],
   [9, 29152, 0, 600, 4],
   [10, 51920, 2, 100, 5],
   [11, 640, 0, 25, 0],
   [12, 208, 0, 0, 0],
   [13, 944, 0, 0, 5],
   [14, 4288, 1, 100, 2],
   [15, 816, 0, 0, 0],
   [16, 2480, 0, 0, 1],
   [17, 1264, 0, 75, 0],
   [18, 464, 0, 0, 0],
   [19, 896, 0, 0, 0],
   [20, 5360, 0, 0, 1],
   [21, 352, 0, 0, 0],
   [22, 93264, 1, 0, 13],
   [23, 3648, 0, 25, 4],
   [24, 16, 0, 0, 0],
   [25, 24560, 0, 0, 8],
   [26, 2224, 0, 75, 0],
   [27, 1888, 0, 250, 2],
   [28, 16, 0, 0, 1],
   [29, 6336, 0, 0, 1],
   [30, 91136, 1, 250, 12],
   [31, 1008, 1, 0, 5],
   [32, 496, 0, 0, 2],
   [33, 1792, 0, 0, 1],
   [34, 128, 0, 0, 2],
   [35, 32272, 3, 25, 1],
   [36, 4880, 0, 25, 0],
   [37, 11584, 0, 25, 5],
   [38, 1600, 1, 0, 2],
   [39, 3008, 0, 0, 2],
   [40, 112352, 1, 25, 9],


### Make an example file just for TX
Need to know how I'll be serving up data before going any farther
For the time being, just want an additional example such that I can code functionality in javascript 

In [58]:
jsonResponse = []

In [59]:
# Read in example psv
file = '../data/output/examples/20210428124600.psv.gz'
df = pd.read_csv(file, sep="|")

In [61]:
df.head()

Unnamed: 0,sim,population,distance,rating,states,counties,wfos,hospitals,hospitalbeds,mobileparks,mobilehomes,psubstations,plines,time,slon,slat,elon,elat
0,1,1056,2.0662,1,NY,36013,BUF,0,0,0,0,0,0,22,-79.1592,42.4951,-79.1388,42.5209
1,1,480,2.6825,1,NY,36097,BGM,0,0,0,0,0,0,21,-76.944,42.4153,-76.9021,42.4388
2,1,0,4.5373,2,TX,48023,OUN,0,0,0,0,0,0,2,-99.0422,33.6459,-98.9941,33.698
3,1,80,0.7748,0,TX,48143,FWD,0,0,0,0,0,0,22,-98.1495,32.0895,-98.1437,32.0995
4,1,368,3.8219,1,TX,4836748497,FWD,0,0,0,0,0,0,23,-97.8818,33.0339,-97.8283,33.0662


In [62]:
stBrokenOut = df.assign(category=df['states'].str.split(',')).explode('category').reset_index(drop=True)

# Remove row if state is NaN
stBrokenOut = stBrokenOut[stBrokenOut['category'].notna()]

# Grab a list of the unique states in the simulation
statesImpacted = stBrokenOut['category'].unique().tolist()

In [63]:
# Just grab TX
df_TX = stBrokenOut[stBrokenOut['category'] == 'TX']
df_TX.head()

Unnamed: 0,sim,population,distance,rating,states,counties,wfos,hospitals,hospitalbeds,mobileparks,mobilehomes,psubstations,plines,time,slon,slat,elon,elat,category
2,1,0,4.5373,2,TX,48023,OUN,0,0,0,0,0,0,2,-99.0422,33.6459,-98.9941,33.698,TX
3,1,80,0.7748,0,TX,48143,FWD,0,0,0,0,0,0,22,-98.1495,32.0895,-98.1437,32.0995,TX
4,1,368,3.8219,1,TX,4836748497,FWD,0,0,0,0,0,0,23,-97.8818,33.0339,-97.8283,33.0662,TX
5,1,144,0.5527,0,TX,48035,FWD,0,0,0,0,0,0,1,-97.4614,32.2109,-97.4531,32.2147,TX
7,1,0,0.5467,2,TX,48503,FWD,0,0,0,0,0,0,23,-98.7699,33.2014,-98.7633,33.2071,TX


### Bin By Rating

In [66]:
bin_labels = ['0-1','2-3','4-5']
# lower bound is not inclusive, so we shift the thresholds one to the left
cut_bins = [-1,1,3,5]

# Add the hour bins to the original dataframe
df_TX['ratingbin'] = pd.cut(df_TX['rating'], bins=cut_bins, labels=bin_labels)
df_TX[['rating','ratingbin','time']].head()

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_TX['ratingbin'] = pd.cut(df_TX['rating'], bins=cut_bins, labels=bin_labels)


Unnamed: 0,rating,ratingbin,time
2,2,2-3,2
3,0,0-1,22
4,1,0-1,23
5,0,0-1,1
7,2,2-3,23


In [68]:
weak = df_TX[df_TX['ratingbin'] == '0-1']
sig = df_TX[df_TX['ratingbin'] == '2-3']
vio = df_TX[df_TX['ratingbin'] == '4-5']

In [69]:
weakSum = weak.groupby('sim').sum().loc[:,('population','hospitals','mobilehomes','psubstations')]
sigSum = sig.groupby('sim').sum().loc[:,('population','hospitals','mobilehomes','psubstations')]
vioSum = vio.groupby('sim').sum().loc[:,('population','hospitals','mobilehomes','psubstations')]

weakResponse = {'Level':'0-1'}
sigResponse = {'Level':'2-3'}
vioResponse = {'Level':'4-5'}

weakResponse['Sims'] = weakSum.reset_index().values.tolist()
sigResponse['Sims'] = sigSum.reset_index().values.tolist()
vioResponse['Sims'] = vioSum.reset_index().values.tolist()

In [70]:
jsonResponse.append(weakResponse)
jsonResponse.append(sigResponse)
jsonResponse.append(vioResponse)

In [76]:
with open('../data/output/examples/jsonResponse_sims_TX.json','w') as fp:
    json.dump(jsonResponse, fp)

In [82]:
jsonResponse[0]

{'Level': '0-1',
 'Sims': [[1, 592, 0, 0, 0],
  [2, 80, 0, 0, 4],
  [3, 87136, 2, 0, 3],
  [4, 304, 0, 0, 0],
  [5, 368, 0, 0, 0],
  [6, 1136, 0, 0, 0],
  [8, 128, 0, 0, 0],
  [9, 24976, 0, 600, 4],
  [10, 29744, 2, 0, 3],
  [11, 96, 0, 0, 0],
  [12, 208, 0, 0, 0],
  [13, 240, 0, 0, 0],
  [14, 3824, 0, 0, 1],
  [15, 464, 0, 0, 0],
  [17, 1264, 0, 75, 0],
  [18, 464, 0, 0, 0],
  [19, 784, 0, 0, 0],
  [20, 4528, 0, 0, 1],
  [21, 64, 0, 0, 0],
  [22, 91696, 1, 0, 12],
  [23, 80, 0, 0, 0],
  [24, 16, 0, 0, 0],
  [25, 3376, 0, 0, 1],
  [26, 1104, 0, 25, 0],
  [27, 240, 0, 0, 0],
  [28, 16, 0, 0, 1],
  [30, 7952, 0, 0, 0],
  [31, 240, 0, 0, 1],
  [32, 16, 0, 0, 2],
  [33, 688, 0, 0, 0],
  [34, 48, 0, 0, 2],
  [35, 29792, 3, 25, 1],
  [36, 80, 0, 0, 0],
  [37, 96, 0, 0, 0],
  [38, 1472, 1, 0, 1],
  [39, 112, 0, 0, 0],
  [40, 3600, 0, 25, 5],
  [42, 2144, 0, 175, 3],
  [43, 800, 0, 0, 1],
  [44, 5616, 0, 175, 1],
  [45, 9616, 0, 25, 3],
  [46, 304, 0, 0, 0],
  [47, 5936, 0, 50, 0],
  [48, 9835