In [1]:
import numpy as np
import pandas as pd
import xarray as xr

# Both feature sets are stored as groups of the same file; name the path once
# so the two loads below cannot drift apart.
FEATURES_PATH = "../features_us_northeast_2020.nc"

# Group names inside the file for the two feature sets.
past_string = "past_features"
future_string = "future_features"

# Load each group as its own Dataset.
past_features = xr.load_dataset(FEATURES_PATH, group=past_string)
future_features = xr.load_dataset(FEATURES_PATH, group=future_string)

In [2]:
def break_down_variable(data_vars):
    """Group ``"a::b::c"``-style variable names into a two-level mapping.

    Each name is split on ``"::"``. The first piece becomes the outer key,
    the second piece the inner key, and the third piece is appended to the
    list stored under that pair. Pieces after the third are ignored.

    Parameters
    ----------
    data_vars : iterable of str
        Variable names containing at least two ``"::"`` separators.

    Returns
    -------
    dict
        ``{first: {second: [third, ...]}}`` with entries in the order the
        names were encountered; repeated names produce repeated list items.

    Raises
    ------
    IndexError
        If a name splits into fewer than three pieces.
    """
    grouped = {}
    for name in data_vars:
        parts = name.split("::")
        # setdefault creates the missing nesting levels on first sight of a key.
        grouped.setdefault(parts[0], {}).setdefault(parts[1], []).append(parts[2])
    return grouped

# Past Features

In [3]:
# Display the dataset loaded from the past-features group.
past_features

All variables

In [4]:
# List every data variable in the past-features dataset by its full name.
past_features.data_vars

Data variables:
    aster::mean_elevation::nbagg-percentile_10-25-circular           (dim_sample, dim_time_past) float32 ...
    aster::mean_elevation::nbagg-mean-25-circular                    (dim_sample, dim_time_past) float32 ...
    aster::mean_elevation::nbagg-percentile_90-25-circular           (dim_sample, dim_time_past) float32 ...
    aster::mean_elevation::nbagg-std-25-circular                     (dim_sample, dim_time_past) float32 ...
    aster::roughness::nbagg-percentile_10-25-circular                (dim_sample, dim_time_past) float32 ...
    aster::roughness::nbagg-mean-25-circular                         (dim_sample, dim_time_past) float32 ...
    aster::roughness::nbagg-percentile_90-25-circular                (dim_sample, dim_time_past) float32 ...
    aster::roughness::nbagg-std-25-circular                          (dim_sample, dim_time_past) float32 ...
    aster::gradient_x::nbagg-percentile_10-25-circular               (dim_sample, dim_time_past) float32 ...
   

In [5]:
# Group the past-feature variable names by their "::"-separated components.
# Iterating `data_vars` yields the variable names, so no list copy is needed.
past_variable = break_down_variable(past_features.data_vars)
past_variable

{'aster': {'mean_elevation': ['nbagg-percentile_10-25-circular',
   'nbagg-mean-25-circular',
   'nbagg-percentile_90-25-circular',
   'nbagg-std-25-circular'],
  'roughness': ['nbagg-percentile_10-25-circular',
   'nbagg-mean-25-circular',
   'nbagg-percentile_90-25-circular',
   'nbagg-std-25-circular'],
  'gradient_x': ['nbagg-percentile_10-25-circular',
   'nbagg-mean-25-circular',
   'nbagg-percentile_90-25-circular',
   'nbagg-std-25-circular'],
  'gradient_y': ['nbagg-percentile_10-25-circular',
   'nbagg-mean-25-circular',
   'nbagg-percentile_90-25-circular',
   'nbagg-std-25-circular'],
  'gradient_abs': ['nbagg-percentile_10-25-circular',
   'nbagg-mean-25-circular',
   'nbagg-percentile_90-25-circular',
   'nbagg-std-25-circular']},
 'ecmwf': {'100u': ['nbagg-percentile_10-25-circular',
   'nbagg-mean-25-circular',
   'nbagg-percentile_90-25-circular',
   'nbagg-std-25-circular'],
  '100v': ['nbagg-percentile_10-25-circular',
   'nbagg-mean-25-circular',
   'nbagg-percentil

In [6]:
# Outer keys of the grouped mapping, i.e. the first "::" component of each name.
past_variable.keys()

dict_keys(['aster', 'ecmwf', 'goesabi', 'goesglm', 'nexrad', 'composites'])

NEXRAD radars used to produce the dataset used in this study
| Location                   | Code |
|----------------------------|------|
| Albany, New York           | KENX |
| Binghamton, New York       | KBGM |
| Buffalo, New York          | KBUF |
| Burlington, Vermont        | KCXX | 
| Boston, Massachusetts      | KBOX | 
| Fort Drum, New York        | KTYX | 
| New York City, New York    | KOKX |
| Philadelphia, Pennsylvania | KDIX | 
| Portland, Maine            | KGYX | 
| Pittsburgh, Pennsylvania   | KPBZ | 
| State College, Pennsylvania| KCCX | 

Variables from the NEXRAD radar adopted in this study

---------------------------------------
25 dBZ echo top height
35 dBZ echo top height
45 dBZ echo top height
Maximum reflectivity
Vertically integrated liquid
U/V motion components from optical flow
---------------------------------------

In [7]:
# Breakdown of the past-feature variables whose first name component is 'nexrad'.
past_variable['nexrad']

{'ECHOTOP-25': ['nbagg-percentile_10-25-circular',
  'nbagg-mean-25-circular',
  'nbagg-percentile_90-25-circular',
  'nbagg-std-25-circular'],
 'ECHOTOP-35': ['nbagg-percentile_10-25-circular',
  'nbagg-mean-25-circular',
  'nbagg-percentile_90-25-circular',
  'nbagg-std-25-circular'],
 'ECHOTOP-45': ['nbagg-percentile_10-25-circular',
  'nbagg-mean-25-circular',
  'nbagg-percentile_90-25-circular',
  'nbagg-std-25-circular',
  'nbagg-median-25-circular',
  'nbagg-max-25-circular'],
 'VIL': ['nbagg-percentile_10-25-circular',
  'nbagg-mean-25-circular',
  'nbagg-percentile_90-25-circular',
  'nbagg-std-25-circular',
  'nbagg-median-25-circular',
  'nbagg-max-25-circular'],
 'MAXZ': ['nbagg-percentile_10-25-circular',
  'nbagg-mean-25-circular',
  'nbagg-percentile_90-25-circular',
  'nbagg-std-25-circular',
  'nbagg-max-25-circular',
  'nbagg-numgt_57-25-circular'],
 'FLOW-U': ['nbagg-percentile_10-25-circular',
  'nbagg-mean-25-circular',
  'nbagg-percentile_90-25-circular',
  'nbagg

Variables from the GOES-16 ABI instrument adopted in this study
| Level 1                                        | Level 2             |
|------------------------------------------------|---------------------|
| ABI band 01    ABI band 09    Difference 07 08 | Cloud top height    |
| ABI band 02    ABI band 10    Difference 07 09 | Cloud top pressure  |
| ABI band 03    ABI band 11    Difference 07 10 | Cloud optical depth |
| ABI band 04    ABI band 12    Difference 08 09 | CAPE                |
| ABI band 05    ABI band 13    Difference 08 10 | K-index             |
| ABI band 06    ABI band 14    Difference 11 13 | Lifted index        |
| ABI band 07    ABI band 15    Difference 12 13 | Showalter index     |
| ABI band 08    ABI band 16                     | Total totals index  |

In [8]:
# Breakdown of the past-feature variables whose first name component is 'goesabi'.
past_variable['goesabi']

{'ABIC01': ['nbagg-percentile_10-25-circular',
  'nbagg-mean-25-circular',
  'nbagg-percentile_90-25-circular',
  'nbagg-std-25-circular'],
 'ABIC02': ['nbagg-percentile_10-25-circular',
  'nbagg-mean-25-circular',
  'nbagg-percentile_90-25-circular',
  'nbagg-std-25-circular'],
 'ABIC03': ['nbagg-percentile_10-25-circular',
  'nbagg-mean-25-circular',
  'nbagg-percentile_90-25-circular',
  'nbagg-std-25-circular'],
 'ABIC04': ['nbagg-percentile_10-25-circular',
  'nbagg-mean-25-circular',
  'nbagg-percentile_90-25-circular',
  'nbagg-std-25-circular'],
 'ABIC05': ['nbagg-percentile_10-25-circular',
  'nbagg-mean-25-circular',
  'nbagg-percentile_90-25-circular',
  'nbagg-std-25-circular'],
 'ABIC06': ['nbagg-percentile_10-25-circular',
  'nbagg-mean-25-circular',
  'nbagg-percentile_90-25-circular',
  'nbagg-std-25-circular'],
 'ABIC07': ['nbagg-percentile_10-25-circular',
  'nbagg-mean-25-circular',
  'nbagg-percentile_90-25-circular',
  'nbagg-std-25-circular'],
 'ABIC08': ['nbagg-p

Variables from the GOES-16 GLM instrument adopted in this study

----------------------------------
Flash density
Flash energy density
Event density
Event energy density
----------------------------------

In [9]:
# Breakdown of the past-feature variables whose first name component is 'goesglm'.
past_variable['goesglm']

{'flash_density': ['nbagg-percentile_10-25-circular',
  'nbagg-mean-25-circular',
  'nbagg-percentile_90-25-circular',
  'nbagg-std-25-circular'],
 'flash_energy_density': ['nbagg-percentile_10-25-circular',
  'nbagg-mean-25-circular',
  'nbagg-percentile_90-25-circular',
  'nbagg-std-25-circular'],
 'event_density': ['nbagg-percentile_10-25-circular',
  'nbagg-mean-25-circular',
  'nbagg-percentile_90-25-circular',
  'nbagg-std-25-circular',
  'nbagg-any-25-circular'],
 'event_energy_density': ['nbagg-percentile_10-25-circular',
  'nbagg-mean-25-circular',
  'nbagg-percentile_90-25-circular',
  'nbagg-std-25-circular']}

Variables from the ASTER DEM adopted in this study

---------------
Mean elevation 
Roughness 
Surface gradient 
Upslope flow
--------------

# Future Features

In [10]:
# Display the dataset loaded from the future-features group.
future_features

In [11]:
# Group the future-feature variable names by their "::"-separated components.
# Iterating `data_vars` yields the variable names, so no list copy is needed.
future_variable = break_down_variable(future_features.data_vars)
future_variable

{'ecmwf': {'deg0l': ['nbagg-median-25-circular']},
 'goesglm': {'event_density': ['nbagg-any-25-circular'],
  'event_energy_density': ['nbagg-mean-25-circular']},
 'nexrad': {'MAXZ': ['nbagg-mean-25-circular',
   'nbagg-max-25-circular',
   'nbagg-numgt_57-25-circular'],
  'VIL': ['nbagg-mean-25-circular',
   'nbagg-median-25-circular',
   'nbagg-max-25-circular'],
  'ECHOTOP-45': ['nbagg-mean-25-circular',
   'nbagg-median-25-circular',
   'nbagg-max-25-circular']}}

In [12]:
# Outer keys of the grouped mapping, i.e. the first "::" component of each name.
future_variable.keys()

dict_keys(['ecmwf', 'goesglm', 'nexrad'])

In [13]:
# NOTE(review): the output recorded for this cell is `KeyError: 'train'`, so the
# call below does not currently succeed with `future_features` as its argument.
# `target_variables` lives in the project module, not in this notebook —
# TODO confirm what input it expects (presumably something keyed by 'train').
# NOTE(review): consider moving this import into the notebook's first cell with
# the other imports so a fresh-kernel run surfaces missing dependencies early.
import gradboost_experiments as gb
gb.target_variables(future_features)

KeyError: 'train'

In [None]:
# Indexing a Dataset with a dict is interpreted as indexers along dimensions,
# not as a choice of variables, so the nested `future_variable` mapping cannot
# be passed directly. Rebuild the full "::"-joined names from the mapping and
# select the variables with a list instead.
future_variable_names = [
    "::".join((source, field, aggregation))
    for source, fields in future_variable.items()
    for field, aggregations in fields.items()
    for aggregation in aggregations
]
future_features[future_variable_names]