In [1]:
%load_ext jupyter_black

In [2]:
import json
from glob import glob
from pathlib import Path

import nvector
import numpy as np
import pandas as pd
import geopandas as gpd
from typing import NewType, Iterable, Callable
from shapely.geometry import Point
import nvector as nv

# ml
import tensorflow
from sklearn.manifold import LocallyLinearEmbedding

# plotting
import matplotlib.pyplot as plt

# Number of seconds in two minutes.
TWO_MINS = 120.0
# Shorthand for building MultiIndex slices with ``.loc``.
# NOTE(review): ``pd.IndexSlice`` is an indexer helper object, not a builtin
# ``slice`` -- the annotation below looks inaccurate; confirm before relying on it.
idx: slice = pd.IndexSlice

# Distinct static type for a parsed feature-collection document (a plain ``dict`` at runtime).
FeatureCollection = NewType("FeatureCollection", dict)

# nvector reference frame named "WGS84"; used to build geodetic points and ECEF vectors.
wgs84 = nv.FrameE(name="WGS84")

# Sorted paths of every ``*.json`` file in the probsevere data directory.
# NOTE(review): this is an absolute, machine-specific path -- consider a configurable data dir.
all_files = sorted(glob("/workspaces/sppp/data/probsevere/*.json"))

In [3]:
# data transformation and initial loading
def open_file(filepath: Path) -> FeatureCollection:
    """Parse the JSON document stored at ``filepath`` and return the result.

    The file is opened in binary mode and handed straight to ``json.load``;
    the handle is closed before the parsed object is returned.
    """
    with filepath.open("rb") as stream:
        parsed = json.load(stream)
    return parsed


def to_dataframe(fc: FeatureCollection) -> pd.DataFrame:
    """Convert one feature collection into a frame indexed by ``(validTime, ID)``.

    Parameters
    ----------
    fc : FeatureCollection
        Parsed document providing a ``"features"`` list and a ``"validTime"``
        string in ``"%Y%m%d_%H%M%S %Z"`` format. The features are expected to
        carry an ``ID`` property, since it becomes an index level.

    Returns
    -------
    pd.DataFrame
        The ``GeoDataFrame`` built from the features, with two extra columns:
        ``CENTROID`` (centroid of each row's geometry) and ``ECEF_VECTOR``
        (the centroid converted to an nvector ECEF vector in the module-level
        ``wgs84`` frame). ``validTime`` is broadcast to every row and, together
        with ``ID``, moved into the index.
    """
    df = gpd.GeoDataFrame.from_features(fc["features"])
    df["validTime"] = pd.to_datetime(fc["validTime"], format="%Y%m%d_%H%M%S %Z")

    # Compute the centroids once and reuse them for both derived columns.
    centroids = df["geometry"].centroid
    df["CENTROID"] = centroids

    # Centroid x is passed as longitude and y as latitude, both in degrees.
    df["ECEF_VECTOR"] = tuple(
        wgs84.GeoPoint(
            longitude=point.x, latitude=point.y, degrees=True
        ).to_ecef_vector()
        for point in centroids
    )

    return df.set_index(["validTime", "ID"])


def to_midf() -> pd.DataFrame:
    """Load every path in ``all_files`` and stack the results into one frame.

    Each file is parsed with ``open_file`` and converted with ``to_dataframe``;
    the per-file frames are produced lazily and concatenated row-wise.
    """
    per_file_frames = (to_dataframe(open_file(Path(name))) for name in all_files)
    return pd.concat(per_file_frames)


# Combined frame for all input files, indexed by (validTime, ID).
midf = to_midf()

In [5]:
# helper function
# from sppp.extract.funcs import mask_frames_by_id
from sklearn.manifold import LocallyLinearEmbedding


def embed(df: pd.DataFrame, n_neighbors: int = 10) -> pd.DataFrame:
    """Build the reduced feature frame derived from ``df``.

    Parameters
    ----------
    df : pd.DataFrame
        Frame providing the columns ``PS``, ``MOTION_EAST``, ``MOTION_SOUTH``,
        ``CENTROID``, ``ECEF_VECTOR``, ``MUCAPE``, ``MLCAPE`` and ``MLCIN``.
        ``CENTROID`` must expose ``.x`` / ``.y`` accessors.
    n_neighbors : int, default 10
        Neighbourhood size handed to ``LocallyLinearEmbedding``.

    Returns
    -------
    pd.DataFrame
        A copy holding ``PS``, ``MOTION_EAST``, ``MOTION_SOUTH``,
        ``ECEF_VECTOR``, plus ``STAB`` (1-component locally linear embedding of
        the three CAPE/CIN columns) and ``X`` / ``Y`` (centroid coordinates).
        Every column except ``ECEF_VECTOR`` is cast to ``float32``.
    """
    lle = LocallyLinearEmbedding(n_components=1, n_neighbors=n_neighbors)
    fresh = df[["PS", "MOTION_EAST", "MOTION_SOUTH", "CENTROID", "ECEF_VECTOR"]].copy()

    # Fit on the frame that was passed in (not on a notebook-level global) so
    # the embedding always lines up row-for-row with ``fresh``.
    embedding = lle.fit_transform(df[["MUCAPE", "MLCAPE", "MLCIN"]])
    # ``n_components=1`` gives an (n, 1) array; take its single column.
    fresh["STAB"] = embedding[:, 0]

    cent = fresh["CENTROID"]
    fresh["X"] = cent.x
    fresh["Y"] = cent.y
    fresh = fresh.drop("CENTROID", axis=1)

    # ECEF_VECTOR holds objects and is left untouched; the rest becomes float32.
    condition = fresh.columns[fresh.columns != "ECEF_VECTOR"]
    fresh[condition] = fresh[condition].astype(np.float32)
    return fresh


# Derive the reduced feature frame from the combined frame and display it.
fresh = midf.pipe(embed)
fresh

Unnamed: 0_level_0,Unnamed: 1_level_0,PS,MOTION_EAST,MOTION_SOUTH,ECEF_VECTOR,STAB,X,Y
validTime,ID,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
2021-10-11 00:00:53+00:00,89234,11.0,15.042,-14.019,"ECEFvector(pvector=[[-530266.6328842767], [-49...",0.001116,-96.068283,38.238827
2021-10-11 00:00:53+00:00,89321,4.0,0.423,-2.061,"ECEFvector(pvector=[[943548.7176692891], [-571...",0.000710,-80.632050,24.728102
2021-10-11 00:00:53+00:00,89467,98.0,9.610,-11.265,"ECEFvector(pvector=[[-619075.6783992505], [-50...",-0.002347,-96.946304,36.715923
2021-10-11 00:00:53+00:00,89470,2.0,-1.606,0.677,"ECEFvector(pvector=[[1335819.2918418334], [-50...",0.000850,-75.129051,35.398010
2021-10-11 00:00:53+00:00,89519,100.0,12.789,-8.273,"ECEFvector(pvector=[[-748148.9557063201], [-51...",-0.000822,-98.236542,35.127361
...,...,...,...,...,...,...,...,...
2021-10-11 01:00:58+00:00,90389,4.0,11.984,-13.031,"ECEFvector(pvector=[[-624810.164130543], [-501...",0.001133,-97.099228,37.659824
2021-10-11 01:00:58+00:00,90390,33.0,13.982,-3.008,"ECEFvector(pvector=[[-737892.7786927632], [-52...",0.000385,-98.022614,34.098648
2021-10-11 01:00:58+00:00,90391,10.0,17.650,-6.134,"ECEFvector(pvector=[[-839030.8095662066], [-53...",0.000909,-98.894226,31.783758
2021-10-11 01:00:58+00:00,90392,64.0,13.725,-0.784,"ECEFvector(pvector=[[-862084.2989790718], [-53...",-0.000396,-99.115829,31.531752


In [6]:
# machine learning support classes
from tensorflow import keras
from keras.engine.sequential import Sequential


class State:
    """Accumulates frames as they arrive and remembers the most recent one.

    ``set_storm`` stores the frame it is given as the "latest" frame and
    appends it to a running history. Until the first call, both ``latests`` and
    ``frame`` are ``None``.
    """

    def __init__(self):
        # Most recently supplied frame; None until set_storm is called.
        self.__latests: pd.DataFrame = None
        # Row-wise concatenation of every supplied frame; None until set_storm is called.
        self.__state: pd.DataFrame = None

    def __repr__(self) -> str:
        return repr(self.__state)

    @property
    def latests(self) -> pd.DataFrame:
        """The frame passed to the most recent ``set_storm`` call (or ``None``)."""
        return self.__latests

    def set_storm(self, df: pd.DataFrame) -> None:
        """Record ``df`` as the latest frame and append it to the history."""
        self.__latests = df

        if self.__state is None:
            # First frame: the history is the frame itself (no copy is made).
            self.__state = df
        else:
            self.__state = pd.concat([self.__state, df])

    def iterstorms(self):
        """Yield one sub-frame per ``ID`` found in the last two history rows.

        Yields nothing when no frame has been stored yet.

        NOTE(review): ``iloc[-2:]`` selects the last two *rows*, not the last
        two frames -- confirm that this is the intended window.
        """
        if self.__state is None:
            return
        for _, storm in self.__state.iloc[-2:].groupby("ID"):
            yield storm

    def has_hist(self) -> bool:
        """Return ``True`` once at least one frame has been stored."""
        return isinstance(self.__state, pd.DataFrame)

    @property
    def frame(self) -> pd.DataFrame:
        """The accumulated history (or ``None`` before the first ``set_storm``)."""
        return self.__state


def build_model(frame_b: pd.DataFrame, units: int = 22) -> Sequential:
    """Build a small three-layer dense network sized for ``frame_b``.

    Parameters
    ----------
    frame_b : pd.DataFrame
        Only its ``.shape`` is read (so a NumPy array works as well). The full
        shape is used as the per-sample input shape, i.e. one sample is an
        entire frame and the model expects an extra leading batch axis.
    units : int, default 22
        Width of the two hidden layers. Keras documents ``units`` as a
        positive integer; this replaces the previous float literal ``22.5``.

    Returns
    -------
    Sequential
        Uncompiled model: Dense(units, elu) -> Dense(units, sigmoid) -> Dense(3).
    """
    model = keras.models.Sequential(
        [
            keras.layers.Dense(
                units,
                activation="elu",
                input_shape=frame_b.shape,
            ),
            keras.layers.Dense(
                units,
                activation="sigmoid",
            ),
            # Three raw outputs; no activation is applied here.
            keras.layers.Dense(3),
        ]
    )
    return model

In [9]:
import tensorflow as tf
import gym

from sppp.extract.funcs import mask_frames_by_id


class SPPPEnv(gym.Env):
    """Minimal gym environment whose observation is the latest frame in ``state``.

    The environment only reads from the ``State`` it is given; it never
    modifies it. Reward and termination are placeholders for now.
    """

    def __init__(self, state: "State") -> None:
        self.state = state

    def _compute_reward(self) -> int:
        """Placeholder reward: always ``1``."""
        return 1

    def reset(self, *, seed=None, options=None):
        """Return the current observation without touching the shared state.

        Defined explicitly because callers invoke ``env.reset()`` and the base
        ``gym.Env.reset`` is not guaranteed to be usable as-is.
        ``seed`` and ``options`` are accepted for signature compatibility and
        ignored.

        NOTE(review): returns the bare observation, matching the 4-tuple
        ``step`` below; newer gym APIs expect ``(obs, info)`` -- confirm the
        targeted gym version.
        """
        return self.state.latests

    def step(self, action: np.ndarray):
        """Return ``(observation, reward, done, info)`` for one step.

        ``action`` is currently ignored: the observation is the latest frame,
        the reward comes from ``_compute_reward``, ``done`` is always ``False``
        and ``info`` is an empty dict.
        """
        observation = self.state.latests
        reward = self._compute_reward()
        done = False
        info = {}
        return observation, reward, done, info

    @property
    def observation_space(self) -> pd.DataFrame:
        """The latest frame held by ``state``.

        NOTE(review): gym normally expects a ``gym.spaces.Space`` here --
        confirm nothing relies on that.
        """
        return self.state.latests


def iterframe(df: pd.DataFrame) -> Iterable[tuple[pd.Timestamp, pd.DataFrame]]:
    """Yield ``(validTime, sub-frame)`` pairs, one per distinct ``validTime``."""
    for stamp, group in df.groupby("validTime"):
        yield stamp, group


# NOTE(review): ``n_inputs`` is not referenced anywhere in the visible notebook.
n_inputs = 4
# Shared frame accumulator and the environment that reads from it.
state = State()
env = SPPPEnv(state)
# Loss function handed to ``play_one_step`` by the driver loop.
loss = keras.losses.binary_crossentropy


def play_one_step(
    env: SPPPEnv,
    obs: np.ndarray,
    model: Sequential,
    loss_fn: Callable[[tf.Tensor, tf.Tensor], tf.Tensor],
):
    """Sample one action from the model's output for ``obs`` (policy gradient).

    Parameters
    ----------
    env : SPPPEnv
        Currently unused; kept so the environment can be stepped here later.
    obs : np.ndarray
        Observation; a leading batch axis is added before calling the model.
    model : Sequential
        Called on the batched observation. Its output is compared against a
        uniform sample and fed to ``loss_fn`` as a prediction, so it is
        expected to lie in [0, 1] -- TODO confirm the model's final activation.
    loss_fn : callable
        Called as ``loss_fn(y_target, prediction)``.

    Returns
    -------
    np.ndarray
        Boolean array: ``True`` where the uniform sample exceeded the model
        output.
    """
    with tf.GradientTape() as tape:
        left_prob = model(obs[np.newaxis])
        action = tf.random.uniform([1, 1]) > left_prob

        # Target is 1 where the action is False and 0 where it is True.
        y_target = tf.constant([[1.0]]) - tf.cast(action, tf.float32)

        step_loss = tf.reduce_mean(loss_fn(y_target, left_prob))

    # NOTE(review): the gradients are computed but not yet returned or applied.
    grads = tape.gradient(step_loss, model.trainable_variables)
    return action.numpy()


# NOTE(review): ``n_outputs`` is not referenced anywhere in the visible notebook.
n_outputs = 5

if __name__ == "__main__":
    # Walk the feature frame one ``validTime`` group at a time.
    for vt, df in iterframe(fresh):
        # NOTE(review): ``track_rewards`` and ``obs`` are assigned but never read below.
        track_rewards = 0
        obs = env.reset()
        # evaluate prediction
        if state.has_hist():
            # There is existing storm information: align frame_a (previously stored
            # frame) and frame_b (current frame) by the IDs in the index.
            # NOTE(review): mask_frames_by_id comes from sppp.extract.funcs and is not
            # visible here -- presumably it keeps only IDs present in both frames.
            frame_a, frame_b = mask_frames_by_id(state.latests, df)
            # assert that the frames are of an equal shape
            assert frame_a.shape == frame_b.shape
            bg = frame_b[["MOTION_EAST", "MOTION_SOUTH", "STAB"]].values

            # NOTE(review): a brand-new model is built for every frame, so no weights
            # carry over from one iteration to the next.
            model = build_model(bg)

            x = play_one_step(env, bg, model, loss)
            # obs, reward, done, info = env.step(frame_b[["MOTION_EAST", "MOTION_SOUTH"]])
        # make a prediction
        # Store the current frame so it becomes ``state.latests`` for the next iteration.
        state.set_storm(df)

# frame_b[["MOTION_EAST", "MOTION_SOUTH"]] * TWO_MINS

In [10]:
# Copy of everything accumulated in ``state`` by the driver loop; copying keeps
# later edits to ``df`` from touching the state's own frame.
df = state.frame.copy()
df
# df["ECEF_VECTOR"] + (df[["X", "Y"]] * TWO_MINS).stack().values[:, np.newaxis]

Unnamed: 0_level_0,Unnamed: 1_level_0,PS,MOTION_EAST,MOTION_SOUTH,ECEF_VECTOR,STAB,X,Y
validTime,ID,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
2021-10-11 00:00:53+00:00,89234,11.0,15.042,-14.019,"ECEFvector(pvector=[[-530266.6328842767], [-49...",0.001116,-96.068283,38.238827
2021-10-11 00:00:53+00:00,89321,4.0,0.423,-2.061,"ECEFvector(pvector=[[943548.7176692891], [-571...",0.000710,-80.632050,24.728102
2021-10-11 00:00:53+00:00,89467,98.0,9.610,-11.265,"ECEFvector(pvector=[[-619075.6783992505], [-50...",-0.002347,-96.946304,36.715923
2021-10-11 00:00:53+00:00,89470,2.0,-1.606,0.677,"ECEFvector(pvector=[[1335819.2918418334], [-50...",0.000850,-75.129051,35.398010
2021-10-11 00:00:53+00:00,89519,100.0,12.789,-8.273,"ECEFvector(pvector=[[-748148.9557063201], [-51...",-0.000822,-98.236542,35.127361
...,...,...,...,...,...,...,...,...
2021-10-11 01:00:58+00:00,90389,4.0,11.984,-13.031,"ECEFvector(pvector=[[-624810.164130543], [-501...",0.001133,-97.099228,37.659824
2021-10-11 01:00:58+00:00,90390,33.0,13.982,-3.008,"ECEFvector(pvector=[[-737892.7786927632], [-52...",0.000385,-98.022614,34.098648
2021-10-11 01:00:58+00:00,90391,10.0,17.650,-6.134,"ECEFvector(pvector=[[-839030.8095662066], [-53...",0.000909,-98.894226,31.783758
2021-10-11 01:00:58+00:00,90392,64.0,13.725,-0.784,"ECEFvector(pvector=[[-862084.2989790718], [-53...",-0.000396,-99.115829,31.531752
