In [1]:
%load_ext jupyter_black

In [2]:
import json
from glob import glob
from pathlib import Path

import nvector
import numpy as np
import pandas as pd
import geopandas as gpd
from typing import NewType, Iterable, Callable
from shapely.geometry import Point
import nvector as nv

# ml
import tensorflow
from sklearn.manifold import LocallyLinearEmbedding

# plotting
import matplotlib.pyplot as plt

# Shorthand for label-based slicing on MultiIndex frames.
# (`pd.IndexSlice` is an indexer helper object, not a `slice`, so it carries no
# `slice` annotation.)
idx = pd.IndexSlice

# Distinct static type for decoded GeoJSON-style documents; a plain dict at runtime.
FeatureCollection = NewType("FeatureCollection", dict)

# Reference frame used to convert lon/lat centroids to ECEF vectors.
wgs84 = nv.FrameE(name="WGS84")

# Directory holding the input JSON files. Kept as a single constant so the
# location can be changed in one place.
DATA_DIR = Path("/workspaces/sppp/data")

# Sorted so the files are always processed in the same (lexicographic) order.
all_files = sorted(glob(str(DATA_DIR / "*.json")))

In [7]:
def open_file(filepath: Path) -> FeatureCollection:
    """Decode the JSON document stored at ``filepath``.

    Args:
        filepath: Location of the JSON file; opened in binary mode.

    Returns:
        Whatever object ``json.load`` produces for the file's content.
    """
    with filepath.open("rb") as handle:
        payload = json.load(handle)
    return payload


def to_dataframe(fc: FeatureCollection) -> pd.DataFrame:
    """Convert one feature collection into a frame indexed by (validTime, ID).

    Args:
        fc: Mapping with a ``"features"`` entry (passed to
            ``GeoDataFrame.from_features``) and a ``"validTime"`` string in
            ``%Y%m%d_%H%M%S %Z`` format. The resulting frame must contain an
            ``ID`` column, since it becomes part of the index.

    Returns:
        The feature frame with three added columns -- ``validTime`` (the same
        parsed timestamp on every row), ``CENTROID`` (geometry centroid) and
        ``ECEF_VECTOR`` (the centroid converted through the ``wgs84`` frame) --
        and a ``["validTime", "ID"]`` MultiIndex.
    """
    df = gpd.GeoDataFrame.from_features(fc["features"])
    df["validTime"] = pd.to_datetime(fc["validTime"], format="%Y%m%d_%H%M%S %Z")

    # Compute the centroids once and reuse them for both derived columns
    # (previously they were recomputed for the ECEF conversion).
    centroids = df["geometry"].centroid
    df["CENTROID"] = centroids

    # Centroid x is treated as longitude and y as latitude, both in degrees.
    df["ECEF_VECTOR"] = tuple(
        wgs84.GeoPoint(
            longitude=point.x, latitude=point.y, degrees=True
        ).to_ecef_vector()
        for point in centroids
    )

    return df.set_index(["validTime", "ID"])


def to_midf() -> pd.DataFrame:
    """Build one multi-indexed frame from every path listed in ``all_files``.

    Each path is opened with ``open_file``, converted with ``to_dataframe``,
    and the per-file frames are concatenated in ``all_files`` order.
    """
    per_file_frames = (
        to_dataframe(open_file(Path(filename))) for filename in all_files
    )
    return pd.concat(per_file_frames)


def _mask_a_and_b(index_a: pd.Index, index_b: pd.Index):
    return index_a.isin(index_b), index_b.isin(index_a)


def _mask_frames_by_id(frame_a: pd.DataFrame, frame_b: pd.DataFrame):
    mask_a, mask_b = _mask_a_and_b(
        frame_a.index.unique("ID"), frame_b.index.unique("ID")
    )
    return frame_a[mask_a], frame_b[mask_b]


# Combined frame built from every file in `all_files`, indexed by (validTime, ID).
# Runs when this cell executes, so the input files must be readable at that point.
midf = to_midf()

In [4]:
from sklearn.manifold import LocallyLinearEmbedding

# Collapse the three instability fields into a single "STAB" feature.
# `random_state` is fixed so the embedding is reproducible across kernel
# restarts when the randomized ARPACK eigen-solver is selected; without it the
# STAB values can differ from run to run.
lle = LocallyLinearEmbedding(n_components=1, n_neighbors=10, random_state=0)

# Work on a copy so the STAB column is not written back into `midf`.
fresh = midf[["PS", "MOTION_EAST", "MOTION_SOUTH", "CENTROID", "ECEF_VECTOR"]].copy()
fresh["STAB"] = lle.fit_transform(midf[["MUCAPE", "MLCAPE", "MLCIN"]])
fresh

Unnamed: 0_level_0,Unnamed: 1_level_0,PS,MOTION_EAST,MOTION_SOUTH,CENTROID,ECEF_VECTOR,STAB
validTime,ID,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
2021-10-11 00:00:53+00:00,89234,11,15.042,-14.019,POINT (-96.06829 38.23883),"ECEFvector(pvector=[[-530266.6328842767], [-49...",0.001093
2021-10-11 00:00:53+00:00,89321,4,0.423,-2.061,POINT (-80.63205 24.72810),"ECEFvector(pvector=[[943548.7176692891], [-571...",-0.000066
2021-10-11 00:00:53+00:00,89467,98,9.61,-11.265,POINT (-96.94631 36.71592),"ECEFvector(pvector=[[-619075.6783992505], [-50...",-0.002428
2021-10-11 00:00:53+00:00,89470,2,-1.606,0.677,POINT (-75.12905 35.39801),"ECEFvector(pvector=[[1335819.2918418334], [-50...",0.000463
2021-10-11 00:00:53+00:00,89519,100,12.789,-8.273,POINT (-98.23654 35.12736),"ECEFvector(pvector=[[-748148.9557063201], [-51...",-0.001125
...,...,...,...,...,...,...,...
2021-10-11 01:00:58+00:00,90389,4,11.984,-13.031,POINT (-97.09923 37.65983),"ECEFvector(pvector=[[-624810.164130543], [-501...",0.001099
2021-10-11 01:00:58+00:00,90390,33,13.982,-3.008,POINT (-98.02261 34.09865),"ECEFvector(pvector=[[-737892.7786927632], [-52...",-0.000144
2021-10-11 01:00:58+00:00,90391,10,17.65,-6.134,POINT (-98.89423 31.78376),"ECEFvector(pvector=[[-839030.8095662066], [-53...",0.000776
2021-10-11 01:00:58+00:00,90392,64,13.725,-0.784,POINT (-99.11583 31.53175),"ECEFvector(pvector=[[-862084.2989790718], [-53...",-0.000449


In [9]:
from tensorflow import keras
from keras.engine.sequential import Sequential


class State:
    """Accumulates storm frames and remembers the most recent one.

    ``set_storm`` stores the frame it is given as the latest frame and appends
    it to an ever-growing history frame.
    """

    # Class-level default; set to True on the instance by the first `set_storm`.
    __has_state: bool = False

    def __init__(self):
        # Both stay None until the first `set_storm` call.
        self.__latests: pd.DataFrame = None
        self.__state: pd.DataFrame = None

    def __repr__(self) -> str:
        """Return the repr of the accumulated history (``'None'`` when empty)."""
        return repr(self.__state)

    @property
    def latests(self) -> pd.DataFrame:
        """The frame passed to the most recent `set_storm` call, or None."""
        return self.__latests

    def set_storm(self, df: pd.DataFrame) -> None:
        """Record ``df`` as the latest frame and append it to the history."""
        self.__latests = df

        if self.__has_state:
            self.__state = pd.concat([self.__state, df])
        else:
            # First frame: it becomes the history as-is (no copy is made).
            self.__has_state = True
            self.__state = df

    def iterstorms(self):
        """Yield one sub-frame per ``ID`` found in the last two history rows.

        Yields nothing when no frame has been stored yet.
        """
        if self.__state is None:
            return
        # NOTE(review): `iloc[-2:]` selects the last two *rows*, not the last
        # two frames/time steps -- confirm this is the intended window.
        for _, storm in self.__state.iloc[-2:].groupby("ID"):
            yield storm

    def has_hist(self) -> bool:
        """Return True once at least one frame has been stored."""
        return isinstance(self.__state, pd.DataFrame)


def build_model(
    frame_b: pd.DataFrame, hidden_units: int = 22, n_outputs: int = 3
) -> Sequential:
    """Build a small, uncompiled three-layer dense network.

    Args:
        frame_b: Any object with a ``.shape`` attribute; the full shape is used
            as the per-sample ``input_shape`` of the first layer, so the whole
            frame is treated as a single sample.
        hidden_units: Width of the two hidden layers. The layer width must be
            an integer; the previous literal ``22.5`` is replaced by ``22``
            (Keras 2.x coerces a non-integer width with ``int()``, so this is
            expected to be the width that was effectively in use -- confirm
            against the installed Keras version).
        n_outputs: Width of the final linear layer.

    Returns:
        A ``Sequential`` model: Dense(elu) -> Dense(sigmoid) -> Dense(linear).
    """
    model = keras.models.Sequential(
        [
            keras.layers.Dense(
                hidden_units,
                activation="elu",
                input_shape=frame_b.shape,
            ),
            keras.layers.Dense(
                hidden_units,
                activation="sigmoid",
            ),
            # No activation: the outputs are unbounded.
            keras.layers.Dense(n_outputs),
        ]
    )
    return model

In [10]:
import tensorflow as tf
import gym
from shapely.geometry import Point


class SPPPEnv(gym.Env):
    """Gym environment stub backed by a shared ``State`` store.

    Work in progress: ``step`` ignores its action and returns placeholder
    values, and the reward is a constant.
    """

    def __init__(self, state: "State") -> None:
        # Storm history store that observations are read from.
        self.state = state

    def _compute_reward(self) -> int:
        """Return the placeholder reward (always ``1``)."""
        return 1

    def reset(self, *, seed=None, options=None):
        """Start a new episode and return the initial observation.

        The environment holds no episode state of its own, so nothing is
        cleared. ``seed`` and ``options`` are accepted only for compatibility
        with newer gym ``reset`` signatures and are ignored.

        Returns:
            The latest frame held by the state store (None before any frame
            has been stored).
        """
        return self.state.latests

    def step(self, action):
        """Advance one step and return ``(observation, reward, done, info)``.

        ``action`` is currently unused; the observation is the placeholder
        ``1``, ``done`` is always False and ``info`` is an empty dict.
        """
        # TODO: apply `action`, derive the observation and reward from the
        # storm state, and decide when an episode is done.
        observation = 1
        reward = self._compute_reward()
        done = False
        info = {}
        return observation, reward, done, info

    @property
    def observation_space(self) -> pd.DataFrame:
        """The latest frame held by the state store.

        NOTE(review): gym normally expects ``observation_space`` to be a
        ``gym.Space``; here it is a DataFrame (or None) -- confirm nothing
        downstream relies on the gym contract.
        """
        return self.state.latests


def iterframe(df: pd.DataFrame) -> Iterable[tuple[pd.Timestamp, pd.DataFrame]]:
    """Lazily yield ``(validTime, sub-frame)`` pairs, one per ``validTime`` group."""
    for valid_time, group in df.groupby("validTime"):
        yield valid_time, group


# policy = Policy()
# NOTE(review): `n_inputs` is not referenced anywhere else in the visible cells.
n_inputs = 4
# Shared storm history; the same instance is handed to the environment below.
state = State()
env = SPPPEnv(state)
# Loss function passed to `play_one_step` by the driver loop.
loss = keras.losses.binary_crossentropy
# import torch


def play_one_step(
    env: SPPPEnv,
    obs: np.ndarray,
    model: Sequential,
    loss_fn: Callable[..., tf.Tensor],
):
    """Run one policy-gradient forward/backward pass for a single observation.

    The model output is compared against a uniform random draw to sample an
    action; the target is ``1 - action``, so the loss pushes the model towards
    the action it just sampled.

    Args:
        env: Environment the step belongs to. Currently unused: the
            environment is not stepped yet (TODO).
        obs: Observation array; a leading batch axis is added before it is
            passed to ``model``.
        model: Model mapping the batched observation to the value that is
            treated as the "left" probability.
        loss_fn: Called as ``loss_fn(y_target, prediction)``.

    Returns:
        The gradients of the mean loss with respect to
        ``model.trainable_variables``, in the same order. Previously these
        were computed and then discarded (the function returned None).
    """
    with tf.GradientTape() as tape:
        # NOTE(review): this value is used as a probability, so the model
        # output is presumably meant to lie in [0, 1] -- confirm against the
        # model's final layer.
        left_prob = model(obs[np.newaxis])
        # The [1, 1] random draw broadcasts against the model output's shape.
        action = tf.random.uniform([1, 1]) > left_prob

        y_target = tf.constant([[1.0]]) - tf.cast(action, tf.float32)

        # Named `loss_value` so it does not shadow the module-level `loss`.
        loss_value = tf.reduce_mean(loss_fn(y_target, left_prob))

    grads = tape.gradient(loss_value, model.trainable_variables)

    # TODO: step the environment with the sampled action, e.g.
    # obs, reward, done, info = env.step(int(action[0, 0].numpy()))
    return grads


# NOTE(review): `n_outputs` is not referenced anywhere else in the visible cells.
n_outputs = 5

# Driver loop: walk the frames in validTime order; from the second frame on,
# run one training step on the storms shared with the previous frame.
if __name__ == "__main__":
    for vt, df in iterframe(fresh):
        # NOTE(review): `track_rewards` and `obs` are assigned but never read below.
        track_rewards = 0
        obs = env.reset()
        # evaluate prediction
        if state.has_hist():
            # A previous frame exists: keep only the storms whose ID appears in
            # both the previous frame (frame_a) and the current one (frame_b).
            frame_a, frame_b = _mask_frames_by_id(state.latests, df)
            # assert that the frames are of an equal shape
            assert frame_a.shape == frame_b.shape
            # Error message seen during development, kept for reference:
            # expected shape=(None, 62, 6), found shape=(1, 55, 3)
            bg = frame_b[["MOTION_EAST", "MOTION_SOUTH", "STAB"]].values.astype(
                np.float32
            )
            # A new model is built on every iteration (its input shape is taken
            # from this frame), so no weights carry over between frames.
            model = build_model(bg)

            play_one_step(env, bg, model, loss)
            # obs, reward, done, info = env.step(frame_b[["MOTION_EAST", "MOTION_SOUTH"]])
        # Store the current frame so it becomes `state.latests` for the next iteration.
        state.set_storm(df)