In [1]:
from transformer import Transformer
import silence_tensorflow.auto
import numpy as np

N_TIMESTEPS = 5
MAX_N_HARPS = 5
N_FEATURES = 21
INPUT_DIM = (N_TIMESTEPS, MAX_N_HARPS, N_FEATURES)
N_OUT = 2
OUTPUT_DIM =(N_TIMESTEPS,N_OUT)
NUM_LAYERS = 3
D_MODEL = 12
DFF = 24
NUM_HEADS = 3
RATE = 0.1

assert N_FEATURES > D_MODEL
assert D_MODEL % NUM_HEADS == 0

t = Transformer(
    num_layers=NUM_LAYERS,
    d_model=D_MODEL,
    num_heads=NUM_HEADS,
    dff=DFF,
    input_dimensions=INPUT_DIM,
    target_dimensions=OUTPUT_DIM,
    rate=RATE,
)


In [2]:
X = np.array([np.random.random(INPUT_DIM) for _ in range(10)])
y = np.array([np.random.random(OUTPUT_DIM) for _ in range(10)])

y_pred, _ = t((X, y), training=True)
np.any(np.isnan(y_pred))

False

In [3]:
import pandas as pd
from datetime import datetime, timedelta
from tqdm import tqdm
import numpy as np
metadata = {}
sharp_df = pd.read_csv('data/sharp.csv')
sharp_columns = [
    "timestamp",
    "harp",
    "USFLUX",
    "MEANGAM",
    "MEANGBT",
    "MEANGBZ",
    "MEANGBH",
    "TOTPOT",
    "TOTUSJZ",
    "TOTUSJH",
    "ABSNJZH",
    "SAVNCPP",
    "MEANPOT",
    "MEANSHR",
    "SHRGT45",
    "SIZE",
    "SIZE_ACR",
    "NACR",
    "NPIX",
    "MEANJZD",
    "MEANALP",
    "MEANJZH",
]

sharp_df = sharp_df[sharp_columns]
sharp_df = sharp_df.dropna()
sharp_df_notime = sharp_df[sharp_columns[1:]]

metadata["sharp_mean"] = sharp_df_notime.mean().to_numpy()
metadata["sharp_std"] = sharp_df_notime.std().to_numpy()
sharp_df_notime -= sharp_df_notime.mean()
sharp_df_notime /= sharp_df_notime.std()
sharp_df_notime["timestamp"] = sharp_df["timestamp"]
sharp_df = sharp_df_notime


xray_df = pd.read_csv('data/xray.csv')
xray_columns = ["timestamp", "Short", "Long"]
xray_df = xray_df[xray_columns]
xray_df = xray_df[(xray_df["Short"] > 0.0) & (xray_df["Long"] > 0.0)]
xray_df_notime = xray_df[xray_columns[1:]]
xray_df_notime = xray_df_notime.dropna()

metadata["xray_mean"] = xray_df_notime.mean().to_numpy()
metadata["xray_std"] = xray_df_notime.std().to_numpy()
xray_df_notime -= xray_df_notime.mean()
xray_df_notime += xray_df_notime.std()
xray_df_notime["timestamp"] = xray_df["timestamp"]
xray_df = xray_df_notime

data = sharp_df.merge(xray_df, on='timestamp')

train_data = []


def create_train_data_tuple():
    random_time = datetime.fromtimestamp(int(data.sample(1)["timestamp"]))
    start, end = (random_time -
                  timedelta(hours=1)).timestamp(), random_time.timestamp()
    tmp_data = data[(data["timestamp"] > start) & (data["timestamp"] <= end)]
    harps = tmp_data["harp"].unique()

    input_ = np.zeros(INPUT_DIM)
    output = np.zeros(OUTPUT_DIM)

    for i, harp in enumerate(harps):
        if i == 5:
            break
        harp_data = tmp_data[tmp_data["harp"] == harp]
        input_data = harp_data[sharp_columns[1:]].to_numpy()
        n_timesteps = input_data.shape[0]
        if i == 0:
            output_data = harp_data[["Short", "Long"]].to_numpy()
            output[:n_timesteps, :] = output_data

        input_[:n_timesteps, i, :] = input_data

    return input_, output


def get_data(size=1000):
    X, y = [], []
    for _ in tqdm(range(size)):
        X_, y_ = create_train_data_tuple()
        X.append(X_)
        y.append(y_)
    return X, y

X, y = get_data(1000)
X_val, y_val = get_data(100)

print(np.any(np.isnan(X)))
print(np.any(np.isnan(y)))
print(np.any(np.isnan(X_val)))
print(np.any(np.isnan(y_val)))

100%|██████████| 1000/1000 [00:01<00:00, 575.72it/s]
100%|██████████| 100/100 [00:00<00:00, 546.06it/s]

False
False
False
False





In [4]:
import tensorflow as tf
from tqdm import tqdm 


loss = tf.keras.losses.MeanSquaredError()
optimizer = tf.keras.optimizers.Adam()

BATCH_SIZE = 10
EPOCHS = 10

for epoch in range(EPOCHS):
    total_loss = 0
    N = len(X) // BATCH_SIZE
    for batch in tqdm(range(N)):
        X_batch, y_batch = X[epoch*BATCH_SIZE:(epoch+1)*BATCH_SIZE], y[epoch*BATCH_SIZE:(epoch+1)*BATCH_SIZE]
        X_batch, y_batch = np.array(X_batch), np.array(y_batch)
        
        assert not np.any(np.isnan(X_batch)), "X_batch should not include nan"
        assert not np.any(np.isnan(y_batch)), "y_batch should not include nan"
        
        with tf.GradientTape() as tape:
            y_pred, _ = t((X_batch, y_batch), training=False)
        
            assert not np.any(np.isnan(y_pred)), "y_pred should not include nan"
        
            loss_value = loss(y_batch, y_pred)
        
        gradients = tape.gradient(loss_value, t.trainable_variables)
        optimizer.apply_gradients(zip(gradients, t.trainable_variables))
        
        total_loss += loss_value.numpy()

    print(f"EPOCH {epoch}: mean_loss = {total_loss / N}")
    
    y_pred_val, _ = t((np.array(X_val), np.array(y_val)))
    val_loss = loss(y_val, y_pred_val).numpy()
    
    print(f"EPOCH {epoch}: val_loss = {val_loss}")


100%|██████████| 100/100 [00:11<00:00,  9.07it/s]


EPOCH 0: mean_loss = 0.05319864000281086
EPOCH 0: val_loss = 0.016247836872935295


100%|██████████| 100/100 [00:10<00:00,  9.21it/s]


EPOCH 1: mean_loss = 0.0008050289155653445
EPOCH 1: val_loss = 0.009096811525523663


100%|██████████| 100/100 [00:10<00:00,  9.34it/s]


EPOCH 2: mean_loss = 0.0005437742564026848
EPOCH 2: val_loss = 0.006016748026013374


100%|██████████| 100/100 [00:10<00:00,  9.20it/s]


EPOCH 3: mean_loss = 0.0005651527261034062
EPOCH 3: val_loss = 0.0025222860276699066


100%|██████████| 100/100 [00:10<00:00,  9.23it/s]


EPOCH 4: mean_loss = 0.0003520178583858069
EPOCH 4: val_loss = 0.0035190414637327194


100%|██████████| 100/100 [00:10<00:00,  9.20it/s]


EPOCH 5: mean_loss = 0.00015316514294681836
EPOCH 5: val_loss = 0.0024403678253293037


100%|██████████| 100/100 [00:10<00:00,  9.36it/s]


EPOCH 6: mean_loss = 0.00018476169545465383
EPOCH 6: val_loss = 0.0022367103956639767


100%|██████████| 100/100 [00:10<00:00,  9.24it/s]


EPOCH 7: mean_loss = 0.00021666039833689865
EPOCH 7: val_loss = 0.001420822343789041


100%|██████████| 100/100 [00:10<00:00,  9.34it/s]


EPOCH 8: mean_loss = 8.403554178585182e-05
EPOCH 8: val_loss = 0.0015561613254249096


100%|██████████| 100/100 [00:10<00:00,  9.31it/s]

EPOCH 9: mean_loss = 2.5262686369842414e-05
EPOCH 9: val_loss = 0.0010481922654435039





In [61]:
import pickle

MODELNAME = "8h-1"

metadata["n_timesteps"]  = N_TIMESTEPS
metadata["max_n_harps"]  = MAX_N_HARPS
metadata["n_features"] = N_FEATURES
metadata["n_out"]  = N_OUT
metadata["num_layers"] = NUM_LAYERS
metadata["d_model"] = D_MODEL
metadata["num_heads"] = NUM_HEADS
metadata["dff"] = DFF
metadata["input_dim"] = INPUT_DIM
metadata["output_dim"] = OUTPUT_DIM
metadata["rate"] = RATE



t.save_weights("models/"+MODELNAME)
with open("meta/"+MODELNAME+".pkl", "wb") as metafile:
    metafile.write(pickle.dumps(metadata))


In [7]:
import pickle
from tensorflow.keras import models
import numpy as np
import tensorflow as tf
from transformer import Transformer


class Avocato():
    def __init__(self, modelname):
        self.metadata = pickle.load(
            open("/mnt/hackathon2021/modelcache/meta/" + modelname + ".pkl",
                 "rb"))
        self.model = Transformer(
            num_layers=self.metadata["num_layers"],
            d_model=self.metadata["d_model"],
            num_heads=self.metadata["num_heads"],
            dff=self.metadata["dff"],
            input_dimensions=self.metadata["input_dim"],
            target_dimensions=self.metadata["output_dim"],
            rate=self.metadata["rate"],
        )
        self.model.compile()
        self.model.load_weights("/mnt/hackathon2021/modelcache/models/" +
                                modelname)

    def __call__(self, net_in):
        """
        Args:
        -----
            - net_in: pd dataframe consisting of n_timesteps of data for HARPS (timestamp, harp, ...parameters)
        """

        in_data = np.zeros(
            (1, self.metadata["n_timesteps"], self.metadata["max_n_harps"],
             self.metadata["n_features"]))
        unique_timesteps = net_in["timestamp"].unique()
        for t_id, timestep in enumerate(unique_timesteps):
            if t_id >= self.metadata["n_timesteps"]:
                continue
            unique_harps = net_in[net_in["timestamp"] ==
                                  timestep]["harp"].unique()
            for h_id, harp in enumerate(unique_harps):
                if h_id >= self.metadata["max_n_harps"]:
                    continue
                in_data[0, t_id, h_id] = net_in[
                    (net_in["timestamp"] == timestep) &
                    (net_in["harp"] == harp
                     )].loc[:,
                            net_in.columns != "timestamp"].to_numpy().reshape(
                                self.metadata["n_features"])
                in_data[0, t_id, h_id] -= self.metadata["sharp_mean"]
                in_data[0, t_id, h_id] /= self.metadata["sharp_std"]

        output = np.zeros((
            1,
            self.metadata["n_timesteps"],
            self.metadata["n_out"],
        ))

        for i in tf.range(self.metadata["n_timesteps"]):
            output, _ = self.model([in_data, output],
                                              training=False)

        return output * self.metadata["xray_std"] + self.metadata["xray_mean"], output

In [8]:
MODELNAME = "8h-1"
pred = Avocato(MODELNAME)
random_time = datetime.fromtimestamp(int(data.sample(1)["timestamp"]))
start, end = (random_time -
                timedelta(hours=1)).timestamp(), random_time.timestamp()
net_in = data[(data["timestamp"] > start) & (data["timestamp"] <= end)]
net_in = net_in[sharp_columns]

In [64]:
pred(net_in)

(<tf.Tensor: shape=(1, 5, 2), dtype=float32, numpy=
 array([[[2.9338644e-08, 8.2975248e-08],
         [2.9338644e-08, 8.2975248e-08],
         [2.9338640e-08, 8.2975255e-08],
         [2.9338642e-08, 8.2975248e-08],
         [2.9338645e-08, 8.2975248e-08]]], dtype=float32)>,
 <tf.Tensor: shape=(1, 5, 2), dtype=float32, numpy=
 array([[[0.39534798, 0.02663079],
         [0.39534798, 0.02663079],
         [0.3953478 , 0.02663084],
         [0.39534786, 0.02663079],
         [0.3953481 , 0.02663079]]], dtype=float32)>)

In [9]:
net_in

Unnamed: 0,timestamp,harp,USFLUX,MEANGAM,MEANGBT,MEANGBZ,MEANGBH,TOTPOT,TOTUSJZ,TOTUSJH,...,MEANPOT,MEANSHR,SHRGT45,SIZE,SIZE_ACR,NACR,NPIX,MEANJZD,MEANALP,MEANJZH
6204,1555192000.0,-0.54972,1.46345,1.908409,-1.013866,-1.030139,-0.017631,3.013533,1.769887,1.400974,...,3.216637,2.081325,2.171866,4.14703,3.967177,3.964512,4.153964,-0.430136,-0.207704,-0.514086
6205,1555193000.0,-0.54972,1.46711,1.90347,-0.992921,-1.005052,-0.006907,3.015453,1.802248,1.431595,...,3.228418,2.074768,2.168833,4.146119,3.973329,3.970555,4.152936,-0.513553,-0.2141,-0.535922
6206,1555194000.0,-0.54972,1.475988,1.884381,-0.990427,-1.011456,-0.006732,3.008407,1.827295,1.4342,...,3.211264,2.069357,2.160744,4.142865,3.935855,3.932949,4.149537,-0.522516,-0.228642,-0.581886
6207,1555194000.0,-0.54972,1.463759,1.876689,-0.972287,-0.984784,0.015472,2.994272,1.762063,1.420395,...,3.270639,2.046461,2.153386,4.137834,3.908661,3.90564,4.144374,-0.485809,-0.23816,-0.619144
6208,1555195000.0,-0.54972,1.476268,1.870611,-0.975529,-0.994324,0.016288,3.005051,1.771407,1.451214,...,3.217841,2.045837,2.134906,4.132754,3.858636,3.855499,4.139157,-0.461205,-0.25195,-0.657207


In [None]:
(TIMESTAMPS, ACTIVE_REGIONs, PARAMETERS)