In [None]:
from transformer import Transformer
import silence_tensorflow.auto
import numpy as np

# --- Data layout -----------------------------------------------------------
# One model input has shape (N_TIMESTEPS, MAX_N_HARPS, N_FEATURES) and one
# target has shape (N_TIMESTEPS, N_OUT).
N_TIMESTEPS = 5
MAX_N_HARPS = 5
N_FEATURES = 21
INPUT_DIM = (N_TIMESTEPS, MAX_N_HARPS, N_FEATURES)
N_OUT = 2
OUTPUT_DIM = (N_TIMESTEPS, N_OUT)

# --- Model hyper-parameters --------------------------------------------------
NUM_LAYERS = 3
D_MODEL = 12
NUM_HEADS = 3
# Named after the Transformer keyword arguments they feed.
# NOTE(review): presumably the feed-forward width and the dropout rate -
# confirm against transformer.py.
DFF = 24
RATE = 0.1

# Sanity checks on the configuration before the model is built.
assert N_FEATURES > D_MODEL, "N_FEATURES must be greater than D_MODEL"
assert D_MODEL % NUM_HEADS == 0, "D_MODEL must be divisible by NUM_HEADS"

t = Transformer(
    num_layers=NUM_LAYERS,
    d_model=D_MODEL,
    num_heads=NUM_HEADS,
    dff=DFF,
    input_dimensions=INPUT_DIM,
    target_dimensions=OUTPUT_DIM,
    rate=RATE,
)


In [None]:
# Smoke test: push a batch of 10 uniformly random samples through the model
# and check that the forward pass produces no NaN values.
X = np.random.random((10, *INPUT_DIM))
y = np.random.random((10, *OUTPUT_DIM))

y_pred, _ = t((X, y), training=True)
np.isnan(y_pred).any()

In [None]:
import pandas as pd
from datetime import datetime, timedelta
from tqdm import tqdm
import numpy as np

# Columns kept from the SHARP table: the join key, the harp identifier and
# twenty numeric parameters.
sharp_columns = [
    "timestamp", "harp",
    "USFLUX", "MEANGAM", "MEANGBT", "MEANGBZ", "MEANGBH",
    "TOTPOT", "TOTUSJZ", "TOTUSJH", "ABSNJZH", "SAVNCPP",
    "MEANPOT", "MEANSHR", "SHRGT45", "SIZE", "SIZE_ACR",
    "NACR", "NPIX", "MEANJZD", "MEANALP", "MEANJZH",
]
# Read, project onto the wanted columns, and drop every row with a missing value.
sharp_df = pd.read_csv('data/sharp.csv')[sharp_columns].dropna()

# Same treatment for the X-ray table.
xray_columns = ["timestamp", "Short", "Long"]
xray_df = pd.read_csv('data/xray.csv')[xray_columns].dropna()

# No normalisation is applied to either table: the per-column z-scoring that
# was present here only as commented-out code stays disabled.

# Inner join on the timestamp: one row per (timestamp, harp) that also has
# an X-ray reading at that timestamp.
data = pd.merge(sharp_df, xray_df, on='timestamp')

train_data = []


def create_train_data_tuple():
    """Build one (input, target) sample from a random one-hour window of `data`.

    A row of the module-level `data` frame is drawn at random; its timestamp
    (treated as epoch seconds and truncated to an integer) is the end of the
    window, and the window covers the 3600 seconds before it
    (start exclusive, end inclusive).

    Returns:
        tuple[np.ndarray, np.ndarray]:
            * input of shape INPUT_DIM. For each of the first INPUT_DIM[1]
              distinct harps in the window, the columns `sharp_columns[1:]`
              (the harp id followed by the SHARP parameters) are written
              row by row along the first axis; unused slots stay zero.
            * target of shape OUTPUT_DIM holding the "Short" and "Long"
              columns of the first harp in the window, zero padded.
    """
    max_timesteps, max_harps, _ = INPUT_DIM

    # Work directly in epoch seconds. Going through a naive local datetime
    # (fromtimestamp -> minus one hour -> timestamp) can be off by an hour
    # around DST changes, and int() on a one-element Series is deprecated.
    end = int(data.sample(1)["timestamp"].iloc[0])
    start = end - 3600
    window = data[(data["timestamp"] > start) & (data["timestamp"] <= end)]

    input_ = np.zeros(INPUT_DIM)
    output = np.zeros(OUTPUT_DIM)

    # Cap the harps by the capacity of the input array, not a literal 5.
    for i, harp in enumerate(window["harp"].unique()[:max_harps]):
        harp_data = window[window["harp"] == harp]
        # Truncate to the array's timestep capacity so that a harp with more
        # rows in the window than expected cannot raise a shape mismatch.
        input_data = harp_data[sharp_columns[1:]].to_numpy()[:max_timesteps]
        n_timesteps = input_data.shape[0]
        if i == 0:
            # The target is taken from the first harp's rows only.
            output_data = harp_data[["Short", "Long"]].to_numpy()[:max_timesteps]
            output[:n_timesteps, :] = output_data

        input_[:n_timesteps, i, :] = input_data

    return input_, output


def get_data(size=1000):
    """Draw `size` samples with create_train_data_tuple().

    Args:
        size: number of samples to generate (a progress bar tracks the loop).

    Returns:
        A pair of lists (X, y): the inputs and the matching targets, in the
        order they were generated.
    """
    samples = [create_train_data_tuple() for _ in tqdm(range(size))]
    X = [features for features, _ in samples]
    y = [targets for _, targets in samples]
    return X, y

# Draw 1000 training samples and 100 validation samples.
X, y = get_data(1000)
X_val, y_val = get_data(100)

# NaN report, one line each for X, y, X_val and y_val (True means NaNs are present).
print(*(np.any(np.isnan(samples)) for samples in (X, y, X_val, y_val)), sep="\n")

In [None]:
import tensorflow as tf
from tqdm import tqdm 


# Objective and optimiser: mean squared error, minimised with Adam at its
# default settings.
loss = tf.keras.losses.MeanSquaredError()
optimizer = tf.keras.optimizers.Adam()

BATCH_SIZE = 10
EPOCHS = 10

for epoch in range(EPOCHS):
    total_loss = 0
    # Only full batches are used; a trailing partial batch is dropped.
    N = len(X) // BATCH_SIZE
    for batch in tqdm(range(N)):
        # Slice by the *batch* index. Slicing by `epoch` fed the same
        # BATCH_SIZE samples N times per epoch and never touched most of X.
        batch_slice = slice(batch * BATCH_SIZE, (batch + 1) * BATCH_SIZE)
        X_batch, y_batch = np.array(X[batch_slice]), np.array(y[batch_slice])

        assert not np.any(np.isnan(X_batch)), "X_batch should not include nan"
        assert not np.any(np.isnan(y_batch)), "y_batch should not include nan"

        with tf.GradientTape() as tape:
            # training=True so that training-only behaviour in the model
            # (e.g. dropout) is active while fitting, as in the smoke test.
            y_pred, _ = t((X_batch, y_batch), training=True)

            assert not np.any(np.isnan(y_pred)), "y_pred should not include nan"

            loss_value = loss(y_batch, y_pred)

        # One optimiser step on all trainable variables of the model.
        gradients = tape.gradient(loss_value, t.trainable_variables)
        optimizer.apply_gradients(zip(gradients, t.trainable_variables))

        total_loss += loss_value.numpy()

    print(f"EPOCH {epoch}: mean_loss = {total_loss / N}")

    # Validation pass over the whole validation set, in inference mode.
    y_val_array = np.array(y_val)
    y_pred_val, _ = t((np.array(X_val), y_val_array), training=False)
    val_loss = loss(y_val_array, y_pred_val).numpy()

    print(f"EPOCH {epoch}: val_loss = {val_loss}")


In [None]:
# Echo X as this cell's output.
# NOTE(review): when the cells run top to bottom, X is the list of 1000 input
# arrays returned by get_data(1000), so this renders every training sample;
# a smaller view such as X[0] or np.shape(X) may be what is wanted here.
X