In [21]:
import pandas as pd
import pickle
from keras import Sequential
from keras.layers import LSTM, SimpleRNN, GRU, Dense
from keras.losses import CategoricalCrossentropy
import numpy as np
import os
from sklearn.metrics import precision_recall_fscore_support

## Goal

Given the past day of data, predict the largest flare in the next day. In the file `dataWrangling.py`, we read, parse, and split data into training and testing sets.

In [28]:
def loadPickledFile(fileName):
    bytes_in = bytearray(0)
    max_bytes = 2**31 - 1
    input_size = os.path.getsize(fileName)
    with open(fileName, 'rb') as file:
        for _ in range(0, input_size, max_bytes):
            bytes_in += file.read(max_bytes)
        obj = pickle.loads(bytes_in)
        file.close()
    return obj

In [29]:
X_train = loadPickledFile("timeseries/X_train.pck")
X_test = loadPickledFile("timeseries/X_test.pck")
y_train = loadPickledFile("timeseries/y_train.pck")
y_test = loadPickledFile("timeseries/y_test.pck")

In [32]:
num_features = X_train[0].shape[1]
model = Sequential()
# Input Layer
# model.add(Dense(num_features, input_shape=(dataPointsPerX, num_features),activation='relu'))
model.add(Dense(num_features, input_shape=X_train[0].shape,activation='relu'))
# Hidden Layers: RNN
model.add(LSTM(units=2*num_features)) # Add return_sequences=True if you want to add more architecture
# Output Layer
model.add(Dense(1, activation='relu'))

# model.compile(optimizer="adam", loss=CategoricalCrossentropy())
model.compile(optimizer='adam', loss='mse')
model.summary()

Model: "sequential_2"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense_4 (Dense)             (None, 120, 6)            42        
                                                                 
 lstm_2 (LSTM)               (None, 12)                912       
                                                                 
 dense_5 (Dense)             (None, 1)                 13        
                                                                 
Total params: 967 (3.78 KB)
Trainable params: 967 (3.78 KB)
Non-trainable params: 0 (0.00 Byte)
_________________________________________________________________


In [33]:
epochs = 25
model.fit(X_train,
          y_train,
          epochs=epochs,
          verbose=1,
          shuffle=False)

Epoch 1/25
Epoch 2/25
Epoch 3/25
Epoch 4/25
Epoch 5/25
Epoch 6/25
Epoch 7/25
Epoch 8/25
Epoch 9/25
Epoch 10/25
Epoch 11/25
Epoch 12/25
Epoch 13/25
Epoch 14/25
Epoch 15/25
Epoch 16/25
Epoch 17/25
Epoch 18/25
Epoch 19/25
Epoch 20/25
Epoch 21/25
Epoch 22/25
Epoch 23/25
Epoch 24/25
Epoch 25/25


<keras.src.callbacks.History at 0x27365b29e10>

In [17]:
classLabels = {
    0: "0",
    1: "B",
    2: "C",
    3: "M",
    4: "X"
}

def threshold_output(output):
    thresholded = []
    for val in output:
        if val < 1:
            thresholded.append(0)
        elif val < 2:
            thresholded.append(1)
        elif val < 3:
            thresholded.append(2)
        else:
            thresholded.append(3)
    return np.array(thresholded)
    

y_pred = model.predict(X_test)
y_pred = threshold_output(y_pred)

p,r,f,s = precision_recall_fscore_support(y_test, y_pred)

def printMetrics(p,r,f,s):
    for i in range(len(p)):
        print(f"Metrics for {classLabels[i]} class flares")
        print(f"\tPrecision: {p[i]}")
        print(f"\tRecall:    {r[i]}")
        print(f"\tF-Score:   {f[i]}")
        print(f"\tSupport:   {s[i]}")

printMetrics(p,r,f,s)
# X_train[0]

Metrics for 0 class flares
	Precision: 0.24953617810760667
	Recall:    1.0
	F-Score:   0.3994060876020787
	Support:   269
Metrics for B class flares
	Precision: 0.0
	Recall:    0.0
	F-Score:   0.0
	Support:   274
Metrics for C class flares
	Precision: 0.0
	Recall:    0.0
	F-Score:   0.0
	Support:   271
Metrics for M class flares
	Precision: 0.0
	Recall:    0.0
	F-Score:   0.0
	Support:   264


  _warn_prf(average, modifier, msg_start, len(result))
