In [8]:
import os
import numpy as np
import pandas as pd
import tensorflow as tf
from datetime import datetime
from tqdm import tqdm
import matplotlib.pyplot as plt

# Record which kind of device TensorFlow can see. The label is only stored in
# `device`; nothing in this cell passes it to TensorFlow.
if tf.config.list_physical_devices('GPU'):
    device = "cuda"
else:
    device = "cpu"

# Load dscovr_data
# Builds a dict mapping "yr_<token>" -> DataFrame, one entry per file in
# `data_dir` whose name contains "dsc".
dscovr_data = {}
data_dir = "/content/sample_data"
for file in sorted(os.listdir(data_dir)):
    if "dsc" in file:
        print(str(file))
        # Read the file that is actually being iterated over (the previous
        # code read a hard-coded 'file1.csv' and threw the result away, so
        # `data` was never defined).
        data = pd.read_csv(os.path.join(data_dir, file), on_bad_lines='skip')
        # Drop the first data row, as the original code did -- presumably a
        # units/secondary header row; TODO confirm. `.copy()` makes the slice
        # an independent frame so the column assignments below do not trigger
        # SettingWithCopyWarning.
        data = data.iloc[1:].copy()
        data["Date"] = pd.to_datetime(data["Date"])
        data["DOY"] = data["Date"].dt.dayofyear
        data["YEAR"] = data["Date"].dt.year
        # Fractional hour of day from an "HH:MM[...]" time string.
        time_parts = data["Time"].str.split(":", expand=True)
        data["HR"] = time_parts[0].astype(int) + time_parts[1].astype(int) / 60
        # Magnitude of the (Bx, By, Bz) vector.
        data["Bt"] = np.sqrt(np.sum(data[["Bx", "By", "Bz"]] ** 2, axis=1))
        data = data.drop(columns=["Date", "Time"])
        # The 5th underscore-separated token of the file name is used as the
        # key suffix, e.g. "dsc_fc_summed_spectra_2023_v01.csv" -> "yr_2023".
        key = "yr_" + file.split("_")[4]
        dscovr_data[key] = data

# Load kp_data
# "HR" is converted to float and "kP" is divided by 10 (the file appears to
# store kP scaled by ten -- TODO confirm against the data source).
kp_data = pd.read_csv("kP_index_data.csv").assign(
    HR=lambda frame: frame["HR"].map(float),
    kP=lambda frame: frame["kP"] / 10,
)

# Define sliding_window function
def sliding_window(window_size, x_split, jump, index):
    """Return the positional indices of one window, split into two parts.

    The window number `index` starts at position ``jump * index`` and spans
    ``window_size`` consecutive positions.

    Args:
        window_size: total number of positions in the window.
        x_split: number of leading positions that go into the first part.
        jump: stride between the starts of consecutive windows.
        index: which window to produce (0-based).

    Returns:
        A pair ``(x_idxs, y_idxs)`` of 1-D integer arrays: the first
        ``x_split`` positions of the window and the remaining ones.
    """
    start = jump * index
    window = np.arange(start, start + window_size)
    x_idxs, y_idxs = np.split(window, [x_split])
    return x_idxs, y_idxs

# Define ForecastingDataset
class ForecastingDataset:
    """Sequence-style view of `data` as overlapping (input, target) windows.

    This is a plain Python class rather than a ``tf.data.Dataset`` subclass:
    ``tf.data.Dataset`` is abstract, so a subclass that only defines
    ``__len__``/``__getitem__`` cannot be instantiated.

    Args:
        data: object supporting ``len()`` and ``.iloc[...]`` positional
            indexing (e.g. a pandas DataFrame).
        window_size: number of rows covered by one window.
        x_split: number of leading rows of each window returned as input;
            the remaining rows are returned as target.
        jump: stride, in rows, between the starts of consecutive windows.

    Raises:
        ValueError: if `jump` is not positive.
    """

    def __init__(self, data, window_size=360, x_split=240, jump=60):
        if jump <= 0:
            raise ValueError("jump must be a positive integer")
        # Clamp at zero so that data shorter than one window yields an empty
        # dataset instead of a negative length (which makes len() raise).
        self.idx_length = max(0, (len(data) - window_size) // jump + 1)
        self.data = data
        self.window_size = window_size
        self.x_split = x_split
        self.jump = jump

    def __len__(self):
        """Number of complete windows available."""
        return self.idx_length

    def __getitem__(self, idx):
        """Return ``(input_rows, target_rows)`` for window number `idx`.

        Negative indices count from the end.

        Raises:
            IndexError: if `idx` is outside ``[-len(self), len(self))``.
        """
        if idx < 0:
            idx += self.idx_length
        if not 0 <= idx < self.idx_length:
            raise IndexError("window index out of range")
        x_idxs, y_idxs = sliding_window(self.window_size, self.x_split, self.jump, idx)
        return self.data.iloc[x_idxs], self.data.iloc[y_idxs]

# Define kPPredictionDataset
class kPPredictionDataset:
    """Sequence-style dataset pairing DSCOVR windows with kP values.

    This is a plain Python class rather than a ``tf.data.Dataset`` subclass:
    ``tf.data.Dataset`` is abstract, so a subclass that only defines
    ``__len__``/``__getitem__`` cannot be instantiated.

    Args:
        discovr_data: DataFrame with at least the columns "YEAR", "DOY",
            "HR", "Bt", "Bx", "By" and "Bz".
        kp_data: DataFrame with at least the columns "YEAR", "DOY" and "HR";
            all remaining columns are returned as the target.
        window_size: number of rows covered by one window.
        x_split: number of leading rows of each window used as input.
        jump: stride, in rows, between the starts of consecutive windows.

    Raises:
        ValueError: if `jump` is not positive.
    """

    def __init__(self, discovr_data, kp_data, window_size=12*60, x_split=12*60-60, jump=180):
        if jump <= 0:
            raise ValueError("jump must be a positive integer")
        # Clamp at zero so that data shorter than one window yields an empty
        # dataset instead of a negative length (which makes len() raise).
        self.idx_length = max(0, (len(discovr_data) - window_size) // jump + 1)
        self.discovr_data = discovr_data
        self.kp_data = kp_data
        self.window_size = window_size
        self.x_split = x_split
        self.jump = jump

    def __len__(self):
        """Number of complete windows available."""
        return self.idx_length

    def __getitem__(self, idx):
        """Return ``(x, y)`` tensors for window number `idx`.

        ``x`` holds the "DOY", "Bt", "Bx", "By", "Bz" columns of the input
        rows. ``y`` stacks, for every 60th target row counted back from the
        end of the window, the kP rows whose YEAR/DOY/HR equal that row's.

        Negative indices count from the end.

        Raises:
            IndexError: if `idx` is outside ``[-len(self), len(self))``.
        """
        if idx < 0:
            idx += self.idx_length
        if not 0 <= idx < self.idx_length:
            raise IndexError("window index out of range")
        x_idxs, y_idxs = sliding_window(self.window_size, self.x_split, self.jump, idx)

        # Every 60th target row, picked from the end of the window backwards
        # and then restored to chronological order.
        target_rows = self.discovr_data.iloc[y_idxs][::-60][::-1]
        y_kp = []
        # `_` instead of `idx`: the original loop variable shadowed the
        # method parameter.
        for _, row in target_rows.iterrows():
            # NOTE(review): "HR" is matched by exact float equality; a
            # timestamp with no identical entry in kp_data yields an empty
            # array here -- confirm both tables share the same time grid.
            matches = (
                (self.kp_data["HR"] == row["HR"])
                & (self.kp_data["DOY"] == row["DOY"])
                & (self.kp_data["YEAR"] == row["YEAR"])
            )
            y_kp.append(self.kp_data[matches].drop(columns=["YEAR", "DOY", "HR"]).to_numpy())

        features = self.discovr_data.iloc[x_idxs][["DOY", "Bt", "Bx", "By", "Bz"]].to_numpy()
        return tf.convert_to_tensor(features), tf.convert_to_tensor(np.stack(y_kp))

# Create kP datasets
kp_ds = kPPredictionDataset(dscovr_data["yr_2023"], kp_data[kp_data["YEAR"]==2023])
train_size = int(len(kp_ds) * 0.75)

# Random 75/25 split over window indices. TensorFlow has no
# `tf.data.experimental.random_split`, so the window indices are permuted
# and partitioned by hand.
shuffled_idxs = np.random.permutation(len(kp_ds))


def _windows_to_tf_dataset(indices):
    """Materialise the given windows of `kp_ds` as a tf.data.Dataset of (x, y).

    Assumes `indices` is non-empty and that every window yields tensors of
    the same shape (required for stacking).
    """
    xs, ys = zip(*(kp_ds[int(i)] for i in indices))
    return tf.data.Dataset.from_tensor_slices(
        (tf.cast(tf.stack(xs), tf.float32), tf.cast(tf.stack(ys), tf.float32))
    )


train_set = _windows_to_tf_dataset(shuffled_idxs[:train_size])
val_set = _windows_to_tf_dataset(shuffled_idxs[train_size:])

# Shuffle individual samples before batching so that batch composition changes
# every epoch (the previous order, batch-then-shuffle, only permuted batches).
train_dataloader = train_set.shuffle(buffer_size=len(train_set)).batch(16, drop_remainder=True)
# Evaluate on the held-out windows (previously built from `train_set`).
# drop_remainder is kept so every batch holds a full 16 samples.
test_dataloader = val_set.batch(16, drop_remainder=True)

# Define LSTM model
class LSTM(tf.keras.Model):
    """Stack of LSTM layers followed by a per-timestep dense projection.

    Args:
        num_classes: size of the last axis of the output.
        input_size: number of input features per timestep. Stored for
            reference only; no layer is constructed from it.
        hidden_size: number of units in every LSTM layer.
        num_layers: number of stacked LSTM layers (previously accepted but
            ignored -- a single layer was always built).

    Raises:
        ValueError: if `num_layers` is smaller than 1.
    """

    def __init__(self, num_classes, input_size, hidden_size, num_layers):
        super().__init__()
        if num_layers < 1:
            raise ValueError("num_layers must be at least 1")
        self.num_classes = num_classes
        self.num_layers = num_layers
        self.input_size = input_size
        self.hidden_size = hidden_size
        # Every layer returns the full sequence so that layers can be stacked
        # and the model keeps producing one output per input timestep.
        self.lstm_layers = [
            tf.keras.layers.LSTM(hidden_size, return_sequences=True)
            for _ in range(num_layers)
        ]
        self.fc = tf.keras.layers.Dense(num_classes)

    def call(self, x):
        """Run `x` through the LSTM stack, then the dense layer at every step."""
        for lstm_layer in self.lstm_layers:
            x = lstm_layer(x)
        return self.fc(x)

# Create and compile the LSTM model
# Adam with learning rate 0.001, mean-squared-error loss.
model = LSTM(
    num_classes=5,
    input_size=5,
    hidden_size=128,
    num_layers=2,
)
model.compile(
    loss='mse',
    optimizer=tf.keras.optimizers.Adam(learning_rate=0.001),
)

# Training loop
# Custom gradient-tape loop; prints the mean batch loss after every epoch.
EPOCHS = 10
# Built once instead of once per step.
mse_loss = tf.keras.losses.MeanSquaredError()
for epoch in range(EPOCHS):
    total_loss = 0.0
    num_batches = 0
    for x, y in train_dataloader:
        with tf.GradientTape() as tape:
            predictions = model(x, training=True)
            # NOTE(review): the model emits one prediction per input timestep
            # while `y` is built from the tail of the window -- confirm the
            # two shapes are meant to be compared directly.
            loss = mse_loss(y, predictions)
        gradients = tape.gradient(loss, model.trainable_variables)
        # Use the optimizer registered by `model.compile(...)`; the bare name
        # `optimizer` was never defined.
        model.optimizer.apply_gradients(zip(gradients, model.trainable_variables))
        total_loss += float(loss)
        num_batches += 1
    # Counting batches avoids len() on the dataset and guards against an
    # empty loader (no division by zero, no `.numpy()` on a plain number).
    mean_loss = total_loss / num_batches if num_batches else float("nan")
    print(f"Epoch {epoch+1}, Loss: {mean_loss}")

# Evaluate the model on the test set
# Accumulates mean squared error over every batch of `test_dataloader`.
test_loss = tf.keras.metrics.MeanSquaredError()
for x, y in test_dataloader:
    predictions = model(x)
    test_loss.update_state(y, predictions)
final_test_loss = test_loss.result().numpy()
print(f"Test Loss: {final_test_loss}")

# Plot some predictions
# One figure per batch (up to `n_samples` batches); each figure stacks the
# first five samples of the batch, plotting channel 0 of target vs. prediction.
n_samples = 5
for x, y in test_dataloader.take(n_samples):
    predictions = model(x)
    fig = plt.figure(figsize=(12, 6))
    for i in range(5):
        ax = fig.add_subplot(5, 1, i + 1)
        ax.plot(y[i, :, 0], label='True')
        ax.plot(predictions[i, :, 0], label='Predicted')
        ax.legend()
    plt.show()

dsc_fc_summed_spectra_2023_v01.csv


FileNotFoundError: ignored

In [None]:
from google.colab import drive
# Mount Google Drive at /content/drive (Colab only). This cell has no
# execution count, i.e. it had not been run when the notebook was saved.
drive.mount('/content/drive')