In [None]:
!pip install tensorflow 

In [2]:
solar_data.describe()

Unnamed: 0,YYYY,DOY,HR,MN,Timeshift,Field magnitude average(nT),Speed(km/s),Proton Density(n/cc),Temperature(K),FlowPressure(nPa)
count,57600.0,57600.0,57600.0,57600.0,57600.0,57600.0,57600.0,57600.0,57600.0,57600.0
mean,2013.5,238.75,11.5,29.5,3429.379497,6.090834,413.095273,6.981718,89186.54,2.070726
std,5.766331,0.661444,6.922247,17.318253,1045.662655,3.248565,99.650143,6.422742,92868.07,1.636582
min,2004.0,238.0,0.0,0.0,-2077.0,0.22,253.6,0.76,5653.0,0.29
25%,2008.75,238.0,5.75,14.75,2761.0,4.05,343.0,3.18,27887.5,1.14
50%,2013.5,239.0,11.5,29.5,3402.0,5.18,386.6,4.86,60427.5,1.63
75%,2018.25,239.0,17.25,44.25,4016.0,6.9,442.0,8.58,117563.2,2.37
max,2023.0,240.0,23.0,59.0,10028.0,21.38,742.2,64.69,1028652.0,17.66


In [1]:
import os
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
import tensorflow as tf
from tensorflow import keras

# Directory containing the data files
data_dir = 'omni_data'

# Initialize an empty DataFrame to store the data
solar_data = pd.DataFrame()

# Loop through the years from 2004 to 2023
for year in range(2004, 2024):
    # Construct the filename for the data file
    file_name = f'omni_min_{year}.lst'
    file_path = os.path.join(data_dir, file_name)

    # columns name for easy access
    common_header = ["YYYY", "DOY", "HR", "MN"]
    headers = ["Timeshift", "Field magnitude average(nT)", "Speed(km/s)", "Proton Density(n/cc)", "Temperature(K)",
               "FlowPressure(nPa)"]

    # graph_label for headers (short)
    graph_header = ["TimeShift", "Magnetic Field", "Speed", "Density", "Tempt", "Pressure"]

    common_header.extend(headers)

    # Check if the file exists before attempting to read it
    if os.path.isfile(file_path):
        # Read the data from the file into a DataFrame
        data = pd.read_csv(file_path, sep='\s+', names=common_header)

        # Append the data to the combined DataFrame
        solar_data = solar_data.append(data)

# Reset the index of the combined DataFrame
solar_data.reset_index(drop=True, inplace=True)

# To remove the outliers
# This takes the highest value(999,9999,999999) outliers from the series so before using this we should be sure our all columns
# have outliers and then later remove the column not having outliers from the list headers
for itm in headers:
    solar_data.replace(to_replace=solar_data[itm].max(), value=np.NaN, inplace=True)
    solar_data.fillna(method='bfill', inplace=True)

# Define selected features and target variables
selected_features = ["YYYY","Proton Density(n/cc)", "Temperature(K)"]
target_variables = ["Speed(km/s)", "Field magnitude average(nT)"]

# Split the data into features (X) and target (y)
X = solar_data[selected_features].values
y = solar_data[target_variables].values

# Normalize the data
scaler = MinMaxScaler()
X = scaler.fit_transform(X)
y = scaler.fit_transform(y)

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Hyperparameter Tuning
param_grid = {
    'lstm_units': [50, 100, 150],
    'learning_rate': [0.001, 0.01, 0.1],
    'batch_size': [16, 32, 64]
}

best_mse = float('inf')
best_model = None

for lstm_units in param_grid['lstm_units']:
    for learning_rate in param_grid['learning_rate']:
        for batch_size in param_grid['batch_size']:
            model = keras.Sequential([
                keras.layers.LSTM(lstm_units, activation='relu', input_shape=(X_train.shape[1], 1)),
                keras.layers.BatchNormalization(),  # Batch normalization layer
                keras.layers.Dropout(0.2),  # Dropout layer for regularization
                keras.layers.Dense(2)
            ])

            optimizer = keras.optimizers.Adam(learning_rate=learning_rate)
            model.compile(optimizer=optimizer, loss='mean_squared_error')

            # Learning Rate Scheduling
            lr_scheduler = keras.callbacks.LearningRateScheduler(lambda epoch: learning_rate / (2 ** (epoch // 10)))
            
            # Early Stopping
            early_stopping = keras.callbacks.EarlyStopping(monitor='val_loss', patience=5, restore_best_weights=True)

            # Training the model
            history = model.fit(X_train.reshape((X_train.shape[0], X_train.shape[1], 1)), y_train,
                                epochs=10, batch_size=batch_size, 
                                validation_data=(X_test.reshape((X_test.shape[0], X_test.shape[1], 1)), y_test),
                                callbacks=[lr_scheduler, early_stopping])

            # Evaluate the model
            val_loss = history.history['val_loss'][-1]
            
            '''# If the current model has a lower validation loss, update the best model
            if val_loss <= best_mse:
                best_mse = val_loss
                best_model = model'''

# Save the best model to a file
model.save('omni_rnn.h5')  # Save the model in HDF5 format

# Make predictions on the test data using the best model
y_pred = model.predict(X_test.reshape((X_test.shape[0], X_test.shape[1], 1)))
y_pred = scaler.inverse_transform(y_pred)
y_test = scaler.inverse_transform(y_test)

# Calculate and print the Mean Squared Error (MSE) for each target variable
mse_speed = np.mean((y_pred[:, 0] - y_test[:, 0]) ** 2)
mse_field_magnitude = np.mean((y_pred[:, 1] - y_test[:, 1]) ** 2)

print(f'Best MSE for Speed: {mse_speed}')
print(f'Best MSE for Field Magnitude: {mse_field_magnitude}')


  solar_data = solar_data.append(data)
  solar_data = solar_data.append(data)
  solar_data = solar_data.append(data)
  solar_data = solar_data.append(data)
  solar_data = solar_data.append(data)
  solar_data = solar_data.append(data)
  solar_data = solar_data.append(data)
  solar_data = solar_data.append(data)
  solar_data = solar_data.append(data)
  solar_data = solar_data.append(data)
  solar_data = solar_data.append(data)
  solar_data = solar_data.append(data)
  solar_data = solar_data.append(data)
  solar_data = solar_data.append(data)
  solar_data = solar_data.append(data)
  solar_data = solar_data.append(data)
  solar_data = solar_data.append(data)
  solar_data = solar_data.append(data)
  solar_data = solar_data.append(data)
  solar_data = solar_data.append(data)


Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Epoch 1/10


Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Epoch 1/10
Epoch 2/10
Epoch 3/10


Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10


Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10


  saving_api.save_model(


Best MSE for Speed: 2849.6518725708934
Best MSE for Field Magnitude: 7.702206061143276
