In [24]:
import numpy as np
import matplotlib.pyplot as plt
import datetime
import pickle
from sklearn.decomposition import PCA
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from geopack import geopack
import sys
import pandas as pd
import seaborn as sns
import xarray as xr
import datetime as datetime

#sys.path.append('/Users/sr2/opt/anaconda3/envs/nipr_env/')
sys.path.append('/home/sachin/miniconda3/envs/server_env')
from esn_dts_openloop import ESN, Tikhonov

In [37]:
path = r'/home/sachin/research/data/REPPU/fbig5min.dat' #Server

#read the REPPU data
with open (path) as f:
    rectype = np.dtype(np.float32)
    reppu_data = np.fromfile(f, rectype) #size = 109,900,800

In [39]:
#Read MHD dates and expand-out the date ranges
mhd_data = pd.read_csv('mhd_dates.csv')
expanded_dt = pd.concat([pd.Series(pd.date_range(start, end)) 
        for start, end in zip(mhd_data['start'], mhd_data['end'])])

In [41]:
quantity_to_predict = 'current'
units = 'mA/m2'

# Reshape the array to have dimensions (days, time (5-min intervals), lat, lon)
#288 will become 1440 at 1-min resolution in SMRAI v3.0
data_reshaped = reppu_data.reshape(len(expanded_dt), 288, 30, 80) 

# Define coordinates
time = np.arange(288)
lat = np.linspace(53.1, 89.7, 30) #30 intervals between 53.1° to 89.7°
lon = np.linspace(1.6, 357.6, 80) #80 intervals between 1.6° to 357.6°

# Create 'dt' variable combining dates and five-minute intervals
dt = []
for day in expanded_dt:
    for t in time:
        dt.append(day + pd.Timedelta(minutes=t*5))
dt = np.array(dt) #convert from list to numpy array

# Create xarray Dataset
ds = xr.Dataset({quantity_to_predict: (['dt', 'lat', 'lon'], data_reshaped.reshape(-1, 30, 80))},
                coords={'dt': dt, 'lat': lat, 'lon': lon})

# Add potential and units
ds[quantity_to_predict].attrs['units'] = units
ds

In [42]:
#open the omni data and merge it with the REPPU data
#omni_mhd_path = r'/Users/sr2/My Drive/Career/Employment/Current/JSPS/Research/Analysis/Apr-24/data/omni/'
omni_mhd_path = r'/home/ryuho/Documents/reddy/research/SMRAI/Data/OMNI/'

omni_df = pd.read_csv('omni_mhd_5min.csv')
#omni_df = pd.read_csv(omni_mhd_path+'omni_mhd_5min.csv')
omni_df.set_index('dt', inplace=True) #set the datetime as the index
omni_df = omni_df.dropna()

omni_ds = xr.Dataset(omni_df)
omni_ds['dt'] = pd.to_datetime(omni_ds['dt']) #convert the index to datetime

#merge OMNI with REPPU data
reppu_omni_ds = ds.merge(omni_ds, join='inner')
reppu_omni_ds.sortby('dt')
reppu_omni_ds

In [43]:
# Define the slice ranges for train and test data
train_slice = slice(0, 36576)  #36576 / 24 / 12 = 127 days = 80% of the data
test_slice = slice(36576, None)  # 9216 / 24 / 12 = 32 days = 20% of the data

# Create ds_train and ds_test datasets
ds_train = reppu_omni_ds.isel(dt=train_slice)
ds_test = reppu_omni_ds.isel(dt=test_slice)

In [45]:
#training data
X_train = ds_train.drop_vars(quantity_to_predict).to_array().values.T
y_train = ds_train[quantity_to_predict].values
y_train = y_train.reshape(-1, 30*80) #flatten lat & lon

#test data
X_test = ds_test.drop_vars(quantity_to_predict).to_array().values.T
y_test = ds_test[quantity_to_predict].values
y_test = y_test.reshape(-1, 30*80) #flatten lat & lon

input_dim = X_train.shape[1] #number of input features
output_dim = y_train.shape[1] #number of output features

In [46]:
# Define hyperparameter values
reservoir_size_list = [250, 350, 450, 550]
density_list = [0.15, 0.2, 0.25]
input_scaling_list = [0.1, 0.2, 0.3]
spectral_radius_list = [0.9, 0.95, 0.99]
beta_list = [1e-2, 1e-3, 1e-4]

#reservoir_size_list = [250]
#density_list = [0.1]
#input_scaling_list = [0.1]
#spectral_radius_list = [0.99]
#beta_list = [1e-3]

# Initialize lists to store results
results = []

start_time = datetime.datetime.now()

# Loop through each combination of hyperparameters
for reservoir_size in reservoir_size_list:
    for density in density_list:
        for input_scaling in input_scaling_list:
            for spectral_radius in spectral_radius_list:
                for beta in beta_list:
                    # Initialize model and optimizer with current hyperparameters
                    model = ESN(N_u=X_train.shape[1], N_y=y_train.shape[1], N_x=reservoir_size, density=density, input_scale=input_scaling, rho=spectral_radius)
                    optimizer = Tikhonov(N_u=X_train.shape[1], N_x=reservoir_size, N_y=y_train.shape[1], beta=beta)
                    
                    # Print the current combination of hyperparameters and the current date and time
                    print(f"Testing hyperparameters: reservoir_size={reservoir_size}, density={density}, input_scaling={input_scaling}, spectral_radius={spectral_radius}, beta={beta}")
                    print(f"at time: {datetime.datetime.now()}")
                    
                    # Train the model and measure time
                    start_time = datetime.datetime.now()
                    model.train(X_train, y_train, optimizer)
                    training_time = datetime.datetime.now() - start_time
                    training_time = training_time.total_seconds() / 60

                    # Make predictions
                    y_pred = model.predict(X_test)
                    
                    # Calculate RMSE and NRMSE
                    rmse = np.sqrt(np.mean((y_test - y_pred) ** 2))
                    nrmse = rmse / np.std(y_test)
                    
                    # Store results in a dictionary
                    result = {
                        "reservoir_size": reservoir_size,
                        "density": density,
                        "input_scaling": input_scaling,
                        "spectral_radius": spectral_radius,
                        "beta": beta,
                        "training_time": training_time,
                        "rmse": rmse,
                        "nrmse": nrmse
                    }
                    
                    # Append result to the results list
                    results.append(result)

                    #print RMSE and NRMSE
                    print(f"results: RMSE: {round(rmse,1)}", f"NRMSE: {round(nrmse,1)} \n")

# Convert results to DataFrame
results_df = pd.DataFrame(results)

# Export results to CSV
results_df.to_csv("hyperparameter_tuning_current_110424.csv", index=False)

end_time = datetime.datetime.now()
print(f"\n Total time taken: {end_time - start_time}")

results_df


Testing hyperparameters: reservoir_size=250, density=0.15, input_scaling=0.1, spectral_radius=0.9, beta=0.01
at time: 2024-04-11 12:42:45.408990
results: RMSE: 0.0 NRMSE: 0.7 

Testing hyperparameters: reservoir_size=250, density=0.15, input_scaling=0.1, spectral_radius=0.9, beta=0.001
at time: 2024-04-11 12:43:44.417323
results: RMSE: 0.0 NRMSE: 0.7 

Testing hyperparameters: reservoir_size=250, density=0.15, input_scaling=0.1, spectral_radius=0.9, beta=0.0001
at time: 2024-04-11 12:44:43.192897
results: RMSE: 0.0 NRMSE: 0.7 

Testing hyperparameters: reservoir_size=250, density=0.15, input_scaling=0.1, spectral_radius=0.95, beta=0.01
at time: 2024-04-11 12:45:41.759885
results: RMSE: 0.0 NRMSE: 0.7 

Testing hyperparameters: reservoir_size=250, density=0.15, input_scaling=0.1, spectral_radius=0.95, beta=0.001
at time: 2024-04-11 12:46:40.925514


In [35]:
params = pd.read_csv('hyperparameter_tuning_potential_100424.csv')
params.sort_values(by='nrmse', ascending=True)

Unnamed: 0,reservoir_size,density,input_scaling,spectral_radius,beta,training_time,rmse,nrmse
303,450,0.20,0.1,1.00,0.010,1.745727,6.166034,0.497650
243,450,0.10,0.1,0.90,0.010,1.769424,6.175187,0.498389
304,450,0.20,0.1,1.00,0.001,1.747088,6.176374,0.498485
300,450,0.20,0.1,0.95,0.010,1.777723,6.177001,0.498535
246,450,0.10,0.1,0.95,0.010,1.765420,6.182136,0.498950
...,...,...,...,...,...,...,...,...
133,250,0.15,0.3,1.00,0.001,0.972608,6.517364,0.526005
99,250,0.10,0.3,0.90,0.010,0.967704,6.521010,0.526300
126,250,0.15,0.3,0.90,0.010,0.960346,6.527493,0.526823
129,250,0.15,0.3,0.95,0.010,0.966117,6.528978,0.526943
