In [1]:
import numpy as np
import os
import random
import torch
from PD.PlasmaDataset import PlasmaDataset
from PML.PlasmaModel import PlasmaModel
from PML.PMLParameters import PMLParameters

In [2]:
# ---- notebook configuration: everything a reader might want to tune ----

# dataset settings
DATA_SPLIT = [0.5, 0.2, 0.3] #train/test/val splits
DATA_FRAC = 1 #fraction of files to load data from
DATASET_NAME = "main" #tag embedded in the exported CSV file names
HDF5_DATA_DIR = "./jtext_data/low_freq" #data to source hdf5 files from
ORG_DATA_DIR = "./jtext_org" #directory for exporting data CSVs

# model search settings
MODEL_COUNT = 2000 #number of models to randomly generate and train
MODEL_DIR = "./models" #directory to save trained models
#metrics file lives inside MODEL_DIR; derived so the two paths cannot drift apart
MODEL_METRICS = f"{MODEL_DIR}/metrics.json"
HP_SEARCH = 'random' #hyperparameter search mode

# model features (len(FEATS) is used as the model input size)
FEATS = [
    # AXUV channels
    'AXUV_CA_02',
    'AXUV_CA_06',
    'AXUV_CA_10',
    'AXUV_CA_14',
    'AXUV_CB_18',
    'AXUV_CB_22',
    'AXUV_CB_26',
    'AXUV_CB_30',
    'AXUV_CE_66',
    'AXUV_CE_70',
    'AXUV_CE_74',
    'AXUV_CE_78',
    'AXUV_CF_82',
    'AXUV_CF_86',
    'AXUV_CF_90',
    'AXUV_CF_94',
    # remaining signals
    'P_in',
    'P_rad',
    'bt',
    'dx',
    'dy',
    'Iohp',
    'ip',
    'ip_error',
    'qa_proxy',
    'radiation_proxy',
    'ne0',
    'ne_nG',
    'rotating_mode_proxy',
]

In [3]:
#locations of the per-split feature ("norm") and label CSVs that are handed to
#the modeler's prepareData() step.
#Paths are built from ORG_DATA_DIR and DATASET_NAME so that changing either
#constant keeps this table in sync (previously "./jtext_org" was hard-coded).
#NOTE(review): presumably these are the files written by dataset.saveCSV() -
#confirm the <split>/<split>-<kind>-<name>.csv layout against PlasmaDataset.
PROCESSED_DATA = {
    f"{split}_{kind}": f"{ORG_DATA_DIR}/{split}/{split}-{kind}-{DATASET_NAME}.csv"
    for split in ("train", "test", "val")
    for kind in ("norm", "labels")
}

In [4]:
#search space (PARAMETER_RANGES) and fixed settings (STATIC_PARAMETERS) for the hyperparameter search
#note to current user - grid search is currently broken (and inefficient anyway) - don't use it
PARAMETER_RANGES = dict(
    lr=[0.001, 0.05], #learning rate range
    #lstm layer count and hidden size ranges
    #(looks like one [min, max] pair per layer - the logged sample was [245, 260]; TODO confirm)
    lstm_layers=[[200, 800], [80, 320]],
    #linear layer count and neuron ranges (same apparent layout as lstm_layers)
    linear_layers=[[50, 300], [50, 250]],
    #dropout layer count and dropout probabilities
    dropout_layers=[[0.05, 0.2]],
    epochs=[10, 50], #range for the number of training epochs
)
STATIC_PARAMETERS = dict(
    batch_size=16,
    #binary cross entropy loss computed on logits
    #NOTE(review): BCEWithLogitsLoss applies a sigmoid internally; if PlasmaModel
    #also runs output_activation (sigmoid) before the loss, the sigmoid is applied
    #twice - confirm against PlasmaModel's training loop
    criterion=torch.nn.BCEWithLogitsLoss(),
    init=torch.nn.init.xavier_normal_, #weight initializer
    input_size=len(FEATS), #set input size to # of features
    lstm_activation=torch.nn.functional.tanh, #LSTM layers activation function
    linear_activation=torch.nn.functional.relu, #Linear layers activation function
    optimizer=torch.optim.Adam, #use ADAM optimizer (class, not an instance)
    output_activation=torch.nn.functional.sigmoid, #output neuron activation
)

In [5]:
def makeDataset(dataset:"PlasmaDataset", split:list, features:list, frac:float=1, preview:bool=False, name:"str | None"=None):
    """Run the full dataset preparation pipeline on `dataset` and export it to CSV.

    The steps are executed in a fixed order: initialize, source files, source
    feature data, compute statistics, normalize, save to CSV, optionally
    preview, and finally drop the in-memory datasets.

    Parameters
    ----------
    dataset : PlasmaDataset
        Dataset object whose pipeline methods are invoked in place.
    split : list
        Passed to `dataset.sourceFiles` as `data_split` (train/test/val).
    features : list
        Feature names passed to `dataset.sourceData`.
    frac : float, default 1
        Passed to `dataset.sourceFiles` as `data_frac`.
    preview : bool, default False
        When true, `dataset.preview()` is called after the CSV export.
    name : str or None, default None
        Name passed to `dataset.saveCSV`. When None, the notebook-level
        `DATASET_NAME` is used, which matches the previous behavior.

    Returns
    -------
    None
    """
    if name is None:
        #fall back to the notebook-wide default only when the caller gave no name
        name = DATASET_NAME

    dataset.initialize() #creates train/test/val subdatasets
    dataset.sourceFiles(data_split = split, data_frac = frac) #initialize split/datafrac and gather hdf5 file info
    dataset.sourceData(features) #source specified feature data from files
    dataset.calcStats() #calculate data statistics from raw feature data
    dataset.normalize() #use data statistics to normalize data
    dataset.saveCSV(['train', 'test', 'val', 'stats'], name=name) #export dataset to model-loadable CSV
    if preview:
        dataset.preview() #preview datasets
    dataset.deleteDatasets() #remove dataset from memory (since saved to CSV)

In [6]:
def makeModels(
    modeler:"PlasmaModel",
    processed_data:dict,
    parameter_ranges:dict,
    static_parameters:dict,
    model_count:int,
    searchmode:str,
):
    """Configure `modeler` for a hyperparameter search and run it.

    Three calls are made on `modeler`, in order:
    `makeHyperparameterSet` (with the static parameters, the parameter ranges,
    the number of models and the search mode), `prepareData` (with
    `processed_data`), and `runModelSearch`.

    Returns None; all work happens through the modeler's methods.
    """
    #gather the search settings under the keyword names the modeler expects
    search_spec = {
        "static_params": static_parameters,
        "param_ranges": parameter_ranges,
        "count": model_count,
        "mode": searchmode,
    }
    modeler.makeHyperparameterSet(**search_spec)

    #the data is attached only after the hyperparameter set exists, as before
    modeler.prepareData(processed_data)
    modeler.runModelSearch()

In [None]:
#Seed every random number generator this notebook imports before any data or
#model work, so a Restart & Run All can repeat the same random search.
#NOTE(review): this only helps if PlasmaDataset / PlasmaModel draw from these
#global generators - confirm against their implementations.
SEED = 42
random.seed(SEED)
np.random.seed(SEED)
torch.manual_seed(SEED)

#build the dataset handler and the modeler from the notebook configuration
JTEXT_LOW = PlasmaDataset(org_directory = ORG_DATA_DIR, h5_source = HDF5_DATA_DIR)
MODELER = PlasmaModel(MODEL_DIR, static_parameters=STATIC_PARAMETERS, json_save_file=MODEL_METRICS)

#export the normalized dataset to CSV, then run the hyperparameter search on it
makeDataset(JTEXT_LOW, split=DATA_SPLIT, frac=DATA_FRAC, features=FEATS)
makeModels(
    modeler=MODELER,
    processed_data=PROCESSED_DATA,
    parameter_ranges=PARAMETER_RANGES,
    static_parameters=STATIC_PARAMETERS,
    model_count=MODEL_COUNT,
    searchmode=HP_SEARCH
)

{'batch_size': 16, 'criterion': BCEWithLogitsLoss(), 'init': <function xavier_normal_ at 0x00000279D1D9AF20>, 'input_size': 29, 'lstm_activation': <function tanh at 0x00000279D1D12340>, 'linear_activation': <function relu at 0x00000279D1D11940>, 'optimizer': <class 'torch.optim.adam.Adam'>, 'output_activation': <function sigmoid at 0x00000279D1D123E0>, 'lr': 0.014414732983136957, 'lstm_layers': [245, 260], 'linear_layers': [195, 243], 'dropout_layers': [0.14663082894211107], 'epochs': 30}


Consider using tensor.detach() first. (Triggered internally at C:\actions-runner\_work\pytorch\pytorch\pytorch\aten\src\ATen\native\Scalar.cpp:23.)
  running_loss += loss.item() * inputs.size(0)


Epoch [1/30], Training Loss: 0.8293
Epoch [1/30], Validation Loss: 0.7668
Epoch [2/30], Training Loss: 0.6716
Epoch [2/30], Validation Loss: 0.7022
Epoch [3/30], Training Loss: 0.6664
Epoch [3/30], Validation Loss: 0.6715
Epoch [4/30], Training Loss: 0.6619
Epoch [4/30], Validation Loss: 0.6690
Epoch [5/30], Training Loss: 0.6633
Epoch [5/30], Validation Loss: 0.6653
Epoch [6/30], Training Loss: 0.6463
Epoch [6/30], Validation Loss: 0.6706
Epoch [7/30], Training Loss: 0.6634
Epoch [7/30], Validation Loss: 0.6747
Epoch [8/30], Training Loss: 0.6443
Epoch [8/30], Validation Loss: 0.6799
Epoch [9/30], Training Loss: 0.6516
Epoch [9/30], Validation Loss: 0.6669
Epoch [10/30], Training Loss: 0.6496
Epoch [10/30], Validation Loss: 0.6819
Epoch [11/30], Training Loss: 0.6445
Epoch [11/30], Validation Loss: 0.6653
Epoch [12/30], Training Loss: 0.6622
Epoch [12/30], Validation Loss: 0.6627
Epoch [13/30], Training Loss: 0.6521
Epoch [13/30], Validation Loss: 0.7068
Epoch [14/30], Training Loss: 