In [1]:
def define_searchspace(trial):
    """Register the hyperparameter search space on ``trial`` (define-by-run).

    This callable is handed to the Optuna searcher as its ``space``. The model
    class is sampled first; which ``model_args/*`` parameters are sampled next
    depends on that choice. Loss, optimizer, learning rate and scaler are then
    sampled for every trial regardless of the model.

    Args:
        trial: Object exposing ``suggest_categorical``, ``suggest_int`` and
            ``suggest_float``.

    Returns:
        None. Sampled values are only registered on ``trial``.
    """
    hidden_sizes = (8, 16, 32, 64, 128)

    architecture = trial.suggest_categorical(
        'model',
        ['fsr_model.LSTM', 'fsr_model.CNN_LSTM', 'fsr_model.ANN'],
    )

    # The plain LSTM and the ANN share one parameterisation: a single stack
    # described by its width and depth.
    if architecture in ('fsr_model.LSTM', 'fsr_model.ANN'):
        trial.suggest_categorical('model_args/hidden_size', list(hidden_sizes))
        trial.suggest_int('model_args/num_layer', 1, 8)
    elif architecture == 'fsr_model.CNN_LSTM':
        # Two stacks, each with its own width and depth. Both widths are
        # sampled before both depths.
        for stage in ('cnn', 'lstm'):
            trial.suggest_categorical(
                f'model_args/{stage}_hidden_size', list(hidden_sizes)
            )
        for stage in ('cnn', 'lstm'):
            trial.suggest_int(f'model_args/{stage}_num_layer', 1, 8)

    # Model-independent settings. The loss has a single candidate, so it is
    # effectively fixed.
    trial.suggest_categorical('criterion', ['torch.nn.MSELoss'])

    optimizer_names = ('Adam', 'NAdam', 'Adagrad', 'RAdam', 'SGD')
    trial.suggest_categorical(
        'optimizer',
        [f'torch.optim.{name}' for name in optimizer_names],
    )
    # Learning rate is sampled on a log scale.
    trial.suggest_float('optimizer_args/lr', 1e-5, 1e-1, log=True)

    scaler_names = ('StandardScaler', 'MinMaxScaler', 'RobustScaler')
    trial.suggest_categorical(
        'scaler',
        [f'sklearn.preprocessing.{name}' for name in scaler_names],
    )

In [2]:
import ray.tune
import ray.air
import ray.air.integrations.wandb
import ray.tune.schedulers
import datasource
from trainable import Trainable
import ray.tune.search
import ray.tune.search.optuna

# Hyperparameter search: Optuna proposes configurations, ASHA stops weak
# trials early, and every trial minimises the reported 'rmse'.
objective = {'metric': 'rmse', 'mode': 'min'}

# Bind the dataset to the Trainable and attach the {'cpu': 2} resource spec.
resourced_trainable = ray.tune.with_resources(
    ray.tune.with_parameters(Trainable, data=datasource.get_data()),
    {'cpu': 2},
)

asha_scheduler = ray.tune.schedulers.ASHAScheduler(
    max_t=100,
    grace_period=1,
    reduction_factor=2,
    brackets=1,
    **objective,
)

# metric/mode are passed to the searcher directly, together with the
# define-by-run search space function.
optuna_search = ray.tune.search.optuna.OptunaSearch(
    space=define_searchspace,
    **objective,
)

# NOTE(review): per the Ray Tune docs, num_samples=-1 keeps generating trials
# until a stopping condition is met; none is configured here, so the run
# presumably continues until it is interrupted manually - confirm intended.
tune_config = ray.tune.TuneConfig(
    num_samples=-1,
    scheduler=asha_scheduler,
    search_alg=optuna_search,
)

# Retain the 3 best checkpoints ranked by lowest 'rmse'; checkpoint every
# 5 iterations and once more when a trial ends.
checkpoint_config = ray.air.CheckpointConfig(
    num_to_keep=3,
    checkpoint_score_attribute='rmse',
    checkpoint_score_order='min',
    checkpoint_frequency=5,
    checkpoint_at_end=True,
)

run_config = ray.air.RunConfig(
    # Weights & Biases logging is currently disabled; to enable it, pass:
    # callbacks=[
    #     ray.air.integrations.wandb.WandbLoggerCallback(project='FSR-prediction'),
    # ],
    checkpoint_config=checkpoint_config,
)

tuner = ray.tune.Tuner(
    trainable=resourced_trainable,
    tune_config=tune_config,
    run_config=run_config,
)
results = tuner.fit()

[I 2023-07-02 05:45:02,257] A new study created in memory with name: optuna
2023-07-02 05:45:04,399	INFO worker.py:1627 -- Started a local Ray instance. View the dashboard at [1m[32m127.0.0.1:8265 [39m[22m
2023-07-02 05:45:05,679	INFO tune.py:226 -- Initializing Ray automatically. For cluster usage or custom Ray initialization, call `ray.init(...)` before `Tuner(...)`.


0,1
Current time:,2023-07-02 05:45:51
Running for:,00:00:45.72
Memory:,4.4/7.7 GiB

Trial name,status,loc,criterion,model,model_args/cnn_hidden_size,model_args/cnn_num_layer,model_args/hidden_size,model_args/lstm_hidden_size,model_args/lstm_num_layer,model_args/num_layer,optimizer,optimizer_args/lr,scaler,iter,total time (s),rmse,mae,mape
Trainable_1102ee3d,RUNNING,172.26.215.93:62087,torch.nn.MSELoss,fsr_model.CNN_LSTM,8.0,7.0,,8.0,7.0,,torch.optim.Adam,0.0762723,sklearn.preproc_0270,7.0,40.2177,459.73,248.45,739182000.0
Trainable_108619c0,RUNNING,172.26.215.93:62456,torch.nn.MSELoss,fsr_model.LSTM,,,64.0,,,1.0,torch.optim.NAdam,0.0240354,sklearn.preproc_0330,4.0,16.5672,323.699,164.661,2.94388e+16
Trainable_a0e16dcc,RUNNING,172.26.215.93:62827,torch.nn.MSELoss,fsr_model.CNN_LSTM,64.0,4.0,,32.0,3.0,,torch.optim.Adagrad,0.0743695,sklearn.preproc_0330,,,,,
Trainable_2a8df77d,PENDING,,torch.nn.MSELoss,fsr_model.CNN_LSTM,8.0,6.0,,32.0,1.0,,torch.optim.Adam,0.0951163,sklearn.preproc_0330,,,,,
Trainable_e4484724,TERMINATED,172.26.215.93:62159,torch.nn.MSELoss,fsr_model.CNN_LSTM,128.0,2.0,,32.0,6.0,,torch.optim.RAdam,0.000161057,sklearn.preproc_02d0,1.0,6.46928,521.451,346.201,5.43014e+17
Trainable_2dce8232,TERMINATED,172.26.215.93:62269,torch.nn.MSELoss,fsr_model.LSTM,,,16.0,,,4.0,torch.optim.RAdam,4.56347e-05,sklearn.preproc_02d0,1.0,4.31968,501.187,313.931,4.86482e+17
Trainable_9dc0cd94,TERMINATED,172.26.215.93:62621,torch.nn.MSELoss,fsr_model.ANN,,,64.0,,,3.0,torch.optim.SGD,0.00373404,sklearn.preproc_02d0,1.0,2.80711,524.427,280.731,3.06358e+17


[2m[36m(Trainable pid=62087)[0m tensor([[ 1.1350e-01,  3.3895e-01,  2.6895e-01, -1.1435e-01,  5.3234e-01,
[2m[36m(Trainable pid=62087)[0m           1.0435e+00],
[2m[36m(Trainable pid=62087)[0m         [ 1.6173e-01,  3.9626e-01,  3.0414e-01, -1.8258e-01,  5.3234e-01,
[2m[36m(Trainable pid=62087)[0m           1.0080e+00],
[2m[36m(Trainable pid=62087)[0m         [ 2.4212e-01,  4.8796e-01,  3.3932e-01, -2.2807e-01,  5.3234e-01,
[2m[36m(Trainable pid=62087)[0m           9.7245e-01],
[2m[36m(Trainable pid=62087)[0m         [ 3.3858e-01,  6.1405e-01,  3.8624e-01, -2.8114e-01,  5.3234e-01,
[2m[36m(Trainable pid=62087)[0m           9.7245e-01],
[2m[36m(Trainable pid=62087)[0m         [ 4.2700e-01,  7.5160e-01,  4.2143e-01, -3.1905e-01,  5.0681e-01,
[2m[36m(Trainable pid=62087)[0m           9.7245e-01],
[2m[36m(Trainable pid=62087)[0m         [ 5.0738e-01,  9.0061e-01,  4.9180e-01, -3.4937e-01,  4.8129e-01,
[2m[36m(Trainable pid=62087)[0m           9.7245e-01]

Trial name,date,done,hostname,iterations_since_restore,mae,mape,node_ip,pid,rmse,time_since_restore,time_this_iter_s,time_total_s,timestamp,training_iteration,trial_id
Trainable_108619c0,2023-07-02_05-45-45,False,DESKTOP-0P789CI,3,179.974,2.8565e+16,172.26.215.93,62456,349.461,13.2175,3.81797,13.2175,1688244345,3,108619c0
Trainable_1102ee3d,2023-07-02_05-45-50,False,DESKTOP-0P789CI,7,248.45,739182000.0,172.26.215.93,62087,459.73,40.2177,5.38046,40.2177,1688244350,7,1102ee3d
Trainable_2dce8232,2023-07-02_05-45-28,True,DESKTOP-0P789CI,1,313.931,4.86482e+17,172.26.215.93,62269,501.187,4.31968,4.31968,4.31968,1688244328,1,2dce8232
Trainable_9dc0cd94,2023-07-02_05-45-44,True,DESKTOP-0P789CI,1,280.731,3.06358e+17,172.26.215.93,62621,524.427,2.80711,2.80711,2.80711,1688244344,1,9dc0cd94
Trainable_e4484724,2023-07-02_05-45-22,True,DESKTOP-0P789CI,1,346.201,5.43014e+17,172.26.215.93,62159,521.451,6.46928,6.46928,6.46928,1688244322,1,e4484724


[2m[36m(Trainable pid=62087)[0m tensor([[-0.4126, -0.5459, -0.5483,  0.4195, -0.3318, -0.3239],
[2m[36m(Trainable pid=62087)[0m         [-0.4126, -0.5459, -0.5483,  0.4088, -0.3318, -0.3239],
[2m[36m(Trainable pid=62087)[0m         [-0.4126, -0.5459, -0.5483,  0.3743, -0.3318, -0.3239],
[2m[36m(Trainable pid=62087)[0m         [-0.4126, -0.5459, -0.5483,  0.3549, -0.3318, -0.3239],
[2m[36m(Trainable pid=62087)[0m         [-0.4126, -0.5459, -0.5483,  0.3625, -0.3318, -0.3239],
[2m[36m(Trainable pid=62087)[0m         [-0.4126, -0.5459, -0.5483,  0.3452, -0.3318, -0.3239],
[2m[36m(Trainable pid=62087)[0m         [-0.4126, -0.5459, -0.5483,  0.3119, -0.3318, -0.3239],
[2m[36m(Trainable pid=62087)[0m         [-0.4126, -0.5459, -0.5483,  0.3151, -0.3318, -0.3239],
[2m[36m(Trainable pid=62087)[0m         [-0.4126, -0.5459, -0.5483,  0.3420, -0.3318, -0.3239],
[2m[36m(Trainable pid=62087)[0m         [-0.4126, -0.5459, -0.5483,  0.3646, -0.3318, -0.3239],
[2m[36m(

[2m[36m(Trainable pid=62456)[0m         [ 0.0000,  0.0000, -0.5893,  2.1494,  0.0000, 74.0000]]) tensor([[ 0.0000, -0.0708, -0.1137,  1.3892,  0.0000,  0.4000],[32m [repeated 47x across cluster][0m
[2m[36m(Trainable pid=62456)[0m    5.87008417e+00  9.79211926e-01]][32m [repeated 37968x across cluster][0m
[2m[36m(Trainable pid=62456)[0m      6.8354553 ][32m [repeated 40x across cluster][0m
[2m[36m(Trainable pid=62456)[0m         ...,[32m [repeated 71x across cluster][0m
[2m[36m(Trainable pid=62456)[0m  ...[32m [repeated 36x across cluster][0m
[2m[36m(Trainable pid=62456)[0m  [ 2.37289958e+01 -4.38481653e+01  3.07451894e+01  6.37927104e+02[32m [repeated 29057x across cluster][0m
[2m[36m(Trainable pid=62456)[0m           2.4000e+00]]) [[ 5.46543915e+02  4.99646560e+02  2.44233036e+02 -2.76980626e+02[32m [repeated 78x across cluster][0m
[2m[36m(Trainable pid=62456)[0m tensor([[ 4.0385e-01, -7.0755e-02, -1.1373e-01,  4.5317e-01,  0.0000e+00,[32m [repea