# Task1

Index_X = FSR_for_force, FSR_for_coord

Index_y = force, x_coord, y_coord

Data = Split by Time

## Run result

https://wandb.ai/seokjin/FSR-prediction/groups/FSR_Trainable_2023-07-04_21-31-27/workspace?workspace=user-seokjin

## Experiment id

FSR_Trainable_2023-07-04_21-31-27

In [1]:
def define_searchspace(trial):
    """Declare the conditional hyperparameter search space on ``trial``.

    The model family is sampled first; the model-specific arguments that are
    sampled afterwards depend on that choice. Loss, optimizer, learning rate,
    imputer and scaler are then sampled for every trial.

    Args:
        trial: object exposing ``suggest_categorical``, ``suggest_int`` and
            ``suggest_float`` (an Optuna-style trial).

    Returns:
        dict: fixed (non-sampled) entries ``index_X``, ``index_y`` and
        ``data_loader``.
    """
    width_options = (8, 16, 32, 64, 128)
    min_layers, max_layers = 1, 8

    chosen_model = trial.suggest_categorical(
        'model',
        ['fsr_model.LSTM', 'fsr_model.CNN_LSTM', 'fsr_model.ANN'],
    )
    if chosen_model in ('fsr_model.LSTM', 'fsr_model.ANN'):
        # LSTM and ANN share the same two model arguments.
        trial.suggest_categorical('model_args/hidden_size', list(width_options))
        trial.suggest_int('model_args/num_layer', min_layers, max_layers)
    elif chosen_model == 'fsr_model.CNN_LSTM':
        # Both hidden sizes are sampled before both layer counts.
        for part in ('cnn', 'lstm'):
            trial.suggest_categorical(
                f'model_args/{part}_hidden_size', list(width_options))
        for part in ('cnn', 'lstm'):
            trial.suggest_int(
                f'model_args/{part}_num_layer', min_layers, max_layers)

    trial.suggest_categorical('criterion', ['torch.nn.MSELoss'])

    optimizer_choices = [
        'torch.optim.Adam',
        'torch.optim.NAdam',
        'torch.optim.Adagrad',
        'torch.optim.RAdam',
        'torch.optim.SGD',
    ]
    trial.suggest_categorical('optimizer', optimizer_choices)
    # Learning rate is sampled on a log scale between 1e-5 and 1e-1.
    trial.suggest_float('optimizer_args/lr', 1e-5, 1e-1, log=True)

    chosen_imputer = trial.suggest_categorical(
        'imputer', ['sklearn.impute.SimpleImputer'])
    if chosen_imputer == 'sklearn.impute.SimpleImputer':
        trial.suggest_categorical('imputer_args/strategy', ['mean', 'median'])

    scaler_choices = [
        'sklearn.preprocessing.StandardScaler',
        'sklearn.preprocessing.MinMaxScaler',
        'sklearn.preprocessing.RobustScaler',
    ]
    trial.suggest_categorical('scaler', scaler_choices)

    # Constant settings for this task: input/target columns and data loader.
    fixed_settings = {
        'index_X': ['FSR_for_force', 'FSR_for_coord'],
        'index_y': ['force', 'x_coord', 'y_coord'],
        'data_loader': 'fsr_data.get_index_splited_by_time',
    }
    return fixed_settings

In [2]:
import ray.tune
import ray.air
import ray.air.integrations.wandb
import ray.tune.schedulers
from fsr_trainable import FSR_Trainable
import ray.tune.search
import ray.tune.search.optuna

# Build the Tuner step by step; the pieces are created in the same order the
# nested constructor call would evaluate them.

# Each FSR_Trainable trial is allotted 2 CPUs.
trainable_with_cpus = ray.tune.with_resources(FSR_Trainable, {'cpu': 2})

# ASHA scheduler configured to minimise the reported 'rmse'.
asha_scheduler = ray.tune.schedulers.ASHAScheduler(
    max_t=100,
    grace_period=1,
    reduction_factor=2,
    brackets=1,
    metric='rmse',
    mode='min',
)

# Optuna proposes configurations through the define-by-run search space
# function, also minimising 'rmse'.
optuna_search = ray.tune.search.optuna.OptunaSearch(
    space=define_searchspace,
    metric='rmse',
    mode='min',
)

tune_config = ray.tune.TuneConfig(
    num_samples=1000,
    scheduler=asha_scheduler,
    search_alg=optuna_search,
)

# Trial results are logged to the 'FSR-prediction' Weights & Biases project.
wandb_logger = ray.air.integrations.wandb.WandbLoggerCallback(
    project='FSR-prediction',
)

# Checkpoint every 5 iterations and at the end, keeping 3 ranked by lowest 'rmse'.
checkpoint_config = ray.air.CheckpointConfig(
    num_to_keep=3,
    checkpoint_score_attribute='rmse',
    checkpoint_score_order='min',
    checkpoint_frequency=5,
    checkpoint_at_end=True,
)

run_config = ray.air.RunConfig(
    callbacks=[wandb_logger],
    checkpoint_config=checkpoint_config,
)

tuner = ray.tune.Tuner(
    trainable=trainable_with_cpus,
    tune_config=tune_config,
    run_config=run_config,
)

[I 2023-07-05 02:02:11,272] A new study created in memory with name: optuna


In [None]:
# Launch the search configured on `tuner` and keep whatever `fit()` returns.
results = tuner.fit()

In [13]:
# Resume the earlier experiment from its results directory instead of starting
# a new search. `Tuner.restore` is a classmethod, so it is called on the class:
# this cell then does not require a freshly constructed `tuner` instance.
# NOTE(review): the path is specific to the original machine; adjust it when
# running elsewhere.
# NOTE(review): the original run wrapped FSR_Trainable with
# `ray.tune.with_resources`; confirm whether the restored run should as well.
results = ray.tune.Tuner.restore(
    path='/home/seokj/ray_results/FSR_Trainable_2023-07-04_21-31-27',
    trainable=FSR_Trainable,
).fit()

Failed to read the results for 3 trials:
- /home/seokj/ray_results/FSR_Trainable_2023-07-04_21-31-27/FSR_Trainable_1e8ec326_856_criterion=torch_nn_MSELoss,data_loader=fsr_data_get_index_splited_by_time,imputer=sklearn_impute_Simple_2023-07-05_01-45-27
- /home/seokj/ray_results/FSR_Trainable_2023-07-04_21-31-27/FSR_Trainable_295022da_260_criterion=torch_nn_MSELoss,data_loader=fsr_data_get_index_splited_by_time,imputer=sklearn_impute_Simple_2023-07-04_23-17-48
- /home/seokj/ray_results/FSR_Trainable_2023-07-04_21-31-27/FSR_Trainable_a59376cb_658_criterion=torch_nn_MSELoss,data_loader=fsr_data_get_index_splited_by_time,imputer=sklearn_impute_Simple_2023-07-05_00-56-49
2023-07-05 02:13:18,878	INFO worker.py:1627 -- Started a local Ray instance. View the dashboard at [1m[32m127.0.0.1:8265 [39m[22m
2023-07-05 02:13:20,029	INFO tune.py:226 -- Initializing Ray automatically. For cluster usage or custom Ray initialization, call `ray.init(...)` before `Tuner(...)`.


0,1
Current time:,2023-07-05 02:35:18
Running for:,00:21:56.03
Memory:,3.5/7.7 GiB

Trial name,# failures,error file
FSR_Trainable_295022da,1,"/home/seokj/ray_results/FSR_Trainable_2023-07-04_21-31-27/FSR_Trainable_295022da_260_criterion=torch_nn_MSELoss,data_loader=fsr_data_get_index_splited_by_time,imputer=sklearn_impute_Simple_2023-07-04_23-17-48/error.txt"
FSR_Trainable_a59376cb,1,"/home/seokj/ray_results/FSR_Trainable_2023-07-04_21-31-27/FSR_Trainable_a59376cb_658_criterion=torch_nn_MSELoss,data_loader=fsr_data_get_index_splited_by_time,imputer=sklearn_impute_Simple_2023-07-05_00-56-49/error.txt"

Trial name,status,loc,criterion,data_loader,imputer,imputer_args/strateg y,index_X,index_y,model,model_args/cnn_hidde n_size,model_args/cnn_num_l ayer,model_args/hidden_si ze,model_args/lstm_hidd en_size,model_args/lstm_num_ layer,model_args/num_layer,optimizer,optimizer_args/lr,scaler,iter,total time (s),rmse,mae,mape
FSR_Trainable_394f825f,TERMINATED,172.26.215.93:202697,torch.nn.MSELoss,fsr_data.get_in_8ff0,sklearn.impute._3280,median,['FSR_for_force_72c0,"['force', 'x_co_9b40",fsr_model.LSTM,,,128.0,,,1.0,torch.optim.Adam,0.000736628,sklearn.preproc_89f0,83.0,288.052,98.0594,29.2633,53527500.0
FSR_Trainable_40f2002e,TERMINATED,172.26.215.93:202698,torch.nn.MSELoss,fsr_data.get_in_27f0,sklearn.impute._32d0,median,['FSR_for_force_8d40,"['force', 'x_co_2cc0",fsr_model.LSTM,,,128.0,,,1.0,torch.optim.Adam,0.000770778,sklearn.preproc_2670,100.0,298.797,91.5677,26.4649,40672400.0
FSR_Trainable_41cc7966,TERMINATED,172.26.215.93:202700,torch.nn.MSELoss,fsr_data.get_in_cf90,sklearn.impute._ffa0,median,['FSR_for_force_7700,"['force', 'x_co_d740",fsr_model.LSTM,,,128.0,,,1.0,torch.optim.Adam,0.000833311,sklearn.preproc_cc90,37.0,145.857,99.245,29.7046,46833400.0
FSR_Trainable_07f9f19c,TERMINATED,172.26.215.93:202702,torch.nn.MSELoss,fsr_data.get_in_0810,sklearn.impute._b780,median,['FSR_for_force_7fc0,"['force', 'x_co_5500",fsr_model.LSTM,,,128.0,,,1.0,torch.optim.Adam,0.000997587,sklearn.preproc_0750,4.0,14.597,112.123,34.9086,61622900.0
FSR_Trainable_18aa418a,TERMINATED,172.26.215.93:198266,torch.nn.MSELoss,fsr_data.get_in_fdb0,sklearn.impute._f0f0,median,['FSR_for_force_f9c0,"['force', 'x_co_6140",fsr_model.LSTM,,,128.0,,,1.0,torch.optim.RAdam,0.00220731,sklearn.preproc_fb70,1.0,3.81135,208.95,66.367,176732000.0
FSR_Trainable_3dae7291,TERMINATED,172.26.215.93:197980,torch.nn.MSELoss,fsr_data.get_in_f210,sklearn.impute._b690,mean,['FSR_for_force_ef80,"['force', 'x_co_f080",fsr_model.LSTM,,,128.0,,,1.0,torch.optim.RAdam,0.000322941,sklearn.preproc_efd0,1.0,6.66716,247.061,85.2694,265233000.0
FSR_Trainable_932e5247,TERMINATED,172.26.215.93:197739,torch.nn.MSELoss,fsr_data.get_in_e610,sklearn.impute._6920,mean,['FSR_for_force_44c0,"['force', 'x_co_c100",fsr_model.LSTM,,,32.0,,,8.0,torch.optim.Adam,0.00128305,sklearn.preproc_2d90,1.0,6.54485,261.178,88.9624,299577000.0
FSR_Trainable_f82ab704,TERMINATED,172.26.215.93:197512,torch.nn.MSELoss,fsr_data.get_in_d110,sklearn.impute._c030,mean,['FSR_for_force_7240,"['force', 'x_co_5500",fsr_model.LSTM,,,32.0,,,1.0,torch.optim.Adam,0.000722359,sklearn.preproc_d8f0,1.0,2.09831,213.718,69.3854,222807000.0
FSR_Trainable_a9d61856,TERMINATED,172.26.215.93:196741,torch.nn.MSELoss,fsr_data.get_in_2af0,sklearn.impute._f140,median,['FSR_for_force_8d00,"['force', 'x_co_a8c0",fsr_model.LSTM,,,128.0,,,1.0,torch.optim.Adam,0.000864028,sklearn.preproc_32d0,16.0,55.0769,105.755,31.88,53247200.0
FSR_Trainable_fd0ba6fb,TERMINATED,172.26.215.93:196965,torch.nn.MSELoss,fsr_data.get_in_c0f0,sklearn.impute._efb0,median,['FSR_for_force_4a40,"['force', 'x_co_4340",fsr_model.LSTM,,,128.0,,,1.0,torch.optim.Adam,0.000821097,sklearn.preproc_c030,4.0,14.2732,112.055,34.6467,48616100.0


2023-07-05 02:13:20,103	INFO experiment_state.py:435 -- A local experiment checkpoint was found and will be used to restore the previous experiment state.
2023-07-05 02:13:20,140	INFO trial_runner.py:422 -- Using the newest experiment state file found within the experiment directory: experiment_state-2023-07-04_21-31-32.json
2023-07-05 02:13:22,756	INFO wandb.py:320 -- Already logged into W&B.
[2m[36m(FSR_Trainable pid=202697)[0m 2023-07-05 02:13:31,773	INFO trainable.py:918 -- Restored on 172.26.215.93 from checkpoint: /tmp/checkpoint_tmp_70b95321f16c41668125b2dcc8c645fd
[2m[36m(FSR_Trainable pid=202697)[0m 2023-07-05 02:13:31,773	INFO trainable.py:927 -- Current state after restoring: {'_iteration': 80, '_timesteps_total': None, '_time_total': 276.3226668834686, '_episodes_total': None}


Trial name,date,done,hostname,iterations_since_restore,mae,mape,node_ip,pid,rmse,time_since_restore,time_this_iter_s,time_total_s,timestamp,training_iteration,trial_id
FSR_Trainable_0341bc42,2023-07-05_02-25-13,True,DESKTOP-0P789CI,1,86.5541,302447000.0,172.26.215.93,218620,255.161,4.93402,4.93402,4.93402,1688491513,1,0341bc42
FSR_Trainable_039599b4,2023-07-05_02-19-49,True,DESKTOP-0P789CI,8,33.009,52318200.0,172.26.215.93,210890,107.868,17.8723,2.21719,17.8723,1688491189,8,039599b4
FSR_Trainable_059f96c3,2023-07-05_02-20-33,True,DESKTOP-0P789CI,1,65.2449,114558000.0,172.26.215.93,212304,208.429,2.63713,2.63713,2.63713,1688491233,1,059f96c3
FSR_Trainable_07f9f19c,2023-07-05_02-13-57,True,DESKTOP-0P789CI,4,34.9086,61622900.0,172.26.215.93,202702,112.123,14.597,2.46022,14.597,1688490837,4,07f9f19c
FSR_Trainable_086b9c96,2023-07-05_02-29-38,True,DESKTOP-0P789CI,1,62.7991,146386000.0,172.26.215.93,224987,194.7,1.41878,1.41878,1.41878,1688491778,1,086b9c96
FSR_Trainable_0dea044e,2023-07-05_02-25-39,True,DESKTOP-0P789CI,4,34.1872,53948200.0,172.26.215.93,219094,110.8,8.41184,2.39622,8.41184,1688491539,4,0dea044e
FSR_Trainable_0eae9556,2023-07-05_02-27-29,True,DESKTOP-0P789CI,1,85.9466,2.40901e+16,172.26.215.93,221715,271.478,4.75926,4.75926,4.75926,1688491649,1,0eae9556
FSR_Trainable_0f6bcd17,2023-07-05_02-26-56,True,DESKTOP-0P789CI,1,87.5111,272192000.0,172.26.215.93,220927,259.712,4.03419,4.03419,4.03419,1688491616,1,0f6bcd17
FSR_Trainable_0ff9f9ef,2023-07-05_02-33-14,True,DESKTOP-0P789CI,8,34.1913,55050900.0,172.26.215.93,229459,109.497,18.6416,3.00204,18.6416,1688491994,8,0ff9f9ef
FSR_Trainable_10492b40,2023-07-05_02-15-31,True,DESKTOP-0P789CI,2,43.5909,63433800.0,172.26.215.93,205468,140.774,8.48554,3.84757,8.48554,1688490931,2,10492b40


[2m[36m(_WandbLoggingActor pid=202922)[0m wandb: Currently logged in as: seokjin. Use `wandb login --relogin` to force relogin
[2m[36m(FSR_Trainable pid=202700)[0m 2023-07-05 02:13:32,581	INFO trainable.py:918 -- Restored on 172.26.215.93 from checkpoint: /tmp/checkpoint_tmp_e8df20ff88b44269bc86f04d8dbefcd5[32m [repeated 2x across cluster] (Ray deduplicates logs by default. Set RAY_DEDUP_LOGS=0 to disable log deduplication, or see https://docs.ray.io/en/master/ray-observability/ray-logging.html#log-deduplication for more options.)[0m
[2m[36m(FSR_Trainable pid=202700)[0m 2023-07-05 02:13:32,581	INFO trainable.py:927 -- Current state after restoring: {'_iteration': 35, '_timesteps_total': None, '_time_total': 138.14852929115295, '_episodes_total': None}[32m [repeated 2x across cluster][0m
[2m[36m(_WandbLoggingActor pid=202922)[0m wandb: Tracking run with wandb version 0.15.4
[2m[36m(_WandbLoggingActor pid=202922)[0m wandb: Run data is saved locally in /home/seokj/ray_r