# Task4

Index_X = FSR_for_force, FSR_for_coord

Index_y = force, x_coord, y_coord

Data = Split by Subject

## Run result

https://wandb.ai/seokjin/FSR-prediction/groups/FSR_Trainable_2023-07-05_12-59-09/workspace?workspace=user-seokjin

## Experiment id

FSR_Trainable_2023-07-05_12-59-09

In [1]:
def define_searchspace(trial):
    """Register the tunable hyperparameters on ``trial`` and return the fixed ones.

    The model architecture is drawn first; only the ``model_args/*`` entries
    belonging to the drawn architecture are then suggested. Loss, optimizer,
    learning rate, imputer and scaler are suggested for every trial.

    Args:
        trial: Object providing ``suggest_categorical``, ``suggest_int`` and
            ``suggest_float`` (passed in by the searcher that receives this
            function as its ``space``).

    Returns:
        dict: Constant configuration entries that are not searched over
        (``index_X``, ``index_y`` and ``data_loader``).
    """
    width_choices = (8, 16, 32, 64, 128)
    min_layers, max_layers = 1, 8

    architecture = trial.suggest_categorical(
        'model',
        ['fsr_model.LSTM', 'fsr_model.CNN_LSTM', 'fsr_model.ANN'],
    )
    if architecture == 'fsr_model.CNN_LSTM':
        # Both width parameters are suggested before both depth parameters.
        for width_key in ('model_args/cnn_hidden_size', 'model_args/lstm_hidden_size'):
            trial.suggest_categorical(width_key, list(width_choices))
        for depth_key in ('model_args/cnn_num_layer', 'model_args/lstm_num_layer'):
            trial.suggest_int(depth_key, min_layers, max_layers)
    elif architecture in ('fsr_model.LSTM', 'fsr_model.ANN'):
        # LSTM and ANN share the same two model arguments.
        trial.suggest_categorical('model_args/hidden_size', list(width_choices))
        trial.suggest_int('model_args/num_layer', min_layers, max_layers)

    trial.suggest_categorical('criterion', ['torch.nn.MSELoss'])

    optimizer_choices = [
        'torch.optim.Adam',
        'torch.optim.NAdam',
        'torch.optim.Adagrad',
        'torch.optim.RAdam',
        'torch.optim.SGD',
    ]
    trial.suggest_categorical('optimizer', optimizer_choices)
    # Learning rate is sampled on a log scale.
    trial.suggest_float('optimizer_args/lr', 1e-5, 1e-1, log=True)

    chosen_imputer = trial.suggest_categorical('imputer', ['sklearn.impute.SimpleImputer'])
    if chosen_imputer == 'sklearn.impute.SimpleImputer':
        trial.suggest_categorical('imputer_args/strategy', ['mean', 'median'])

    scaler_choices = [
        'sklearn.preprocessing.StandardScaler',
        'sklearn.preprocessing.MinMaxScaler',
        'sklearn.preprocessing.RobustScaler',
    ]
    trial.suggest_categorical('scaler', scaler_choices)

    fixed_config = {}
    fixed_config['index_X'] = ['FSR_for_force', 'FSR_for_coord']
    fixed_config['index_y'] = ['force', 'x_coord', 'y_coord']
    fixed_config['data_loader'] = 'fsr_data.get_index_splited_by_subject'
    return fixed_config

In [2]:
import ray.tune
import ray.air
import ray.air.integrations.wandb
import ray.tune.schedulers
from fsr_trainable import FSR_Trainable
import ray.tune.search
import ray.tune.search.optuna

# Objective shared by the scheduler, the searcher and checkpoint ranking.
target_metric = 'rmse'
target_mode = 'min'

# Request 2 CPUs for every trial of FSR_Trainable.
trainable_with_resources = ray.tune.with_resources(FSR_Trainable, {'cpu': 2})

# Early-stopping scheduler: a single bracket, up to 100 iterations per trial.
asha_scheduler = ray.tune.schedulers.ASHAScheduler(
    max_t=100,
    grace_period=1,
    reduction_factor=2,
    brackets=1,
    metric=target_metric,
    mode=target_mode,
)

# Optuna-backed searcher driven by the define-by-run space function.
optuna_search = ray.tune.search.optuna.OptunaSearch(
    space=define_searchspace,
    metric=target_metric,
    mode=target_mode,
)

tune_config = ray.tune.TuneConfig(
    num_samples=1000,
    scheduler=asha_scheduler,
    search_alg=optuna_search,
)

# Log trials to the 'FSR-prediction' W&B project.
wandb_logger = ray.air.integrations.wandb.WandbLoggerCallback(project='FSR-prediction')

# Keep the 3 best checkpoints by the objective, saving every 5 iterations and at the end.
checkpoint_config = ray.air.CheckpointConfig(
    num_to_keep=3,
    checkpoint_score_attribute=target_metric,
    checkpoint_score_order=target_mode,
    checkpoint_frequency=5,
    checkpoint_at_end=True,
)

run_config = ray.air.RunConfig(
    callbacks=[wandb_logger],
    checkpoint_config=checkpoint_config,
)

tuner = ray.tune.Tuner(
    trainable=trainable_with_resources,
    tune_config=tune_config,
    run_config=run_config,
)

[I 2023-07-05 21:30:17,633] A new study created in memory with name: optuna


In [3]:
# Run the tuning experiment configured on `tuner` and keep the returned results object.
results = tuner.fit()

2023-07-05 12:59:12,225	INFO worker.py:1627 -- Started a local Ray instance. View the dashboard at [1m[32m127.0.0.1:8265 [39m[22m
2023-07-05 12:59:13,560	INFO tune.py:226 -- Initializing Ray automatically. For cluster usage or custom Ray initialization, call `ray.init(...)` before `Tuner(...)`.


0,1
Current time:,2023-07-05 14:48:07
Running for:,01:48:54.28
Memory:,4.6/7.7 GiB

Trial name,status,loc,criterion,data_loader,imputer,imputer_args/strateg y,index_X,index_y,model,model_args/cnn_hidde n_size,model_args/cnn_num_l ayer,model_args/hidden_si ze,model_args/lstm_hidd en_size,model_args/lstm_num_ layer,model_args/num_layer,optimizer,optimizer_args/lr,scaler,iter,total time (s),rmse,mae,mape
FSR_Trainable_09f76875,RUNNING,172.26.215.93:829840,torch.nn.MSELoss,fsr_data.get_in_42d0,sklearn.impute._b190,median,['FSR_for_force_5f00,"['force', 'x_co_4b40",fsr_model.ANN,,,32.0,,,2.0,torch.optim.Adagrad,0.0167658,sklearn.preproc_4210,1.0,0.654521,145.432,46.6233,7.88273e+16
FSR_Trainable_66a13aac,PENDING,,torch.nn.MSELoss,fsr_data.get_in_42d0,sklearn.impute._b190,median,['FSR_for_force_8040,"['force', 'x_co_b400",fsr_model.ANN,,,32.0,,,2.0,torch.optim.Adagrad,0.000451912,sklearn.preproc_4210,,,,,
FSR_Trainable_00aaa09a,TERMINATED,172.26.215.93:812577,torch.nn.MSELoss,fsr_data.get_in_42d0,sklearn.impute._b190,median,['FSR_for_force_1240,"['force', 'x_co_0fc0",fsr_model.ANN,,,64.0,,,2.0,torch.optim.Adagrad,0.0202296,sklearn.preproc_4210,4.0,2.16029,102.497,38.6561,6.20297e+16
FSR_Trainable_0156e8fd,TERMINATED,172.26.215.93:799415,torch.nn.MSELoss,fsr_data.get_in_42d0,sklearn.impute._b190,median,['FSR_for_force_54c0,"['force', 'x_co_5ec0",fsr_model.ANN,,,64.0,,,3.0,torch.optim.Adagrad,0.0238314,sklearn.preproc_4210,32.0,15.6676,102.575,37.9317,5.93478e+16
FSR_Trainable_022a36d7,TERMINATED,172.26.215.93:727824,torch.nn.MSELoss,fsr_data.get_in_42d0,sklearn.impute._b190,median,['FSR_for_force_5980,"['force', 'x_co_af00",fsr_model.ANN,,,32.0,,,3.0,torch.optim.Adagrad,0.0271635,sklearn.preproc_4210,32.0,9.85165,104.082,38.856,6.28856e+16
FSR_Trainable_0259bbf3,TERMINATED,172.26.215.93:815145,torch.nn.MSELoss,fsr_data.get_in_42d0,sklearn.impute._b190,median,['FSR_for_force_b5c0,"['force', 'x_co_8280",fsr_model.ANN,,,128.0,,,2.0,torch.optim.SGD,0.0779616,sklearn.preproc_4210,1.0,0.998672,228.422,76.7246,1.36387e+17
FSR_Trainable_025ecdc7,TERMINATED,172.26.215.93:808976,torch.nn.MSELoss,fsr_data.get_in_42d0,sklearn.impute._b190,median,['FSR_for_force_aa40,"['force', 'x_co_bd80",fsr_model.ANN,,,128.0,,,1.0,torch.optim.Adagrad,0.00076914,sklearn.preproc_4210,1.0,0.779067,461.115,188.547,3.14909e+17
FSR_Trainable_02f24937,TERMINATED,172.26.215.93:779465,torch.nn.MSELoss,fsr_data.get_in_42d0,sklearn.impute._b190,median,['FSR_for_force_0340,"['force', 'x_co_1800",fsr_model.CNN_LSTM,16.0,5.0,,64.0,5.0,,torch.optim.Adagrad,0.0228942,sklearn.preproc_4210,1.0,4.89111,226.071,67.1013,9.01184e+16
FSR_Trainable_02f72832,TERMINATED,172.26.215.93:757054,torch.nn.MSELoss,fsr_data.get_in_42d0,sklearn.impute._b190,median,['FSR_for_force_74c0,"['force', 'x_co_6840",fsr_model.ANN,,,64.0,,,2.0,torch.optim.Adagrad,0.025266,sklearn.preproc_4210,4.0,1.92954,103.177,38.9465,6.15771e+16
FSR_Trainable_031220ee,TERMINATED,172.26.215.93:825029,torch.nn.MSELoss,fsr_data.get_in_42d0,sklearn.impute._b190,median,['FSR_for_force_3680,"['force', 'x_co_0800",fsr_model.ANN,,,64.0,,,3.0,torch.optim.Adagrad,0.0109637,sklearn.preproc_4210,2.0,1.79492,110.316,43.3289,7.89152e+16


2023-07-05 12:59:13,621	INFO wandb.py:320 -- Already logged into W&B.


Trial name,date,done,hostname,iterations_since_restore,mae,mape,node_ip,pid,rmse,time_since_restore,time_this_iter_s,time_total_s,timestamp,training_iteration,trial_id
FSR_Trainable_00aaa09a,2023-07-05_14-32-07,True,DESKTOP-0P789CI,4,38.6561,6.20297e+16,172.26.215.93,812577,102.497,2.16029,0.413183,2.16029,1688535127,4,00aaa09a
FSR_Trainable_0156e8fd,2023-07-05_14-19-39,True,DESKTOP-0P789CI,32,37.9317,5.93478e+16,172.26.215.93,799415,102.575,15.6676,0.707756,15.6676,1688534379,32,0156e8fd
FSR_Trainable_022a36d7,2023-07-05_13-20-29,True,DESKTOP-0P789CI,32,38.856,6.28856e+16,172.26.215.93,727824,104.082,9.85165,0.298778,9.85165,1688530829,32,022a36d7
FSR_Trainable_0259bbf3,2023-07-05_14-34-18,True,DESKTOP-0P789CI,1,76.7246,1.36387e+17,172.26.215.93,815145,228.422,0.998672,0.998672,0.998672,1688535258,1,0259bbf3
FSR_Trainable_025ecdc7,2023-07-05_14-28-48,True,DESKTOP-0P789CI,1,188.547,3.14909e+17,172.26.215.93,808976,461.115,0.779067,0.779067,0.779067,1688534928,1,025ecdc7
FSR_Trainable_02f24937,2023-07-05_14-02-20,True,DESKTOP-0P789CI,1,67.1013,9.01184e+16,172.26.215.93,779465,226.071,4.89111,4.89111,4.89111,1688533340,1,02f24937
FSR_Trainable_02f72832,2023-07-05_13-43-55,True,DESKTOP-0P789CI,4,38.9465,6.15771e+16,172.26.215.93,757054,103.177,1.92954,0.293198,1.92954,1688532235,4,02f72832
FSR_Trainable_031220ee,2023-07-05_14-43-56,True,DESKTOP-0P789CI,2,43.3289,7.89152e+16,172.26.215.93,825029,110.316,1.79492,0.532513,1.79492,1688535836,2,031220ee
FSR_Trainable_031d831f,2023-07-05_13-01-14,True,DESKTOP-0P789CI,16,78.7535,1.58112e+17,172.26.215.93,705179,223.024,82.2599,5.55783,82.2599,1688529674,16,031d831f
FSR_Trainable_033235bb,2023-07-05_13-43-40,True,DESKTOP-0P789CI,8,38.732,6.32437e+16,172.26.215.93,756678,102.133,3.00694,0.271835,3.00694,1688532220,8,033235bb


[2m[36m(_WandbLoggingActor pid=704835)[0m wandb: Currently logged in as: seokjin. Use `wandb login --relogin` to force relogin
[2m[36m(_WandbLoggingActor pid=704835)[0m wandb: Tracking run with wandb version 0.15.4
[2m[36m(_WandbLoggingActor pid=704835)[0m wandb: Run data is saved locally in /home/seokj/ray_results/FSR_Trainable_2023-07-05_12-59-09/FSR_Trainable_80da3f67_1_criterion=torch_nn_MSELoss,data_loader=fsr_data_get_index_splited_by_subject,imputer=sklearn_impute_Simpl_2023-07-05_12-59-13/wandb/run-20230705_125924-80da3f67
[2m[36m(_WandbLoggingActor pid=704835)[0m wandb: Run `wandb offline` to turn off syncing.
[2m[36m(_WandbLoggingActor pid=704835)[0m wandb: Syncing run FSR_Trainable_80da3f67
[2m[36m(_WandbLoggingActor pid=704835)[0m wandb: ⭐️ View project at https://wandb.ai/seokjin/FSR-prediction
[2m[36m(_WandbLoggingActor pid=704835)[0m wandb: 🚀 View run at https://wandb.ai/seokjin/FSR-prediction/runs/80da3f67
[2m[36m(_WandbLoggingActor pid=705009)[0



In [4]:
# Reload the previously started experiment from its results directory.
# `restore` is a classmethod, so it is called on the `Tuner` class itself; this
# cell therefore no longer needs a freshly built `tuner` instance to exist.
# NOTE: the path is specific to the machine the experiment was run on.
results = ray.tune.Tuner.restore(
    path='/home/seokj/ray_results/FSR_Trainable_2023-07-05_12-59-09',
    # Pass the same resource-wrapped trainable the experiment was created with,
    # so any trial that resumes keeps its 2-CPU request.
    trainable=ray.tune.with_resources(FSR_Trainable, {'cpu': 2}),
).fit()

2023-07-05 21:32:18,504	INFO experiment_analysis.py:966 -- No trial data passed in during `ExperimentAnalysis` initialization -- you are most likely loading the experiment after it has completed.
Loading trial data from the experiment checkpoint file. This may result in loading some stale information, since checkpointing is periodic.


0,1
Current time:,2023-07-05 21:32:26
Running for:,00:00:01.99
Memory:,3.0/7.7 GiB

Trial name,status,loc,criterion,data_loader,imputer,imputer_args/strateg y,index_X,index_y,model,model_args/cnn_hidde n_size,model_args/cnn_num_l ayer,model_args/hidden_si ze,model_args/lstm_hidd en_size,model_args/lstm_num_ layer,model_args/num_layer,optimizer,optimizer_args/lr,scaler,iter,total time (s),rmse,mae,mape
FSR_Trainable_d2093de7,TERMINATED,172.26.215.93:71694,torch.nn.MSELoss,fsr_data.get_in_9c50,sklearn.impute._f910,median,['FSR_for_force_7840,"['force', 'x_co_7980",fsr_model.ANN,,,64.0,,,2.0,torch.optim.Adagrad,0.0127099,sklearn.preproc_9950,4,2.00436,107.173,36.4113,18526400.0
FSR_Trainable_4237811b,TERMINATED,172.26.215.93:71509,torch.nn.MSELoss,fsr_data.get_in_d170,sklearn.impute._3dc0,median,['FSR_for_force_2540,"['force', 'x_co_38c0",fsr_model.ANN,,,64.0,,,2.0,torch.optim.Adagrad,0.00867934,sklearn.preproc_cf30,2,1.16177,113.409,39.7939,19273600.0
FSR_Trainable_8e58a444,TERMINATED,172.26.215.93:71322,torch.nn.MSELoss,fsr_data.get_in_b570,sklearn.impute._5390,median,['FSR_for_force_bec0,"['force', 'x_co_1840",fsr_model.ANN,,,128.0,,,2.0,torch.optim.Adagrad,0.00179934,sklearn.preproc_bb70,1,0.911392,191.932,65.5954,1.19955e+17
FSR_Trainable_6a62e483,TERMINATED,172.26.215.93:71134,torch.nn.MSELoss,fsr_data.get_in_d590,sklearn.impute._5de0,median,['FSR_for_force_6c40,"['force', 'x_co_6dc0",fsr_model.ANN,,,128.0,,,3.0,torch.optim.Adagrad,0.0502265,sklearn.preproc_d350,1,1.18368,176.858,58.5354,1.0012e+17
FSR_Trainable_8d4e594c,TERMINATED,172.26.215.93:70948,torch.nn.MSELoss,fsr_data.get_in_20d0,sklearn.impute._26a0,median,['FSR_for_force_fa80,"['force', 'x_co_4340",fsr_model.ANN,,,64.0,,,3.0,torch.optim.Adagrad,0.00504617,sklearn.preproc_2010,2,1.4835,150.572,48.886,7.49623e+16
FSR_Trainable_a4603716,TERMINATED,172.26.215.93:70760,torch.nn.MSELoss,fsr_data.get_in_ca50,sklearn.impute._9480,median,['FSR_for_force_be00,"['force', 'x_co_5340",fsr_model.ANN,,,64.0,,,2.0,torch.optim.Adagrad,0.00051177,sklearn.preproc_a850,1,0.664616,301.578,92.1541,1.06861e+17
FSR_Trainable_089d9319,TERMINATED,172.26.215.93:70575,torch.nn.MSELoss,fsr_data.get_in_2d90,sklearn.impute._2420,median,['FSR_for_force_9b00,"['force', 'x_co_3c00",fsr_model.ANN,,,8.0,,,2.0,torch.optim.Adagrad,0.0328189,sklearn.preproc_3570,2,1.30857,142.745,48.0149,8.43698e+16
FSR_Trainable_394ed8f8,TERMINATED,172.26.215.93:70388,torch.nn.MSELoss,fsr_data.get_in_cff0,sklearn.impute._5390,median,['FSR_for_force_bc40,"['force', 'x_co_4b40",fsr_model.ANN,,,32.0,,,2.0,torch.optim.Adagrad,0.000367069,sklearn.preproc_cdb0,1,0.712965,408.849,132.445,1.6292e+17
FSR_Trainable_43b60b9e,TERMINATED,172.26.215.93:70200,torch.nn.MSELoss,fsr_data.get_in_0810,sklearn.impute._55c0,median,['FSR_for_force_64c0,"['force', 'x_co_c080",fsr_model.ANN,,,32.0,,,2.0,torch.optim.Adagrad,0.0222095,sklearn.preproc_0750,2,1.32882,115.516,42.5187,7.10236e+16
FSR_Trainable_598749f8,TERMINATED,172.26.215.93:70014,torch.nn.MSELoss,fsr_data.get_in_c450,sklearn.impute._3550,median,['FSR_for_force_7c00,"['force', 'x_co_a400",fsr_model.ANN,,,32.0,,,2.0,torch.optim.Adam,0.00060358,sklearn.preproc_c030,1,0.811654,220.745,74.7662,1.3882e+17


2023-07-05 21:32:21,563	INFO experiment_state.py:435 -- A local experiment checkpoint was found and will be used to restore the previous experiment state.
2023-07-05 21:32:21,602	INFO trial_runner.py:422 -- Using the newest experiment state file found within the experiment directory: experiment_state-2023-07-05_21-30-25.json
2023-07-05 21:32:24,958	INFO wandb.py:320 -- Already logged into W&B.
2023-07-05 21:32:27,368	INFO tune.py:1111 -- Total run time: 5.85 seconds (0.00 seconds for the tuning loop).


[2m[33m(raylet)[0m [2023-07-05 21:56:23,434 E 75614 75614] (raylet) node_manager.cc:3069: 1 Workers (tasks / actors) killed due to memory pressure (OOM), 0 Workers crashed due to other reasons at node (ID: 88697f47488e3eef3ef0885f2246dd05ce5ee6ed5dd6e9b1280dd318, IP: 172.26.215.93) over the last time period. To see more information about the Workers killed on this node, use `ray logs raylet.out -ip 172.26.215.93`
[2m[33m(raylet)[0m 
[2m[33m(raylet)[0m Refer to the documentation on how to address the out of memory issue: https://docs.ray.io/en/latest/ray-core/scheduling/ray-oom-prevention.html. Consider provisioning more memory on this node or reducing task parallelism by requesting more CPUs per task. To adjust the kill threshold, set the environment variable `RAY_memory_usage_threshold` when starting Ray. To disable worker killing, set the environment variable `RAY_memory_monitor_refresh_ms` to zero.
