In [2]:
from glob import glob
from pathlib import Path

import pandas as pd
# Collect every file three directory levels below ./data, i.e. ./data/<subject>/<pose>/<filename>.
# glob returns matches in arbitrary order, so sort them to get a reproducible listing.
paths = sorted(glob('./data/*/*/*'))
if not paths:
    # Fail early with a clear message instead of an opaque pd.concat error further down.
    raise FileNotFoundError("no files matched './data/*/*/*'")

# The last three path components are (subject, pose, filename); pathlib splits on the
# platform's own separator, so this does not depend on '/' being used.
filedata = pd.DataFrame(
    [Path(path).parts[-3:] for path in paths],
    columns=['subject', 'pose', 'filename'],
)
filedata['path'] = paths
# 'filename' is part of the sort key so that the row order, and therefore the ids
# assigned below, is deterministic even when one subject/pose has several files.
filedata = filedata.sort_values(['subject', 'pose', 'filename']).reset_index(drop=True)

# NOTE(review): pd.read_pickle can execute arbitrary code while unpickling;
# only load files from a trusted source.
frames = []
for index, value in filedata.iterrows():
    df = pd.read_pickle(value['path'])
    # Turn the frame's index into a regular column named 'time'.
    df = df.rename_axis('time').reset_index()
    # Tag every row with the position of its file in `filedata`.
    df['id'] = index
    # Move the freshly appended 'id' column to the front.
    cols = df.columns.to_list()
    df = df[cols[-1:] + cols[:-1]]
    frames.append(df)

# Stack all files into one frame with a fresh 0..n-1 row index.
data = pd.concat(frames, ignore_index=True)
data

Category,id,time,force,force,force,force,force,force,x_coord,x_coord,...,FSR_for_force,FSR_for_force,FSR_for_force,FSR_for_force,FSR_for_coord,FSR_for_coord,FSR_for_coord,FSR_for_coord,FSR_for_coord,FSR_for_coord
Position,Unnamed: 1_level_1,Unnamed: 2_level_1,A,B,C,D,E,F,A,B,...,C,D,E,F,A,B,C,D,E,F
0,0,0,0.0,0.0,0.0,22.0,0.0,0.0,,,...,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,0.000000,0.0,0.0
1,0,1,0.0,0.0,0.0,277.0,0.0,0.0,,,...,0.0,60.0,0.0,0.0,0.0,0.0,0.000000,0.952381,0.0,0.0
2,0,2,0.0,0.0,0.0,488.0,0.0,0.0,,,...,0.0,73.0,0.0,0.0,0.0,0.0,0.000000,1.158730,0.0,0.0
3,0,3,0.0,0.0,0.0,501.0,0.0,0.0,,,...,0.0,84.0,0.0,0.0,0.0,0.0,0.000000,1.333333,0.0,0.0
4,0,4,0.0,0.0,0.0,540.0,0.0,0.0,,,...,0.0,100.0,0.0,0.0,0.0,0.0,0.000000,1.587302,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
52144,72,635,0.0,0.0,0.0,983.0,0.0,0.0,,,...,37.0,180.0,0.0,0.0,0.0,0.0,0.587302,2.857143,0.0,0.0
52145,72,636,0.0,0.0,0.0,962.0,0.0,0.0,,,...,38.0,172.0,0.0,0.0,0.0,0.0,0.603175,2.730159,0.0,0.0
52146,72,637,0.0,0.0,0.0,910.0,0.0,0.0,,,...,38.0,165.0,0.0,0.0,0.0,0.0,0.603175,2.619048,0.0,0.0
52147,72,638,0.0,0.0,0.0,851.0,0.0,0.0,,,...,39.0,160.0,0.0,0.0,0.0,0.0,0.619048,2.539683,0.0,0.0


In [3]:
import torch

class FSRDataset(torch.utils.data.Dataset):
    """Map-style dataset pairing rows of a feature frame with rows of a target frame.

    ``index`` is a sequence of ``.loc`` selectors. Item ``i`` is whatever
    ``index[i]`` selects from both frames, returned as float32 NumPy arrays.
    """

    def __init__(self, X_df, y_df, index):
        # Features and targets must have the same number of rows.
        assert len(X_df) == len(y_df)
        self.X_df, self.y_df, self.index = X_df, y_df, index

    def __len__(self):
        """Return the number of selectors held in ``index``."""
        return len(self.index)

    def __getitem__(self, idx):
        """Return ``(X, y)`` for the ``idx``-th selector as float32 arrays."""
        import numpy as np

        selector = self.index[idx]
        features, targets = (
            frame.loc[selector].to_numpy().astype(np.float32)
            for frame in (self.X_df, self.y_df)
        )
        return features, targets

In [4]:
def get_index_splited_by_time(test_size=None):
    """Split the row index of every ``id`` group of ``data`` into train and test parts.

    Each group is split without shuffling, so the leading rows of a group go
    to the train part and the trailing rows to the test part.

    Args:
        test_size: forwarded to ``sklearn.model_selection.train_test_split``.
            ``None`` (the default) keeps the previous behaviour of holding out
            the last 20% of each group.

    Returns:
        ``(train_indexes, test_indexes)``: two lists with one index object per
        ``id`` group, in ``groupby`` order.
    """
    from sklearn.model_selection import train_test_split

    # Previously the argument was ignored and 0.2 was always used; keep 0.2 as
    # the fallback so callers that pass nothing get identical splits.
    if test_size is None:
        test_size = 0.2

    train_indexes = []
    test_indexes = []
    for _, group in data.groupby('id'):
        train_index, test_index = train_test_split(group.index, test_size=test_size, shuffle=False)
        train_indexes.append(train_index)
        test_indexes.append(test_index)
    return train_indexes, test_indexes

In [5]:
class LSTM(torch.nn.Module):
    """Stacked LSTM encoder followed by a linear layer applied to every output step."""

    def __init__(self, input_size, hidden_size, num_layer, output_size):
        super().__init__()
        # Keep the hyperparameters on the instance for later inspection.
        self.input_size, self.hidden_size = input_size, hidden_size
        self.num_layer, self.output_size = num_layer, output_size
        self.encoder = torch.nn.LSTM(
            input_size=input_size,
            hidden_size=hidden_size,
            num_layers=num_layer,
        )
        self.decoder = torch.nn.Linear(in_features=hidden_size, out_features=output_size)

    def forward(self, x):
        """Run ``x`` through the encoder and project its output sequence."""
        # The encoder also returns its final (h, c) state, which is not used here.
        encoded, _state = self.encoder(x)
        return self.decoder(encoded)

In [6]:
import torch
from ray.air import session
from ray.air.config import ScalingConfig
from sklearn.metrics import mean_absolute_error, mean_squared_error
import ray.train.torch
import numpy as np
import random
import model as net
import torch.utils.data

def train_loop_per_worker(config):
    """Training function executed on each Ray Train worker.

    Builds the model, loss and optimizer named in ``config``, trains on the
    train split for ``config['num_epochs']`` epochs and, after every epoch,
    reports the sample-weighted MAE and RMSE over the test split through
    ``session.report``.

    Args:
        config: dict with the keys
            ``model_name`` (attribute looked up on the ``net`` module),
            ``model_args`` (keyword arguments for that constructor),
            ``num_epochs``,
            ``criterion_name`` (attribute of ``torch.nn``),
            ``optimizer_name`` (attribute of ``torch.optim``) and
            ``lr``.

    Returns:
        The lowest RMSE observed over all epochs on this worker.
    """
    ray.train.torch.enable_reproducibility()
    model_name = config['model_name']
    model_args = config['model_args']
    num_epochs = config['num_epochs']
    criterion_name = config['criterion_name']
    optimizer_name = config['optimizer_name']
    lr = config['lr']

    model = getattr(net, model_name)(**model_args)
    model = ray.train.torch.prepare_model(model)
    criterion = getattr(torch.nn, criterion_name)()
    optimizer = getattr(torch.optim, optimizer_name)(model.parameters(), lr=lr)

    train_index, test_index = get_index_splited_by_time()
    train_dataset = FSRDataset(data['FSR_for_force'], data['force'], train_index)
    test_dataset = FSRDataset(data['FSR_for_force'], data['force'], test_index)
    # batch_size=None disables automatic batching: every item the loader yields
    # is one dataset item, i.e. the rows selected by one entry of the index list.
    train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=None)
    test_loader = torch.utils.data.DataLoader(test_dataset, batch_size=None)
    train_loader = ray.train.torch.prepare_data_loader(train_loader)
    test_loader = ray.train.torch.prepare_data_loader(test_loader)

    best_rmse = float('inf')

    for epoch in range(num_epochs):
        model.train()
        criterion.train()
        for X, y in train_loader:
            pred = model(X)
            loss = criterion(pred, y)
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

        model.eval()
        criterion.eval()
        with torch.no_grad():
            mae = []
            mse = []
            num = []
            for X, y in test_loader:
                pred = model(X)
                # The prepared loader may place batches on the worker's device.
                # sklearn needs host memory, so move the targets to CPU as well
                # as the predictions (previously only `pred` was moved).
                y_cpu = y.cpu()
                pred_cpu = pred.detach().cpu()
                mae.append(mean_absolute_error(y_cpu, pred_cpu))
                mse.append(mean_squared_error(y_cpu, pred_cpu))
                num.append(len(y))
            mae = np.array(mae)
            mse = np.array(mse)
            num = np.array(num)
            # Weight every per-item metric by its number of rows before averaging.
            mae = (mae * num).sum() / num.sum()
            mse = (mse * num).sum() / num.sum()

            rmse = mse ** 0.5
            if rmse < best_rmse:
                best_rmse = rmse

            session.report({'MAE': mae, 'RMSE': rmse})

    return best_rmse

# Distributed trainer: runs train_loop_per_worker on three CPU-only workers.
trainer = ray.train.torch.TorchTrainer(
    train_loop_per_worker=train_loop_per_worker,
    # This mapping is handed to train_loop_per_worker as its `config` argument.
    train_loop_config=dict(
        # 'batch_size' is carried in the config but train_loop_per_worker does not read it.
        batch_size=128,
        lr=0.0001,
        model_name='LSTM',
        model_args=dict(
            input_size=6,
            hidden_size=128,
            num_layer=4,
            output_size=6,
        ),
        num_epochs=32,
        criterion_name='MSELoss',
        optimizer_name='Adam',
    ),
    scaling_config=ScalingConfig(
        num_workers=3,
        use_gpu=False,
        # Reserve no CPU for the trainer process itself.
        trainer_resources=dict(CPU=0),
    ),
)

In [7]:
# Start the training run configured on `trainer`; as the cell's last expression,
# the object returned by fit() is displayed below.
trainer.fit()

2023-06-28 10:26:22,595	INFO worker.py:1627 -- Started a local Ray instance. View the dashboard at [1m[32m127.0.0.1:8265 [39m[22m
2023-06-28 10:26:23,797	INFO tune.py:226 -- Initializing Ray automatically. For cluster usage or custom Ray initialization, call `ray.init(...)` before `Trainer(...)`.


0,1
Current time:,2023-06-28 10:28:10
Running for:,00:01:46.36
Memory:,4.4/7.7 GiB

Trial name,status,loc,iter,total time (s),MAE,RMSE
TorchTrainer_cb57c_00000,TERMINATED,172.26.215.93:84502,32,100.355,227.397,525.853


[2m[36m(TorchTrainer pid=84502)[0m 2023-06-28 10:26:29,797	INFO backend_executor.py:137 -- Starting distributed worker processes: ['84566 (172.26.215.93)', '84567 (172.26.215.93)', '84568 (172.26.215.93)']
[2m[36m(RayTrainWorker pid=84566)[0m 2023-06-28 10:26:31,419	INFO config.py:86 -- Setting up process group for: env:// [rank=0, world_size=3]
[2m[36m(RayTrainWorker pid=84566)[0m 2023-06-28 10:26:33,295	INFO train_loop_utils.py:286 -- Moving model to device: cpu
[2m[36m(RayTrainWorker pid=84566)[0m 2023-06-28 10:26:33,296	INFO train_loop_utils.py:346 -- Wrapping provided model in DistributedDataParallel.


Trial name,MAE,RMSE,date,done,experiment_tag,hostname,iterations_since_restore,node_ip,pid,time_since_restore,time_this_iter_s,time_total_s,timestamp,training_iteration,trial_id
TorchTrainer_cb57c_00000,227.397,525.853,2023-06-28_10-28-08,True,0,DESKTOP-0P789CI,32,172.26.215.93,84502,100.355,2.81067,100.355,1687915688,32,cb57c_00000


2023-06-28 10:28:10,209	INFO tune.py:1111 -- Total run time: 106.41 seconds (106.36 seconds for the tuning loop).


Result(
  metrics={'MAE': 227.39678746011765, 'RMSE': 525.8533704863545, 'done': True, 'trial_id': 'cb57c_00000', 'experiment_tag': '0'},
  path='/home/seokj/ray_results/TorchTrainer_2023-06-28_10-26-20/TorchTrainer_cb57c_00000_0_2023-06-28_10-26-23',
  checkpoint=None
)