In [1]:
import ray.tune
import ray.air
import ray.air.integrations.wandb
import ray.tune.schedulers
import torch
import numpy as np
import pandas as pd
import sklearn.preprocessing
import sklearn.metrics
import os

In [2]:
class LSTM(torch.nn.Module):
    def __init__(self, input_size, hidden_size, num_layer, output_size):
        super().__init__()
        self.input_size = input_size
        self.hidden_size = hidden_size
        self.num_layer = num_layer
        self.output_size = output_size
        self.encoder = torch.nn.LSTM(input_size, hidden_size, num_layer)
        self.decoder = torch.nn.Linear(hidden_size, output_size)
    
    def forward(self, x):
        x, _ = self.encoder(x)
        x = self.decoder(x)
        return x

In [3]:
class Dataset(torch.utils.data.Dataset):
    def __init__(self, X_df, y_df, index):
        assert(len(X_df) == len(y_df))
        self.X_df = X_df
        self.y_df = y_df
        self.index = index

    def __len__(self):
        return len(self.index)
    
    def __getitem__(self, idx):
        import numpy as np
        X = self.X_df.loc[self.index[idx]].to_numpy().astype(np.float32)
        y = self.y_df.loc[self.index[idx]].to_numpy().astype(np.float32)
        return X, y

In [4]:
class Trainable(ray.tune.Trainable):
    def setup(self, config):
        model = config['model']
        model_args = config['model_args']
        criterion = config['criterion']
        optimizer = config['optimizer']
        lr = config['lr']

        self.model = model(**model_args)
        self.criterion = criterion()
        self.optimizer = optimizer(self.model.parameters(), lr=lr)

        data = pd.DataFrame(np.random.normal(size=(10000, 4)), columns=['A', 'B', 'C', 'D'])
        train_index = [pd.Index(np.arange(500 * (i), 500 * (i + 1))) for i in range(10)]
        test_index = [pd.Index(np.arange(500 * (10 + i), 500 * (10 + i + 1))) for i in range(10)]
        scaler = sklearn.preprocessing.StandardScaler()
        data[:] = scaler.fit_transform(data)
        train_dataset = Dataset(data[['A', 'B']], data[['C', 'D']], train_index)
        test_dataset = Dataset(data[['A', 'B']], data[['C', 'D']], test_index)
        
        self.train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=None)
        self.test_loader = torch.utils.data.DataLoader(test_dataset, batch_size=None)
    
    def step(self):
        self.model.train()
        for X, y in self.train_loader:
            pred = self.model(X)
            loss = self.criterion(pred, y)
            self.optimizer.zero_grad()
            loss.backward()
            self.optimizer.step()
        self.model.eval()
        with torch.no_grad():
            mae, mse, mape, num = [], [], [], []
            for X, y in self.test_loader:
                pred = self.model(X)
                mae.append(sklearn.metrics.mean_absolute_error(y, pred.cpu().detach()))
                mse.append(sklearn.metrics.mean_squared_error(y, pred.cpu().detach()))
                mape.append(sklearn.metrics.mean_absolute_percentage_error(y, pred.cpu().detach()))
                num.append(len(y))
            mae = np.average(mae, weights=num)
            mse = np.average(mse, weights=num)
            mape = np.average(mape, weights=num)
            rmse = mse ** 0.5
        return {'rmse': rmse, 'mae':mae, 'mape':mape}

    def save_checkpoint(self, tmp_checkpoint_dir):
        checkpoint_path = os.path.join(tmp_checkpoint_dir, "model.pth")
        torch.save(self.model.state_dict(), checkpoint_path)
        return tmp_checkpoint_dir
    
    def load_checkpoint(self, tmp_checkpoint_dir):
        checkpoint_path = os.path.join(tmp_checkpoint_dir, "model.pth")
        self.model.load_state_dict(torch.load(checkpoint_path))

In [5]:
tuner = ray.tune.Tuner(
    trainable=Trainable,
    tune_config=ray.tune.TuneConfig(
        metric='rmse',
        mode='min',
        num_samples=-1, 
        scheduler=ray.tune.schedulers.ASHAScheduler(
            max_t=1,
            grace_period=1,
            reduction_factor=2,
            brackets=1,
        ),
    ),
    param_space={
        'lr':ray.tune.loguniform(1e-5, 1e-2),
        'model':LSTM,
        'model_args':{
            'input_size':2, 
            'hidden_size':ray.tune.choice([8, 16, 32, 64, 128, 256, 512]), 
            'num_layer':ray.tune.randint(1, 8), 
            'output_size':2,
        },
        'criterion': torch.nn.MSELoss, 
        'optimizer': torch.optim.Adam,
    },
    run_config=ray.air.RunConfig(
        checkpoint_config=ray.air.CheckpointConfig(
            num_to_keep=3,
            checkpoint_score_attribute='rmse',
            checkpoint_score_order='min',
            checkpoint_frequency=5,
            checkpoint_at_end=True,
        ), 
    ),
)
results = tuner.fit()

2023-07-01 11:07:42,472	INFO worker.py:1627 -- Started a local Ray instance. View the dashboard at [1m[32m127.0.0.1:8266 [39m[22m
2023-07-01 11:07:43,798	INFO tune.py:226 -- Initializing Ray automatically. For cluster usage or custom Ray initialization, call `ray.init(...)` before `Tuner(...)`.


0,1
Current time:,2023-07-01 11:56:37
Running for:,00:48:53.21
Memory:,5.7/7.7 GiB

Trial name,status,loc,lr,model_args/hidden_si ze,model_args/num_layer,iter,total time (s),rmse,mae,mape
Trainable_10c93_00730,RUNNING,172.26.215.93:260314,0.000546645,512,5,,,,,
Trainable_10c93_00732,RUNNING,172.26.215.93:260431,1.91411e-05,512,4,,,,,
Trainable_10c93_00735,RUNNING,172.26.215.93:260725,1.09245e-05,512,2,,,,,
Trainable_10c93_00738,PENDING,,0.000670349,8,6,,,,,
Trainable_10c93_00739,PENDING,,0.00128202,128,7,,,,,
Trainable_10c93_00740,PENDING,,1.42186e-05,16,3,,,,,
Trainable_10c93_00741,PENDING,,3.62498e-05,32,6,,,,,
Trainable_10c93_00742,PENDING,,1.10074e-05,64,5,,,,,
Trainable_10c93_00743,PENDING,,7.55481e-05,128,4,,,,,
Trainable_10c93_00744,PENDING,,0.000462501,32,6,,,,,


Trial name,date,done,hostname,iterations_since_restore,mae,mape,node_ip,pid,rmse,time_since_restore,time_this_iter_s,time_total_s,timestamp,training_iteration,trial_id
Trainable_10c93_00000,2023-07-01_11-07-57,True,DESKTOP-0P789CI,1,0.796665,1.09199,172.26.215.93,205480,0.998567,1.35742,1.35742,1.35742,1688177277,1,10c93_00000
Trainable_10c93_00001,2023-07-01_11-08-09,True,DESKTOP-0P789CI,1,0.804721,1.05399,172.26.215.93,205481,1.00542,13.4919,13.4919,13.4919,1688177289,1,10c93_00001
Trainable_10c93_00002,2023-07-01_11-07-57,True,DESKTOP-0P789CI,1,0.787497,1.04705,172.26.215.93,205482,0.986632,2.38128,2.38128,2.38128,1688177277,1,10c93_00002
Trainable_10c93_00003,2023-07-01_11-07-56,True,DESKTOP-0P789CI,1,0.803448,1.17208,172.26.215.93,205483,1.00751,1.14628,1.14628,1.14628,1688177276,1,10c93_00003
Trainable_10c93_00004,2023-07-01_11-07-59,True,DESKTOP-0P789CI,1,0.796168,1.23012,172.26.215.93,205484,0.996956,4.1471,4.1471,4.1471,1688177279,1,10c93_00004
Trainable_10c93_00005,2023-07-01_11-07-59,True,DESKTOP-0P789CI,1,0.790286,1.02075,172.26.215.93,205485,0.995224,4.05969,4.05969,4.05969,1688177279,1,10c93_00005
Trainable_10c93_00006,2023-07-01_11-08-05,True,DESKTOP-0P789CI,1,0.80289,1.57191,172.26.215.93,205486,1.00859,9.55607,9.55607,9.55607,1688177285,1,10c93_00006
Trainable_10c93_00007,2023-07-01_11-07-56,True,DESKTOP-0P789CI,1,0.816287,2.72519,172.26.215.93,205487,1.02686,0.681098,0.681098,0.681098,1688177276,1,10c93_00007
Trainable_10c93_00008,2023-07-01_11-08-13,True,DESKTOP-0P789CI,1,0.793757,1.16331,172.26.215.93,206007,0.998171,1.05929,1.05929,1.05929,1688177293,1,10c93_00008
Trainable_10c93_00009,2023-07-01_11-09-29,True,DESKTOP-0P789CI,1,0.798588,1.12071,172.26.215.93,206010,0.997699,75.8652,75.8652,75.8652,1688177369,1,10c93_00009




