In [1]:
from datasets.datamodule import DEEPscreenDataModule
from engine.system import DEEPScreenClassifier
from lightning import Trainer
from lightning.pytorch.loggers import TensorBoardLogger
from lightning.pytorch.callbacks.early_stopping import EarlyStopping
from lightning.pytorch.callbacks import ModelCheckpoint
from lightning.pytorch.callbacks import RichProgressBar
import pandas as pd

In [3]:
from ray.train.lightning import RayDDPStrategy, RayLightningEnvironment, RayTrainReportCallback, prepare_trainer
from ray import tune
from ray.tune.schedulers import MedianStoppingRule
from ray.train import RunConfig, ScalingConfig, CheckpointConfig
from ray.train.torch import TorchTrainer
import os
from utils.configurations import configs


class deepscreen_hyperparameter_tuneing:
    """Hyperparameter search for the DEEPScreen classifier using Ray Tune.

    Wraps a Lightning training loop (``_train_func``) in a Ray ``TorchTrainer``
    and runs it through ``tune.Tuner`` with a ``MedianStoppingRule`` scheduler.

    Args:
        data: DataFrame handed unchanged to ``DEEPscreenDataModule``.
        search_space: Ray Tune search space; passed as ``train_loop_config``.
            Must provide ``batch_size``; every entry is also forwarded to
            ``DEEPScreenClassifier`` as a keyword argument.
        target: Target identifier; used as ``target_id`` for the data module
            and as the name of the per-target results sub-directory.
        data_split_mode: Split mode forwarded to ``DEEPscreenDataModule``.
        grace_period: ``grace_period`` of the ``MedianStoppingRule``, measured
            in ``training_iteration`` units.
        metric: Metric name used by the scheduler and for checkpoint ranking.
        mode: ``"max"`` or ``"min"``; direction in which ``metric`` improves.
        num_workers: Number of Ray training workers (one CPU and one GPU each).
        num_samples: Number of configurations sampled from ``search_space``.
        experiments_result_path: Base directory for experiment artefacts; the
            per-target directory ``<base>/<target>`` is created if missing.
    """

    def __init__(
        self,
        data: pd.DataFrame,
        search_space: dict,
        target: str,
        data_split_mode: str = "non_random_split",
        grace_period: int = 90,
        metric: str = "val_mcc",
        mode: str = "max",
        num_workers: int = 1,
        num_samples: int = 10,
        experiments_result_path="../../.experiments/",
    ):
        self.data = data
        self.search_space = search_space
        self.num_samples = num_samples
        self.target = target
        self.data_split_mode = data_split_mode
        # Resolved to an absolute path on the driver: `_train_func` runs inside
        # Ray workers, whose working directory may differ from the notebook's,
        # so a relative path could point somewhere else there.
        self.experiment_result_path = self._resolve_result_dir(
            experiments_result_path, self.target
        )

        self.scheduler = MedianStoppingRule(
            time_attr="training_iteration",
            metric=metric,
            mode=mode,
            # Number of training iterations to wait before a trial may be
            # stopped for not beating the median of the other trials.
            grace_period=grace_period,
        )

        self.scaling_config = ScalingConfig(
            num_workers=num_workers,
            use_gpu=True,
            resources_per_worker={"CPU": 1, "GPU": 1},
        )

        # Keep only the two best checkpoints, ranked by the tuned metric.
        self.run_config = RunConfig(
            checkpoint_config=CheckpointConfig(
                num_to_keep=2,
                checkpoint_score_attribute=metric,
                checkpoint_score_order=mode,
            ),
        )

        # NOTE(review): `_train_func` is a bound method, so the whole instance
        # (including `self.data`) is presumably serialized along with it when
        # Ray ships the function to workers - confirm this is acceptable.
        self.ray_trainer = TorchTrainer(
            self._train_func,
            scaling_config=self.scaling_config,
            run_config=self.run_config,
        )

    @staticmethod
    def _resolve_result_dir(base_path, target):
        """Return the absolute directory ``<base_path>/<target>``, creating it if needed."""
        result_dir = os.path.abspath(os.path.join(base_path, target))
        # exist_ok avoids the check-then-create race of exists() + makedirs().
        os.makedirs(result_dir, exist_ok=True)
        return result_dir

    def _train_func(self, config):
        """Per-worker training loop executed by the Ray ``TorchTrainer``.

        Args:
            config: One sampled hyperparameter dict. ``config["batch_size"]``
                sizes the data module batches, and the full dict is forwarded
                to ``DEEPScreenClassifier`` as keyword arguments.
        """
        dm = DEEPscreenDataModule(
            data=self.data,
            target_id=self.target,
            batch_size=config["batch_size"],
            experiment_result_path=self.experiment_result_path,
            data_split_mode=self.data_split_mode,
            tmp_imgs=configs.get_use_tmp_imgs(),
        )
        model = DEEPScreenClassifier(
            **config, experiment_result_path=self.experiment_result_path
        )

        # Ray-specific strategy, callback and environment plugin; the progress
        # bar is disabled for the worker processes.
        trainer = Trainer(
            devices="auto",
            accelerator="auto",
            strategy=RayDDPStrategy(),
            callbacks=[RayTrainReportCallback()],
            plugins=[RayLightningEnvironment()],
            enable_progress_bar=False,
        )
        trainer = prepare_trainer(trainer)
        trainer.fit(model, datamodule=dm)

    def tune_deepscreen(self):
        """Run the hyperparameter search and return the result of ``Tuner.fit()``.

        ``metric`` and ``mode`` are configured on the scheduler only and are
        deliberately not repeated in ``TuneConfig``.
        """
        tuner = tune.Tuner(
            self.ray_trainer,
            param_space={"train_loop_config": self.search_space},
            tune_config=tune.TuneConfig(
                num_samples=self.num_samples,
                scheduler=self.scheduler,
            ),
        )
        return tuner.fit()


In [4]:
# Load the processed dataset for target CHEMBL5567 into a DataFrame.
data = pd.read_csv(filepath_or_buffer="../../.data/processed/CHEMBL5567.csv")

In [5]:
# Hyperparameter search space: each entry is a categorical choice sampled by Ray Tune.
# `batch_size` is read by the data module; the whole dict is also forwarded to the model.
search_space_deepscreen = {
    "fully_layer_1": tune.choice([16, 32, 128, 256, 512]),
    "fully_layer_2": tune.choice([16, 32, 128, 256, 512]),
    "learning_rate": tune.choice([0.0005, 0.0001, 0.005, 0.001, 0.01]),
    "batch_size": tune.choice([32, 64]),
    "drop_rate": tune.choice([0.5, 0.6, 0.8]),
}

In [7]:
# Build the tuning wrapper for target "chembl5567" with default scheduler and
# worker settings, then launch the search; the last expression displays the result.
tuner = deepscreen_hyperparameter_tuneing(
    data=data,
    search_space=search_space_deepscreen,
    target="chembl5567",
)
tuner.tune_deepscreen()

2024-02-08 17:05:26,571	INFO worker.py:1724 -- Started a local Ray instance.
2024-02-08 17:05:27,554	INFO tune.py:220 -- Initializing Ray automatically. For cluster usage or custom Ray initialization, call `ray.init(...)` before `Tuner(...)`.
2024-02-08 17:05:27,557	INFO tune.py:583 -- [output] This uses the legacy output and progress reporter, as Jupyter notebooks are not supported by the new engine, yet. For more information, please see https://github.com/ray-project/ray/issues/36949


ValueError: You passed a `metric` or `mode` argument to `Tuner(...)`, but the scheduler you are using was already instantiated with their own `metric` and `mode` parameters. Either remove the arguments from your scheduler or from `Tuner(...)` args.