In [9]:
import os
import numpy as np
import pandas as pd
from dotenv import load_dotenv
from pathlib import Path
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import roc_auc_score, average_precision_score
import ray
from ray import tune
from ray.tune.schedulers import ASHAScheduler
from ray.tune.suggest.bayesopt import BayesOptSearch
from sklearn.metrics import f1_score

load_dotenv


ModuleNotFoundError: No module named 'ray.tune.suggest'

In [2]:
# Assuming feature_df and targets_df are already defined
data_dir = os.getenv("DATA")
minmax_df = pd.read_csv(Path(data_dir) / "minmax_dataset.csv")
targets_df = pd.read_csv(Path(data_dir) / "target.csv")

In [3]:
X = minmax_df.sample(n=50000)
ids = X.id
y = targets_df[targets_df.id.isin(ids)].drop(columns=["id"]).values.reshape(-1)
X = X.values

In [4]:
X_id = ray.put(X)
y_id = ray.put(y)

2024-09-30 12:18:45,089	INFO worker.py:1777 -- Started a local Ray instance. View the dashboard at [1m[32m127.0.0.1:8265 [39m[22m


In [5]:
ray.available_resources()

{'GPU': 1.0,
 'memory': 23334279373.0,
 'accelerator_type:G': 1.0,
 'object_store_memory': 10000000000.0,
 'CPU': 12.0,
 'node:__internal_head__': 1.0,
 'node:172.17.0.2': 1.0}

In [6]:
from utils.evaluators import evaluate_network

In [7]:
def train_dnn(config):
    X = ray.get(X_id)
    y = ray.get(y_id)
    _, _, f1 = evaluate_network(X, y, lr=config["learning_rate"], layer_sizes=[config["units_1"], config["units_1"]])
    return {"f1_score": f1}


def train_model(config):
    return train_dnn(config)


search_space = {
    # DNN hyperparameters
    "units_1": tune.randint(16, 128),
    "units_2": tune.randint(16, 128),
    "learning_rate": tune.loguniform(1e-4, 1e-2)
}

ray.init(num_cpus=8, num_gpus=1, ignore_reinit_error=True)

analysis = tune.run(train_model,
                    config=search_space,
                    num_samples=50,
                    search_alg=BayesOptSearch(metric="f1_score", mode="max"),
                    scheduler=ASHAScheduler(metric="f1_score", mode="max"),
                    resources_per_trial={
                        "cpu": 1,
                        "gpu": 1/8
                    })

best_config = analysis.get_best_config(metric="f1_score", mode="max")
best_trial = analysis.get_best_trial(metric="f1_score", mode="max")

print("Best config:", best_config)
print("Best F1 Score:", best_trial.last_result["f1_score"])

ray.shutdown()

2024-09-30 12:18:46,922	INFO worker.py:1619 -- Calling ray.init() again after it has already been called.
2024-09-30 12:18:46,924	INFO tune.py:616 -- [output] This uses the legacy output and progress reporter, as Jupyter notebooks are not supported by the new engine, yet. For more information, please see https://github.com/ray-project/ray/issues/36949


0,1
Current time:,2024-09-30 12:37:09
Running for:,00:18:22.86
Memory:,23.9/47.0 GiB

Trial name,status,loc,learning_rate,units_1,units_2,iter,total time (s),f1_score
train_model_247ab_00000,TERMINATED,172.17.0.2:46022,0.000469946,46,50,1,155.814,0.0
train_model_247ab_00001,TERMINATED,172.17.0.2:46023,0.00442509,111,88,1,185.498,0.0
train_model_247ab_00002,TERMINATED,172.17.0.2:46024,0.000913068,35,124,1,151.302,0.0
train_model_247ab_00003,TERMINATED,172.17.0.2:46026,0.000246856,54,126,1,149.368,0.0
train_model_247ab_00004,TERMINATED,172.17.0.2:46025,0.00239071,65,117,1,176.511,0.0
train_model_247ab_00005,TERMINATED,172.17.0.2:46027,0.000872243,124,112,1,192.704,0.0
train_model_247ab_00006,TERMINATED,172.17.0.2:46028,0.000463264,77,125,1,172.471,0.0741847
train_model_247ab_00007,TERMINATED,172.17.0.2:46029,0.000111618,17,76,1,147.666,0.000969932
train_model_247ab_00008,TERMINATED,172.17.0.2:46933,0.00013264,104,21,1,169.5,0.0741452
train_model_247ab_00009,TERMINATED,172.17.0.2:46988,0.00012451,87,118,1,168.713,0.0


[36m(train_model pid=46029)[0m 
[36m(train_model pid=46023)[0m 
[36m(train_model pid=46028)[0m 
[36m(train_model pid=46024)[0m 
[36m(train_model pid=46022)[0m 
[36m(train_model pid=46026)[0m 
[36m(train_model pid=46027)[0m 
[36m(train_model pid=46025)[0m 
[36m(train_model pid=46029)[0m Loss: 15266.570238834234
[36m(train_model pid=46029)[0m Loss: 1160.1594111709855[32m [repeated 16x across cluster] (Ray deduplicates logs by default. Set RAY_DEDUP_LOGS=0 to disable log deduplication, or see https://docs.ray.io/en/master/ray-observability/user-guides/configure-logging.html#log-deduplication for more options.)[0m
[36m(train_model pid=46023)[0m Loss: 19834.02746124824[32m [repeated 14x across cluster][0m
[36m(train_model pid=46029)[0m Loss: 1098.2790231795516[32m [repeated 14x across cluster][0m
[36m(train_model pid=46023)[0m Loss: 776.2787184864283[32m [repeated 13x across cluster][0m
[36m(train_model pid=46026)[0m Loss: 28884.76565052433[32m [repeated

Trial name,f1_score
train_model_247ab_00000,0.0
train_model_247ab_00001,0.0
train_model_247ab_00002,0.0
train_model_247ab_00003,0.0
train_model_247ab_00004,0.0
train_model_247ab_00005,0.0
train_model_247ab_00006,0.0741847
train_model_247ab_00007,0.000969932
train_model_247ab_00008,0.0741452
train_model_247ab_00009,0.0


2024-09-30 12:37:09,803	INFO tune.py:1009 -- Wrote the latest version of all result files and experiment state to '/root/ray_results/train_model_2024-09-30_12-18-46' in 0.0112s.
2024-09-30 12:37:09,824	INFO tune.py:1041 -- Total run time: 1102.90 seconds (1102.85 seconds for the tuning loop).


Best config: {'units_1': 127, 'units_2': 118, 'learning_rate': 0.00018706814905236274}
Best F1 Score: 0.07578417291699914
[36m(train_model pid=51378)[0m Loss: 26110.36237318618[32m [repeated 2x across cluster][0m


In [None]:
ray.available.res