In [1]:
import sys
import os
from pathlib import Path

# Make project-local packages importable from this notebook: take the directory
# two levels above the kernel's working directory and put it first on sys.path.
# Despite the name and the printed label, `current_dir` is therefore NOT the
# working directory itself.
project_root = Path.cwd().parent.parent
current_dir = str(project_root)
print(f"Current dir: {current_dir}")
sys.path.insert(0, current_dir)

Current dir: d:\Programming\Python\comfyui-image-scorer


In [None]:
import numpy as np
from step02prepare.full_data.data.processing import clean_training_artifacts
from step03training.full_data.data_utils import (
    load_training_data,
    filter_unused_features,
    add_interaction_features,
    load_training_scores,
)

from shared.paths import vectors_file, scores_file, filtered_data

# Set to True to call clean_training_artifacts() before loading; the saved
# output of this cell shows it removing the cached filtered_data.npz file.
retrain = False
if retrain:
    clean_training_artifacts()

print("--- Step 1: Loading Data ---")
if os.path.exists(filtered_data):
    # A filtered-data artifact already exists on disk: only the scores are
    # loaded and x is left as an empty placeholder array.
    # NOTE(review): presumably filter_unused_features restores the cached
    # matrix itself in this case - confirm. Until it does, the shape printed
    # just below is (0,), not the real data shape.
    x = np.array([])
    y = load_training_scores(scores_file)
else:
    # No cached artifact: load the full feature vectors together with scores.
    x, y = load_training_data(vectors_file, scores_file)


print(f"Loaded Data Shape: {x.shape}")

print("\n--- Step 2: Filtering Unused Features ---")
# Drops features the helper considers unused and returns the reduced matrix
# plus the indices that were kept.
# NOTE(review): the third positional argument (500) looks like the iteration
# budget of a quick LightGBM probe (the saved progress bar counts to 500) -
# confirm against filter_unused_features' signature.
x, kept_indices = filter_unused_features(x, y, 500)
print(f"Filtered Data Shape: {x.shape}")

print("\n--- Step 3: Generating Interaction Features ---")
# Augments x with interaction features; target_k=1000 is the value requested
# here (an older comment said "top 500", which did not match the call).
# The second return value is not needed in this notebook.
x, _ = add_interaction_features(x, y, target_k=1000)
print(f"Final Data Shape (with Interactions): {x.shape}")
print("Data Preparation Complete.")

Importing shared modules...
prepare_dir: D:\Programming\Python\comfyui-image-scorer\step02prepare
Importing config modules...
Importing data processing modules...
Importing vision model modules...
Importing torch...
Importing PIL...
Importing feature modules...
Removing training artifact: D:\Programming\Python\comfyui-image-scorer\step03training\output\filtered_data.npz
--- Step 1: Loading Data ---
Loaded Data Shape: (13452, 2392)

--- Step 2: Filtering Unused Features ---
Filtering features... Initial shape: (13452, 2392)


Training LightGBM:  66%|██████▌   | 329/500 [00:26<00:13, 12.93it/s]

In [None]:
import random
from IPython.display import clear_output
from shared.config import config
from step03training.full_data.run import optimize_hyperparameters, evaluate_hyperparameter_combo
from step03training.full_data.config_utils import (
    crossover_config,
    generate_random_config,
    #generate_slowest_setup
)
from shared.paths import training_output_dir

# --- HPO loop state ---------------------------------------------------------
# `current_cfg` starts as a freshly generated random configuration; inside the
# loop it is only consulted as a fallback crossover parent.
current_cfg = generate_random_config()

# The three tracked configurations, read (in this order) from the "training"
# section of the shared config.
top_cfg, fastest_cfg, slowest_cfg = (
    config["training"][slot] for slot in ("top", "fastest", "slowest")
)
# Disabled alternative: regenerate the "slowest" setup instead of reading it.
#slowest_cfg=generate_slowest_setup()
#config["training"]["slowest"]= slowest_cfg

# Loop limits: number of outer iterations and the per-iteration combo budget
# handed to optimize_hyperparameters.
max_iters, max_combos = (
    config["training"][key] for key in ("max_iters", "max_combos")
)
print(f"Initialized HPO loop with max_iters={max_iters}, max_combos={max_combos}")

# Base path passed to evaluate_hyperparameter_combo for its temporary model.
temp_model_base = os.path.join(training_output_dir, "temp_model")

# Optional warm-up: when enabled, re-evaluate each tracked configuration on the
# current data and store the measured score and training time back into it, so
# the loop below compares new candidates against up-to-date baselines.
get_base_score = False
if get_base_score:
    for slot in ("top", "fastest", "slowest"):
        print(f"getting {slot} base score ...")
        current = config["training"][slot]
        score, t_time = evaluate_hyperparameter_combo(
            current, temp_model_base, X=x, y=y
        )
        # Write only the two measured fields; merging the dict with itself
        # first (as before) just copied every existing key back unchanged.
        current.update(best_score=score, training_time=t_time)
        print(f"score: {score}")



for i in range(max_iters):
    # Wipe the accumulated cell output every 10th iteration (never on the
    # first) so the training log in the notebook stays bounded.
    if i > 0 and i % 10 == 0:
        clear_output(wait=True)

    # Re-read the tracked configurations and the combo budget from the shared
    # config at the start of every iteration.
    top_cfg = config["training"]["top"]
    fastest_cfg = config["training"]["fastest"]
    slowest_cfg = config["training"]["slowest"]
    max_combos = config["training"]["max_combos"]

    # Strategy selection from a single uniform draw:
    #   [0.0, 0.1) RANDOM_START   [0.1, 0.3) FASTEST   [0.3, 0.8) TOP
    #   [0.8, 0.9) SLOWEST        [0.9, 1.0) CROSSOVER
    rand_val = random.random()
    if rand_val < 0.1:
        base_cfg = generate_random_config()
        strategy = "RANDOM_START"
    elif rand_val < 0.3:
        base_cfg = fastest_cfg
        strategy = "FASTEST"
    elif rand_val < 0.8:
        base_cfg = top_cfg
        strategy = "TOP"
    elif rand_val < 0.9:
        base_cfg = slowest_cfg
        strategy = "SLOWEST"
    else:
        # Crossover: prefer tracked configs whose best_score is above the
        # -9999 threshold (presumably an "unscored" sentinel - confirm).
        candidates = [
            c for c in [top_cfg, fastest_cfg, slowest_cfg] if c["best_score"] > -9999
        ]
        if len(candidates) < 2:
            # Fall back to any non-empty config, including the drifting
            # `current_cfg`; as a last resort breed two random configs.
            candidates = [c for c in [top_cfg, fastest_cfg, current_cfg] if c]
            if len(candidates) < 2:
                candidates = [generate_random_config(), generate_random_config()]

        parents = random.sample(candidates, 2)
        # Parents are copied with dict() before being handed to crossover_config.
        base_cfg = crossover_config(dict(parents[0]), dict(parents[1]))
        strategy = "CROSSOVER"

    print(f"\nIter {i + 1}/{max_iters} — Strategy: {strategy}")
    # Pull the score out before formatting: reusing double quotes inside a
    # double-quoted f-string is a SyntaxError before Python 3.12, and a
    # generated or crossed-over config may not carry "best_score", which must
    # not abort the run over a log line.
    current_score = base_cfg.get("best_score", "n/a")
    print(f"current score; {current_score}")

    # Per the original author's note, optimize_hyperparameters handles
    # updating and saving the tracked configs internally.
    results = optimize_hyperparameters(
        base_cfg=base_cfg, max_combos=max_combos, X=x, y=y, strategy=strategy
    )

    # Re-read the tracked configs after the batch so the info check below (and
    # any code after the loop) sees the bests as they stand now, even if the
    # optimizer replaced the config entries instead of mutating them in place.
    top_cfg = config["training"]["top"]
    fastest_cfg = config["training"]["fastest"]
    slowest_cfg = config["training"]["slowest"]

    # Info only loop: report results from this batch that are the current bests.
    for candidate_cfg, metrics in results:
        r2 = metrics["r2"]
        t_time = metrics["training_time"]

        # Exact equality is intentional: it only matches when the stored best
        # is the very value this batch produced.
        is_top = r2 == top_cfg["best_score"]
        is_fastest = t_time == fastest_cfg["training_time"]

        if is_top:
            print(f"  [Info] This batch produced the current TOP score: {r2:.6f}")
        if is_fastest:
            print(
                f"  [Info] This batch produced the current FASTEST time: {t_time:.4f}s"
            )

    # Keep the last evaluated config of the batch as `current_cfg`; it is only
    # read by the crossover fallback above.
    if results:
        current_cfg = results[-1][0]

# Final summary. Re-read the tracked configs from the shared config first: the
# loop variables were last refreshed at the start of the final iteration, so
# they can miss bests found by the last batch if the optimizer replaced the
# config entries instead of mutating them in place.
top_cfg = config["training"]["top"]
fastest_cfg = config["training"]["fastest"]
slowest_cfg = config["training"]["slowest"]

print("\nFinished optimization.")
print(f"Top R2: {top_cfg['best_score']:.6f}")
print(
    f"Fastest Time: {fastest_cfg['training_time']:.6f}s (R2: {fastest_cfg['best_score']:.6f})"
)
print(
    f"Slowest (High Score) Time: {slowest_cfg['training_time']:.6f}s (R2: {slowest_cfg['best_score']:.6f})"
)


Iter 11/20 — Strategy: SLOWEST
current score; 0.37040756110632145
last used keys for SLOWEST: ['num_leaves', 'reg_lambda', 'early_stopping_rounds']
Optimizing hyperparameters over grid: {'max_depth': [81, 66], 'colsample_bytree': [0.19646519159727913], 'min_child_samples': [46], 'n_estimators': [783], 'min_split_gain': [0.011000000000000001], 'learning_rate': [0.018142654163951798], 'reg_alpha': [0.04365044329041002], 'subsample': [0.15465545010725337], 'num_leaves': [990], 'reg_lambda': [0.39447162368857014], 'early_stopping_rounds': [139]}
______________________________
Evaluating hyperparameter combo 1/2, with params: {'max_depth': 81, 'colsample_bytree': 0.19646519159727913, 'min_child_samples': 46, 'n_estimators': 783, 'min_split_gain': 0.011000000000000001, 'learning_rate': 0.018142654163951798, 'reg_alpha': 0.04365044329041002, 'subsample': 0.15465545010725337, 'num_leaves': 990, 'reg_lambda': 0.39447162368857014, 'early_stopping_rounds': 139}


Training LightGBM: 100%|██████████| 783/783 [00:43<00:00, 17.93it/s]


r2=0.3704, time=43.6635s for Evaluated params {'max_depth': 81, 'colsample_bytree': 0.19646519159727913, 'min_child_samples': 46, 'n_estimators': 783, 'min_split_gain': 0.011000000000000001, 'learning_rate': 0.018142654163951798, 'reg_alpha': 0.04365044329041002, 'subsample': 0.15465545010725337, 'num_leaves': 990, 'reg_lambda': 0.39447162368857014, 'early_stopping_rounds': 139}
______________________________
Evaluating hyperparameter combo 2/2, with params: {'max_depth': 66, 'colsample_bytree': 0.19646519159727913, 'min_child_samples': 46, 'n_estimators': 783, 'min_split_gain': 0.011000000000000001, 'learning_rate': 0.018142654163951798, 'reg_alpha': 0.04365044329041002, 'subsample': 0.15465545010725337, 'num_leaves': 990, 'reg_lambda': 0.39447162368857014, 'early_stopping_rounds': 139}


Training LightGBM: 100%|██████████| 783/783 [00:43<00:00, 17.88it/s]


r2=0.3703, time=43.7954s for Evaluated params {'max_depth': 66, 'colsample_bytree': 0.19646519159727913, 'min_child_samples': 46, 'n_estimators': 783, 'min_split_gain': 0.011000000000000001, 'learning_rate': 0.018142654163951798, 'reg_alpha': 0.04365044329041002, 'subsample': 0.15465545010725337, 'num_leaves': 990, 'reg_lambda': 0.39447162368857014, 'early_stopping_rounds': 139}

Iter 12/20 — Strategy: TOP
current score; 0.39092413454461283
last used keys for TOP: ['n_estimators', 'min_child_samples', 'colsample_bytree']
Optimizing hyperparameters over grid: {'reg_lambda': [5.509970572089629, 4.5081577408006055], 'reg_alpha': [1.5288921794004653], 'num_leaves': [498], 'early_stopping_rounds': [66], 'subsample': [0.18679120761960066], 'learning_rate': [0.0403707390671591], 'max_depth': [12], 'min_split_gain': [0.01303224893094797], 'n_estimators': [1159], 'min_child_samples': [314], 'colsample_bytree': [0.1347357818178288]}
______________________________
Evaluating hyperparameter combo 

Training LightGBM: 100%|██████████| 1159/1159 [00:10<00:00, 108.29it/s]


r2=0.3891, time=10.7034s for Evaluated params {'reg_lambda': 5.509970572089629, 'reg_alpha': 1.5288921794004653, 'num_leaves': 498, 'early_stopping_rounds': 66, 'subsample': 0.18679120761960066, 'learning_rate': 0.0403707390671591, 'max_depth': 12, 'min_split_gain': 0.01303224893094797, 'n_estimators': 1159, 'min_child_samples': 314, 'colsample_bytree': 0.1347357818178288}
______________________________
Evaluating hyperparameter combo 2/2, with params: {'reg_lambda': 4.5081577408006055, 'reg_alpha': 1.5288921794004653, 'num_leaves': 498, 'early_stopping_rounds': 66, 'subsample': 0.18679120761960066, 'learning_rate': 0.0403707390671591, 'max_depth': 12, 'min_split_gain': 0.01303224893094797, 'n_estimators': 1159, 'min_child_samples': 314, 'colsample_bytree': 0.1347357818178288}


Training LightGBM: 100%|██████████| 1159/1159 [00:10<00:00, 107.61it/s]


r2=0.3854, time=10.7711s for Evaluated params {'reg_lambda': 4.5081577408006055, 'reg_alpha': 1.5288921794004653, 'num_leaves': 498, 'early_stopping_rounds': 66, 'subsample': 0.18679120761960066, 'learning_rate': 0.0403707390671591, 'max_depth': 12, 'min_split_gain': 0.01303224893094797, 'n_estimators': 1159, 'min_child_samples': 314, 'colsample_bytree': 0.1347357818178288}

Iter 13/20 — Strategy: FASTEST
current score; 0.37698011403701825
last used keys for FASTEST: ['num_leaves']
Optimizing hyperparameters over grid: {'learning_rate': [0.12208530863240925, 0.09988797979015301], 'reg_lambda': [10.0], 'min_split_gain': [0.2370812675031192], 'reg_alpha': [6.842495885918459], 'subsample': [0.29627025953957464], 'colsample_bytree': [0.1083359287653048], 'max_depth': [3], 'n_estimators': [2000], 'min_child_samples': [53], 'early_stopping_rounds': [200], 'num_leaves': [828]}
______________________________
Evaluating hyperparameter combo 1/2, with params: {'learning_rate': 0.1220853086324092

Training LightGBM:  54%|█████▍    | 1089/2000 [00:04<00:03, 229.71it/s]


r2=0.3728, time=4.7409s for Evaluated params {'learning_rate': 0.12208530863240925, 'reg_lambda': 10.0, 'min_split_gain': 0.2370812675031192, 'reg_alpha': 6.842495885918459, 'subsample': 0.29627025953957464, 'colsample_bytree': 0.1083359287653048, 'max_depth': 3, 'n_estimators': 2000, 'min_child_samples': 53, 'early_stopping_rounds': 200, 'num_leaves': 828}
______________________________
Evaluating hyperparameter combo 2/2, with params: {'learning_rate': 0.09988797979015301, 'reg_lambda': 10.0, 'min_split_gain': 0.2370812675031192, 'reg_alpha': 6.842495885918459, 'subsample': 0.29627025953957464, 'colsample_bytree': 0.1083359287653048, 'max_depth': 3, 'n_estimators': 2000, 'min_child_samples': 53, 'early_stopping_rounds': 200, 'num_leaves': 828}


Training LightGBM:  64%|██████▍   | 1286/2000 [00:05<00:02, 243.26it/s]


r2=0.3764, time=5.2869s for Evaluated params {'learning_rate': 0.09988797979015301, 'reg_lambda': 10.0, 'min_split_gain': 0.2370812675031192, 'reg_alpha': 6.842495885918459, 'subsample': 0.29627025953957464, 'colsample_bytree': 0.1083359287653048, 'max_depth': 3, 'n_estimators': 2000, 'min_child_samples': 53, 'early_stopping_rounds': 200, 'num_leaves': 828}

Iter 14/20 — Strategy: FASTEST
current score; 0.37698011403701825
last used keys for FASTEST: ['num_leaves', 'learning_rate']
Optimizing hyperparameters over grid: {'max_depth': [4, 2], 'colsample_bytree': [0.1083359287653048], 'reg_lambda': [10.0], 'subsample': [0.29627025953957464], 'reg_alpha': [6.842495885918459], 'early_stopping_rounds': [200], 'min_split_gain': [0.2370812675031192], 'n_estimators': [2000], 'min_child_samples': [53], 'num_leaves': [828], 'learning_rate': [0.11098664421128113]}
______________________________
Evaluating hyperparameter combo 1/2, with params: {'max_depth': 4, 'colsample_bytree': 0.108335928765304

Training LightGBM:  35%|███▍      | 697/2000 [00:04<00:07, 163.54it/s]


r2=0.3688, time=4.2624s for Evaluated params {'max_depth': 4, 'colsample_bytree': 0.1083359287653048, 'reg_lambda': 10.0, 'subsample': 0.29627025953957464, 'reg_alpha': 6.842495885918459, 'early_stopping_rounds': 200, 'min_split_gain': 0.2370812675031192, 'n_estimators': 2000, 'min_child_samples': 53, 'num_leaves': 828, 'learning_rate': 0.11098664421128113}
______________________________
Evaluating hyperparameter combo 2/2, with params: {'max_depth': 2, 'colsample_bytree': 0.1083359287653048, 'reg_lambda': 10.0, 'subsample': 0.29627025953957464, 'reg_alpha': 6.842495885918459, 'early_stopping_rounds': 200, 'min_split_gain': 0.2370812675031192, 'n_estimators': 2000, 'min_child_samples': 53, 'num_leaves': 828, 'learning_rate': 0.11098664421128113}


Training LightGBM: 100%|██████████| 2000/2000 [00:05<00:00, 337.09it/s]


r2=0.3725, time=5.9331s for Evaluated params {'max_depth': 2, 'colsample_bytree': 0.1083359287653048, 'reg_lambda': 10.0, 'subsample': 0.29627025953957464, 'reg_alpha': 6.842495885918459, 'early_stopping_rounds': 200, 'min_split_gain': 0.2370812675031192, 'n_estimators': 2000, 'min_child_samples': 53, 'num_leaves': 828, 'learning_rate': 0.11098664421128113}

Iter 15/20 — Strategy: TOP
current score; 0.39092413454461283
last used keys for TOP: ['n_estimators', 'min_child_samples', 'colsample_bytree', 'reg_lambda']
Optimizing hyperparameters over grid: {'min_split_gain': [0.014335473824042768, 0.011729024037853173], 'subsample': [0.18679120761960066], 'early_stopping_rounds': [66], 'learning_rate': [0.0403707390671591], 'reg_alpha': [1.5288921794004653], 'num_leaves': [498], 'max_depth': [12], 'n_estimators': [1159], 'min_child_samples': [314], 'colsample_bytree': [0.1347357818178288], 'reg_lambda': [5.009064156445117]}
______________________________
Evaluating hyperparameter combo 1/2, 

Training LightGBM: 100%|██████████| 1159/1159 [00:10<00:00, 107.40it/s]


r2=0.3909, time=10.7916s for Evaluated params {'min_split_gain': 0.014335473824042768, 'subsample': 0.18679120761960066, 'early_stopping_rounds': 66, 'learning_rate': 0.0403707390671591, 'reg_alpha': 1.5288921794004653, 'num_leaves': 498, 'max_depth': 12, 'n_estimators': 1159, 'min_child_samples': 314, 'colsample_bytree': 0.1347357818178288, 'reg_lambda': 5.009064156445117}
--> Found new TOP model! (old: 0.3909, new: 0.3909)
______________________________
Evaluating hyperparameter combo 2/2, with params: {'min_split_gain': 0.011729024037853173, 'subsample': 0.18679120761960066, 'early_stopping_rounds': 66, 'learning_rate': 0.0403707390671591, 'reg_alpha': 1.5288921794004653, 'num_leaves': 498, 'max_depth': 12, 'n_estimators': 1159, 'min_child_samples': 314, 'colsample_bytree': 0.1347357818178288, 'reg_lambda': 5.009064156445117}


Training LightGBM: 100%|██████████| 1159/1159 [00:10<00:00, 107.54it/s]


r2=0.3909, time=10.7781s for Evaluated params {'min_split_gain': 0.011729024037853173, 'subsample': 0.18679120761960066, 'early_stopping_rounds': 66, 'learning_rate': 0.0403707390671591, 'reg_alpha': 1.5288921794004653, 'num_leaves': 498, 'max_depth': 12, 'n_estimators': 1159, 'min_child_samples': 314, 'colsample_bytree': 0.1347357818178288, 'reg_lambda': 5.009064156445117}
  [Info] This batch produced the current TOP score: 0.390926

Iter 16/20 — Strategy: FASTEST
current score; 0.37698011403701825
last used keys for FASTEST: ['num_leaves', 'learning_rate', 'max_depth']
Optimizing hyperparameters over grid: {'n_estimators': [2000, 1800], 'reg_lambda': [10.0], 'min_split_gain': [0.2370812675031192], 'subsample': [0.29627025953957464], 'reg_alpha': [6.842495885918459], 'min_child_samples': [53], 'colsample_bytree': [0.1083359287653048], 'early_stopping_rounds': [200], 'num_leaves': [828], 'learning_rate': [0.11098664421128113], 'max_depth': [3]}
______________________________
Evaluating

Training LightGBM:  52%|█████▏    | 1032/2000 [00:04<00:04, 224.78it/s]


r2=0.3770, time=4.5913s for Evaluated params {'n_estimators': 2000, 'reg_lambda': 10.0, 'min_split_gain': 0.2370812675031192, 'subsample': 0.29627025953957464, 'reg_alpha': 6.842495885918459, 'min_child_samples': 53, 'colsample_bytree': 0.1083359287653048, 'early_stopping_rounds': 200, 'num_leaves': 828, 'learning_rate': 0.11098664421128113, 'max_depth': 3}
______________________________
Evaluating hyperparameter combo 2/2, with params: {'n_estimators': 1800, 'reg_lambda': 10.0, 'min_split_gain': 0.2370812675031192, 'subsample': 0.29627025953957464, 'reg_alpha': 6.842495885918459, 'min_child_samples': 53, 'colsample_bytree': 0.1083359287653048, 'early_stopping_rounds': 200, 'num_leaves': 828, 'learning_rate': 0.11098664421128113, 'max_depth': 3}


Training LightGBM:  57%|█████▋    | 1032/1800 [00:04<00:03, 221.33it/s]


r2=0.3770, time=4.6632s for Evaluated params {'n_estimators': 1800, 'reg_lambda': 10.0, 'min_split_gain': 0.2370812675031192, 'subsample': 0.29627025953957464, 'reg_alpha': 6.842495885918459, 'min_child_samples': 53, 'colsample_bytree': 0.1083359287653048, 'early_stopping_rounds': 200, 'num_leaves': 828, 'learning_rate': 0.11098664421128113, 'max_depth': 3}

Iter 17/20 — Strategy: FASTEST
current score; 0.37698011403701825
last used keys for FASTEST: ['num_leaves', 'learning_rate', 'max_depth', 'n_estimators']
Optimizing hyperparameters over grid: {'min_child_samples': [58, 47], 'reg_lambda': [10.0], 'colsample_bytree': [0.1083359287653048], 'min_split_gain': [0.2370812675031192], 'reg_alpha': [6.842495885918459], 'subsample': [0.29627025953957464], 'early_stopping_rounds': [200], 'num_leaves': [828], 'learning_rate': [0.11098664421128113], 'max_depth': [3], 'n_estimators': [2000]}
______________________________
Evaluating hyperparameter combo 1/2, with params: {'min_child_samples': 58

Training LightGBM:  58%|█████▊    | 1163/2000 [00:04<00:03, 238.94it/s]


r2=0.3732, time=4.8678s for Evaluated params {'min_child_samples': 58, 'reg_lambda': 10.0, 'colsample_bytree': 0.1083359287653048, 'min_split_gain': 0.2370812675031192, 'reg_alpha': 6.842495885918459, 'subsample': 0.29627025953957464, 'early_stopping_rounds': 200, 'num_leaves': 828, 'learning_rate': 0.11098664421128113, 'max_depth': 3, 'n_estimators': 2000}
______________________________
Evaluating hyperparameter combo 2/2, with params: {'min_child_samples': 47, 'reg_lambda': 10.0, 'colsample_bytree': 0.1083359287653048, 'min_split_gain': 0.2370812675031192, 'reg_alpha': 6.842495885918459, 'subsample': 0.29627025953957464, 'early_stopping_rounds': 200, 'num_leaves': 828, 'learning_rate': 0.11098664421128113, 'max_depth': 3, 'n_estimators': 2000}


Training LightGBM:  62%|██████▏   | 1249/2000 [00:04<00:03, 249.92it/s]


r2=0.3772, time=4.9976s for Evaluated params {'min_child_samples': 47, 'reg_lambda': 10.0, 'colsample_bytree': 0.1083359287653048, 'min_split_gain': 0.2370812675031192, 'reg_alpha': 6.842495885918459, 'subsample': 0.29627025953957464, 'early_stopping_rounds': 200, 'num_leaves': 828, 'learning_rate': 0.11098664421128113, 'max_depth': 3, 'n_estimators': 2000}

Iter 18/20 — Strategy: SLOWEST
current score; 0.37040756110632145
last used keys for SLOWEST: ['num_leaves', 'reg_lambda', 'early_stopping_rounds', 'max_depth']
Optimizing hyperparameters over grid: {'colsample_bytree': [0.21611171075700705, 0.17681867243755123], 'subsample': [0.15465545010725337], 'min_child_samples': [46], 'reg_alpha': [0.04365044329041002], 'learning_rate': [0.018142654163951798], 'min_split_gain': [0.011000000000000001], 'n_estimators': [783], 'num_leaves': [990], 'reg_lambda': [0.39447162368857014], 'early_stopping_rounds': [139], 'max_depth': [74]}
______________________________
Evaluating hyperparameter comb

Training LightGBM: 100%|██████████| 783/783 [00:45<00:00, 17.39it/s]


r2=0.3721, time=45.0311s for Evaluated params {'colsample_bytree': 0.21611171075700705, 'subsample': 0.15465545010725337, 'min_child_samples': 46, 'reg_alpha': 0.04365044329041002, 'learning_rate': 0.018142654163951798, 'min_split_gain': 0.011000000000000001, 'n_estimators': 783, 'num_leaves': 990, 'reg_lambda': 0.39447162368857014, 'early_stopping_rounds': 139, 'max_depth': 74}
--> Found new SLOWEST model! Old 0.3704 (Score: 0.3721, Time: 45.0311s)
______________________________
Evaluating hyperparameter combo 2/2, with params: {'colsample_bytree': 0.17681867243755123, 'subsample': 0.15465545010725337, 'min_child_samples': 46, 'reg_alpha': 0.04365044329041002, 'learning_rate': 0.018142654163951798, 'min_split_gain': 0.011000000000000001, 'n_estimators': 783, 'num_leaves': 990, 'reg_lambda': 0.39447162368857014, 'early_stopping_rounds': 139, 'max_depth': 74}


Training LightGBM:  10%|▉         | 75/783 [00:05<00:38, 18.58it/s]

KeyboardInterrupt: 