In [1]:
import sys
import os
from pathlib import Path

current_dir = str(Path(os.getcwd()).parent.parent)
print(f"Current dir: {current_dir}")
sys.path.insert(0, current_dir)

Current dir: d:\Programming\Python\comfyui-image-scorer


In [2]:
import numpy as np
from step03training.full_data.data_utils import (
    load_training_data,
    filter_unused_features,
    add_interaction_features,
    load_training_scores,
)

from shared.paths import vectors_file, scores_file, filtered_data

print("--- Step 1: Loading Data ---")
if not os.path.exists(filtered_data):
    x, y = load_training_data(vectors_file, scores_file)
else:
    x = np.array([])
    y = load_training_scores(scores_file)


print(f"Loaded Data Shape: {x.shape}")

print("\n--- Step 2: Filtering Unused Features ---")
# Removes features with zero variance or zero importance in a quick probe
x, kept_indices = filter_unused_features(x, y)
print(f"Filtered Data Shape: {x.shape}")

print("\n--- Step 3: Generating Interaction Features ---")
# Adds top 500 polynomial interactions (feature_A * feature_B)
x, _ = add_interaction_features(x, y, target_k=200)
print(f"Final Data Shape (with Interactions): {x.shape}")
print("Data Preparation Complete.")

prepare_dir: D:\Programming\Python\comfyui-image-scorer\step02prepare
--- Step 1: Loading Data ---
Loaded Data Shape: (0,)

--- Step 2: Filtering Unused Features ---
Filtering features... Initial shape: (0,)
Loading filtered data from cache: D:\Programming\Python\comfyui-image-scorer\step03training\output\filtered_data.npz
Filtered Data Shape: (8218, 2254)

--- Step 3: Generating Interaction Features ---
Loading interaction data from cache: D:\Programming\Python\comfyui-image-scorer\step03training\output\interaction_data.npz
Final Data Shape (with Interactions): (8218, 2454)
Data Preparation Complete.


In [3]:
import random
from IPython.display import clear_output
from shared.config import config
from step03training.full_data.run import optimize_hyperparameters
from step03training.full_data.config_utils import (
    crossover_config,
    generate_random_config,
    #generate_slowest_setup
)

# Setup Loop Variables
current_cfg = generate_random_config()
top_cfg = config["training"]["top"]
fastest_cfg = config["training"]["fastest"]
slowest_cfg = config["training"]["slowest"]
#slowest_cfg=generate_slowest_setup()
#config["training"]["slowest"]= slowest_cfg
max_iters = config["training"]["max_iters"]
max_combos = config["training"]["max_combos"]
print(f"Initialized HPO loop with max_iters={max_iters}, max_combos={max_combos}")

for i in range(max_iters):
    clear_output(wait=True)
    # Refresh references
    top_cfg = config["training"]["top"]
    fastest_cfg = config["training"]["fastest"]
    slowest_cfg = config["training"]["slowest"]

    max_combos = config["training"]["max_combos"]

    # Strategy selection
    rand_val = random.random()
    if rand_val < 0.05:
        base_cfg = current_cfg
        strategy = "RANDOM_START"
    elif rand_val < 0.25:
        base_cfg = fastest_cfg
        strategy = "FASTEST"
    elif rand_val < 0.60:
        base_cfg = top_cfg
        strategy = "TOP"
    elif rand_val < 0.85:
        # Crossover
        candidates = [
            c for c in [top_cfg, fastest_cfg, slowest_cfg] if c["best_score"] > -9999
        ]
        if len(candidates) < 2:
            candidates = [c for c in [top_cfg, fastest_cfg, current_cfg] if c]
            if len(candidates) < 2:
                candidates = [generate_random_config(), generate_random_config()]

        parents = random.sample(candidates, 2)
        base_cfg = crossover_config(dict(parents[0]), dict(parents[1]))
        strategy = "CROSSOVER"
    else:
        base_cfg = slowest_cfg
        strategy = "SLOWEST"
        # max_combos = 1

    print(f"\nIter {i + 1}/{max_iters} — Strategy: {strategy}")

    # optimize_hyperparameters handles updates and saving internally now.
    results = optimize_hyperparameters(
        base_cfg=base_cfg, max_combos=max_combos, X=x, y=y, strategy=strategy
    )

    # Info only loop
    for candidate_cfg, metrics in results:
        r2 = metrics["r2"]
        t_time = metrics["training_time"]

        # Check if this result matches current bests (just for log/info)
        is_top = r2 == top_cfg["best_score"]
        is_fastest = t_time == fastest_cfg["training_time"]

        if is_top:
            print(f"  [Info] This batch produced the current TOP score: {r2:.6f}")
        if is_fastest:
            print(
                f"  [Info] This batch produced the current FASTEST time: {t_time:.4f}s"
            )

    # Refresh local current_cfg for next RANDOM_START or Crossover inheritance
    if results:
        # Simply take the last one as 'current' for random drift
        current_cfg = results[-1][0]

print(f"\nFinished optimization.")
print(f"Top R2: {top_cfg['best_score']:.6f}")
print(
    f"Fastest Time: {fastest_cfg['training_time']:.6f}s (R2: {fastest_cfg['best_score']:.6f})"
)
print(
    f"Slowest (High Score) Time: {slowest_cfg['training_time']:.6f}s (R2: {slowest_cfg['best_score']:.6f})"
)


Iter 1/100 — Strategy: TOP
Optimizing hyperparameters over grid: {'learning_rate': [0.009461613907947437, 0.008601467189043123, 0.007741320470138811], 'n_estimators': [1665, 1514, 1362], 'num_leaves': [506, 460, 414], 'max_depth': [90, 82, 73], 'min_child_samples': [129, 118, 106], 'reg_alpha': [0.45920571922019277, 0.41745974474562975, 0.3757137702710668], 'reg_lambda': [0.579736292997068, 0.5270329936336982, 0.47432969427032834], 'subsample': [0.11949940696584722, 0.10863582451440656, 0.1], 'colsample_bytree': [0.2952439954668186, 0.26840363224256236, 0.24156326901830613], 'min_split_gain': [0.4045117191966733, 0.3677379265424302, 0.3309641338881872], 'early_stopping_rounds': [38, 35, 31]}
Evaluating hyperparameter combo 1/3, with params: {'learning_rate': 0.009461613907947437, 'n_estimators': 1665, 'num_leaves': 506, 'max_depth': 73, 'min_child_samples': 106, 'reg_alpha': 0.41745974474562975, 'reg_lambda': 0.5270329936336982, 'subsample': 0.1, 'colsample_bytree': 0.2952439954668186

Training LightGBM:  93%|█████████▎| 1546/1665 [00:28<00:02, 53.48it/s] 


r2=0.3795, time=28.9095s for Evaluated params {'learning_rate': 0.009461613907947437, 'n_estimators': 1665, 'num_leaves': 506, 'max_depth': 73, 'min_child_samples': 106, 'reg_alpha': 0.41745974474562975, 'reg_lambda': 0.5270329936336982, 'subsample': 0.1, 'colsample_bytree': 0.2952439954668186, 'min_split_gain': 0.3677379265424302, 'early_stopping_rounds': 38}
Evaluating hyperparameter combo 2/3, with params: {'learning_rate': 0.007741320470138811, 'n_estimators': 1362, 'num_leaves': 506, 'max_depth': 82, 'min_child_samples': 106, 'reg_alpha': 0.41745974474562975, 'reg_lambda': 0.5270329936336982, 'subsample': 0.1, 'colsample_bytree': 0.24156326901830613, 'min_split_gain': 0.3677379265424302, 'early_stopping_rounds': 31}


Training LightGBM: 100%|██████████| 1362/1362 [00:24<00:00, 54.57it/s]


r2=0.3515, time=24.9606s for Evaluated params {'learning_rate': 0.007741320470138811, 'n_estimators': 1362, 'num_leaves': 506, 'max_depth': 82, 'min_child_samples': 106, 'reg_alpha': 0.41745974474562975, 'reg_lambda': 0.5270329936336982, 'subsample': 0.1, 'colsample_bytree': 0.24156326901830613, 'min_split_gain': 0.3677379265424302, 'early_stopping_rounds': 31}
Evaluating hyperparameter combo 3/3, with params: {'learning_rate': 0.008601467189043123, 'n_estimators': 1514, 'num_leaves': 460, 'max_depth': 90, 'min_child_samples': 129, 'reg_alpha': 0.45920571922019277, 'reg_lambda': 0.47432969427032834, 'subsample': 0.11949940696584722, 'colsample_bytree': 0.24156326901830613, 'min_split_gain': 0.3309641338881872, 'early_stopping_rounds': 31}


Training LightGBM:  99%|█████████▉| 1506/1514 [00:22<00:00, 86.35it/s]

KeyboardInterrupt: 

Training LightGBM: 100%|█████████▉| 1508/1514 [00:35<00:00, 86.35it/s]