In [1]:
import sys
import os
from pathlib import Path

# Despite its name, `current_dir` is the directory two levels above the
# notebook's working directory; it is put on sys.path so the project packages
# imported by the following cells can be resolved.
current_dir = str(Path.cwd().parent.parent)
print(f"Current dir: {current_dir}")

# Keep the cell idempotent: re-running it must not pile up duplicate entries.
# Any existing occurrence is dropped and the path is placed at the front, so
# import priority is the same as with a plain insert(0, ...).
sys.path[:] = [current_dir] + [entry for entry in sys.path if entry != current_dir]

Current dir: d:\Programming\Python\comfyui-image-scorer


In [2]:
import numpy as np
from step03training.full_data.data_utils import (
    load_training_data,
    filter_unused_features,
    add_interaction_features,
    load_training_scores,
)

from shared.paths import vectors_file, scores_file, filtered_data

# Number of interaction features requested from add_interaction_features.
# Kept as a named constant so the comment below cannot drift from the value
# actually passed (the old comment said 500 while the code used 200).
INTERACTION_TARGET_K = 200

print("--- Step 1: Loading Data ---")
if not os.path.exists(filtered_data):
    # No filtered-data file on disk yet: load the full vectors and scores.
    x, y = load_training_data(vectors_file, scores_file)
else:
    # A filtered-data file already exists: only the scores are loaded and x is
    # an empty placeholder, so the shape printed below will be (0,).
    # NOTE(review): presumably filter_unused_features restores x from that
    # cache file instead of using this placeholder — confirm in data_utils.
    x = np.array([])
    y = load_training_scores(scores_file)


print(f"Loaded Data Shape: {x.shape}")

print("\n--- Step 2: Filtering Unused Features ---")
# Removes features with zero variance or zero importance in a quick probe
x, kept_indices = filter_unused_features(x, y)
print(f"Filtered Data Shape: {x.shape}")

print("\n--- Step 3: Generating Interaction Features ---")
# Adds the top INTERACTION_TARGET_K polynomial interactions (feature_A * feature_B)
x, _ = add_interaction_features(x, y, target_k=INTERACTION_TARGET_K)
print(f"Final Data Shape (with Interactions): {x.shape}")
print("Data Preparation Complete.")

prepare_dir: D:\Programming\Python\comfyui-image-scorer\step02prepare
--- Step 1: Loading Data ---
Loaded Data Shape: (7411, 2340)

--- Step 2: Filtering Unused Features ---
Filtering features... Initial shape: (7411, 2340)


Training LightGBM:  99%|█████████▉| 99/100 [00:09<00:00, 15.91it/s]

Dropped 998 unused features. New shape: (7411, 1342)


Training LightGBM: 100%|██████████| 100/100 [00:10<00:00,  9.17it/s]


Saved filtered data to cache: D:\Programming\Python\comfyui-image-scorer\step03training\output\filtered_data.npz
Filtered Data Shape: (7411, 1342)

--- Step 3: Generating Interaction Features ---
Scanning 899811 potential interactions in batches of 595...


Computing Correlations: 100%|██████████| 7411/7411 [00:52<00:00, 140.13samples/s]


Selecting top 200 interaction features...


Building Interaction Matrix: 100%|██████████| 7411/7411 [00:14<00:00, 497.22samples/s]


Saved interaction data to cache: D:\Programming\Python\comfyui-image-scorer\step03training\output\interaction_data.npz
Final Data Shape (with Interactions): (7411, 1542)
Data Preparation Complete.


In [3]:
import random
from IPython.display import clear_output
from shared.config import config
from step03training.full_data.run import optimize_hyperparameters
from step03training.full_data.config_utils import (
    crossover_config,
    generate_random_config,
)

# Hyperparameter search loop.
# Each iteration picks a base configuration with a randomly chosen strategy,
# hands it to optimize_hyperparameters, and logs whether any result of the
# batch equals the best score / fastest time currently stored in `config`.

# Setup Loop Variables
current_cfg = generate_random_config()
top_cfg = config["training"]["top"]
fastest_cfg = config["training"]["fastest"]
slowest_cfg = config["training"]["slowest"]
max_iters = config["training"]["max_iters"]
max_combos = config["training"]["max_combos"]
print(f"Initialized HPO loop with max_iters={max_iters}, max_combos={max_combos}")

for i in range(max_iters):
    clear_output(wait=True)
    # Refresh references
    top_cfg = config["training"]["top"]
    fastest_cfg = config["training"]["fastest"]
    slowest_cfg = config["training"]["slowest"]

    max_combos = config["training"]["max_combos"]

    # Strategy selection (cumulative thresholds on one uniform draw):
    #   5% RANDOM_START, 20% FASTEST, 35% TOP, 25% CROSSOVER, 15% SLOWEST
    rand_val = random.random()
    if rand_val < 0.05:
        base_cfg = current_cfg
        strategy = "RANDOM_START"
    elif rand_val < 0.25:
        base_cfg = fastest_cfg
        strategy = "FASTEST"
    elif rand_val < 0.60:
        base_cfg = top_cfg
        strategy = "TOP"
    elif rand_val < 0.85:
        # Crossover
        # NOTE(review): presumably a best_score of -9999 or lower marks a
        # config that has no recorded result yet — confirm against the config.
        candidates = [
            c for c in [top_cfg, fastest_cfg, slowest_cfg] if c["best_score"] > -9999
        ]
        if len(candidates) < 2:
            # Fall back to any non-empty configs, then to two fresh random ones.
            candidates = [c for c in [top_cfg, fastest_cfg, current_cfg] if c]
            if len(candidates) < 2:
                candidates = [generate_random_config(), generate_random_config()]

        parents = random.sample(candidates, 2)
        # Copies are passed so the parent configs themselves are not handed
        # to crossover_config.
        base_cfg = crossover_config(dict(parents[0]), dict(parents[1]))
        strategy = "CROSSOVER"
    else:
        base_cfg = slowest_cfg
        strategy = "SLOWEST"

    print(f"\nIter {i + 1}/{max_iters} — Strategy: {strategy}")

    # optimize_hyperparameters handles updates and saving internally now.
    results = optimize_hyperparameters(
        base_cfg=base_cfg, max_combos=max_combos, X=x, y=y, strategy=strategy
    )

    # Re-read the stored bests after the optimizer has run. Without this the
    # info check below, and the summary after the loop, would use references
    # fetched before this batch and could report stale values.
    top_cfg = config["training"]["top"]
    fastest_cfg = config["training"]["fastest"]
    slowest_cfg = config["training"]["slowest"]

    # Info only loop
    for _candidate_cfg, metrics in results:
        r2 = metrics["r2"]
        t_time = metrics["training_time"]

        # Check if this result matches current bests (just for log/info)
        is_top = r2 == top_cfg["best_score"]
        is_fastest = t_time == fastest_cfg["training_time"]

        if is_top:
            print(f"  [Info] This batch produced the current TOP score: {r2:.6f}")
        if is_fastest:
            print(
                f"  [Info] This batch produced the current FASTEST time: {t_time:.4f}s"
            )

    # Refresh local current_cfg for next RANDOM_START or Crossover inheritance
    if results:
        # Simply take the last one as 'current' for random drift
        current_cfg = results[-1][0]

print("\nFinished optimization.")
print(f"Top R2: {top_cfg['best_score']:.6f}")
print(
    f"Fastest Time: {fastest_cfg['training_time']:.6f}s (R2: {fastest_cfg['best_score']:.6f})"
)
print(
    f"Slowest (High Score) Time: {slowest_cfg['training_time']:.6f}s (R2: {slowest_cfg['best_score']:.6f})"
)


Iter 37/100 — Strategy: TOP
Optimizing hyperparameters over grid: {'learning_rate': [0.008601467189043123, 0.007819515626402838, 0.007037564063762555], 'n_estimators': [1782, 1458, 1061], 'num_leaves': [563, 512, 460], 'max_depth': [85, 82, 67], 'min_child_samples': [170, 118, 97], 'reg_alpha': [4.72146918997735, 0.45920571922019277, 0.3757137702710668], 'reg_lambda': [2.2986200440013826, 2.0896545854558024, 1.8806891269102222], 'subsample': [0.9141078209639405, 0.8310071099672186, 0.7479063989704967], 'colsample_bytree': [0.4743479300213063, 0.43122539092846024, 0.3881028518356142], 'min_split_gain': [0.44945746577408136, 0.40859769615825575, 0.3677379265424302], 'early_stopping_rounds': [76, 38, 31]}
Evaluating hyperparameter combo 1/3, with params: {'learning_rate': 0.007819515626402838, 'n_estimators': 1782, 'num_leaves': 512, 'max_depth': 82, 'min_child_samples': 118, 'reg_alpha': 0.45920571922019277, 'reg_lambda': 1.8806891269102222, 'subsample': 0.8310071099672186, 'colsample_b

Training LightGBM: 100%|██████████| 1782/1782 [00:30<00:00, 57.65it/s] 


r2=0.4283, time=30.9085s for Evaluated params {'learning_rate': 0.007819515626402838, 'n_estimators': 1782, 'num_leaves': 512, 'max_depth': 82, 'min_child_samples': 118, 'reg_alpha': 0.45920571922019277, 'reg_lambda': 1.8806891269102222, 'subsample': 0.8310071099672186, 'colsample_bytree': 0.3881028518356142, 'min_split_gain': 0.40859769615825575, 'early_stopping_rounds': 76}
Evaluating hyperparameter combo 2/3, with params: {'learning_rate': 0.007037564063762555, 'n_estimators': 1782, 'num_leaves': 563, 'max_depth': 67, 'min_child_samples': 118, 'reg_alpha': 4.72146918997735, 'reg_lambda': 1.8806891269102222, 'subsample': 0.8310071099672186, 'colsample_bytree': 0.4743479300213063, 'min_split_gain': 0.3677379265424302, 'early_stopping_rounds': 31}


Training LightGBM:  54%|█████▍    | 962/1782 [00:18<00:13, 62.75it/s]

KeyboardInterrupt: 