Load Your Covertype Data

In [1]:
import numpy as np
import pandas as pd
import sys
from pathlib import Path
from sklearn.datasets import fetch_openml
from sklearn.utils import resample
from sklearn.model_selection import train_test_split

# Make the project directory importable from this notebook.
# Guarded so that re-running the cell does not append duplicate entries.
PROJECT_ROOT = '/Users/srinivass/Budgetaware_hpo'
if PROJECT_ROOT not in sys.path:
    sys.path.append(PROJECT_ROOT)

# Single seed shared by the subsampling step and both splits below.
SEED = 42

print("Loading Covertype dataset (50K samples)...")

# Load exactly as you do in your baseline
X, y = fetch_openml(
    name="covertype",
    version=2,
    as_frame=False,
    return_X_y=True
)

# Convert sparse to dense
if hasattr(X, 'toarray'):
    X = X.toarray()

y = y.astype(int)

# Subsample to 50K
MAX_SAMPLES = 50000
if X.shape[0] > MAX_SAMPLES:
    # replace=False is essential: resample() bootstraps WITH replacement by
    # default, which would put duplicate rows into the subsample and let the
    # same row end up in train, validation and test at once (data leakage).
    # The guard above guarantees n_samples does not exceed the population,
    # which sampling without replacement requires.
    X, y = resample(
        X, y,
        replace=False,
        n_samples=MAX_SAMPLES,
        stratify=y,
        random_state=SEED
    )

print(f"✅ Dataset loaded: {X.shape}")
print(f"   Classes: {len(np.unique(y))}")

# Train/val/test split (same as your baseline)
# Step 1: hold out 20% of the data as the test set.
X_temp, X_test, y_temp, y_test = train_test_split(
    X, y, test_size=0.2, random_state=SEED, stratify=y
)

# Step 2: take 25% of the remaining 80% as validation -> 60/20/20 overall.
X_train, X_val, y_train, y_val = train_test_split(
    X_temp, y_temp, test_size=0.25, random_state=SEED, stratify=y_temp
)

# Sanity check: the three splits partition the (sub)sampled dataset.
assert X_train.shape[0] + X_val.shape[0] + X_test.shape[0] == X.shape[0]

print(f"✅ Splits created:")
print(f"   Train: {X_train.shape[0]}")
print(f"   Val: {X_val.shape[0]}")
print(f"   Test: {X_test.shape[0]}")

Loading Covertype dataset (50K samples)...
✅ Dataset loaded: (50000, 54)
   Classes: 2
✅ Splits created:
   Train: 30000
   Val: 10000
   Test: 10000


Suppress warnings

In [2]:
import os
import warnings

# Silence every warning category in this kernel from here on.
warnings.filterwarnings('ignore')

# PYTHONWARNINGS is read when a Python interpreter starts, so setting it here
# does not affect the current kernel; it only applies to child interpreters
# launched afterwards that inherit this environment.
# NOTE(review): presumably intended for worker processes spawned by sklearn —
# confirm that the HPO code actually uses subprocess-based parallelism.
os.environ['PYTHONWARNINGS'] = 'ignore'

print("✅ Warnings suppressed")



Run Hyperband on Covertype

In [3]:
import random

from hpo.hyperband_implementation import Hyperband, get_random_mlp_config

# Search budget, named so the values are easy to find and tune.
MAX_ITER = 81   # Maximum iterations for one config
ETA = 3         # Reduction factor
HPO_SEED = 42   # Same seed value the data splits use

print("=" * 70)
print("RUNNING HYPERBAND ON COVERTYPE (50K samples)")
print("=" * 70)

# Seed the global RNGs so that repeated runs can sample the same configs.
# NOTE(review): this only helps if hpo.hyperband_implementation draws from the
# global `random` / `numpy.random` state — confirm, or pass a seed explicitly
# if the implementation supports one.
random.seed(HPO_SEED)
np.random.seed(HPO_SEED)

# Create Hyperband instance
hb = Hyperband(
    get_random_config=get_random_mlp_config,
    max_iter=MAX_ITER,
    eta=ETA,
    verbose=True
)

# Run Hyperband
print("\n🚀 Starting Hyperband optimization...")
print("This will take about 5-10 minutes...\n")

# NOTE(review): only the training split is passed in; X_val / y_val are not
# used here, so how the reported "validation score" is computed is internal to
# Hyperband.run — confirm it does not evaluate on the training data.
result = hb.run(X_train, y_train)

# Report the fields of the result dict that the rest of the notebook relies on.
print("\n" + "=" * 70)
print("✅ HYPERBAND COMPLETE!")
print("=" * 70)
print(f"Best validation score: {result['best_score']:.4f}")
print(f"Configs evaluated: {result['configs_evaluated']}")
print(f"Total time: {result['total_time']:.2f}s ({result['total_time']/60:.1f} minutes)")
print(f"Best config: {result['best_config']}")

RUNNING HYPERBAND ON COVERTYPE (50K samples)

🚀 Starting Hyperband optimization...
This will take about 5-10 minutes...

HYPERBAND OPTIMIZATION
Max iterations: 81, eta: 3
Total brackets: 5

BRACKET 1/5 (s=4)

  Bracket s=4: Starting with 81 configs, r=1.0
    Round 0: Evaluating 81 configs with r=1
      Keeping top 27 configs, best score: 0.7877
    Round 1: Evaluating 27 configs with r=3
      Keeping top 9 configs, best score: 0.8009
    Round 2: Evaluating 9 configs with r=9
      Keeping top 3 configs, best score: 0.8335
    Round 3: Evaluating 3 configs with r=27
      Keeping top 1 configs, best score: 0.8637
    Round 4: Evaluating 1 configs with r=81

  Bracket 4 complete. Best score: 0.8727

BRACKET 2/5 (s=3)

  Bracket s=3: Starting with 34 configs, r=3.0
    Round 0: Evaluating 34 configs with r=3
      Keeping top 11 configs, best score: 0.7977
    Round 1: Evaluating 11 configs with r=9
      Keeping top 3 configs, best score: 0.8316
    Round 2: Evaluating 3 configs w

Save Results

In [4]:
import json
from datetime import datetime

# Create results directory (no error if it already exists)
results_dir = Path('/Users/srinivass/Budgetaware_hpo/results/hpo')
results_dir.mkdir(parents=True, exist_ok=True)

# Build a JSON-serialisable summary of the run.
# Numeric fields are coerced to built-in int/float so json.dump does not fail
# if the result dict carries NumPy scalars; config values are stringified
# because their types are not known to be JSON-compatible.
summary = {
    'dataset': 'covertype',
    'method': 'hyperband',
    'date': datetime.now().strftime('%Y-%m-%d %H:%M:%S'),
    'best_score': float(result['best_score']),
    'configs_evaluated': int(result['configs_evaluated']),
    'total_time_seconds': float(result['total_time']),
    'best_config': {k: str(v) for k, v in result['best_config'].items()}
}

# Save to JSON (explicit encoding so the file does not depend on the locale)
json_path = results_dir / 'hyperband_covertype_summary.json'
with open(json_path, 'w', encoding='utf-8') as f:
    json.dump(summary, f, indent=2)

print(f"✅ Results saved to: {json_path}")

# Also save detailed results: one CSV row per entry of result['all_results']
detailed_results = pd.DataFrame(result['all_results'])
csv_path = results_dir / 'hyperband_covertype_detailed.csv'
detailed_results.to_csv(csv_path, index=False)

print(f"✅ Detailed results saved to: {csv_path}")
# This count is the number of rows in result['all_results'], which may differ
# from result['configs_evaluated'] if a config is recorded once per round.
print(f"\n📊 Total configs evaluated: {len(detailed_results)}")

✅ Results saved to: /Users/srinivass/Budgetaware_hpo/results/hpo/hyperband_covertype_summary.json
✅ Detailed results saved to: /Users/srinivass/Budgetaware_hpo/results/hpo/hyperband_covertype_detailed.csv

📊 Total configs evaluated: 206
