# Parallel Processing Demo 1: Cross-Validation and Grid Search

This notebook demonstrates parallel execution for:
- `fit_resamples()` - Parallel CV fold evaluation
- `tune_grid()` - Parallel grid search
- CPU core warnings and validation
- Performance comparisons (sequential vs parallel)

**Key Features Demonstrated:**
- ✅ `n_jobs` parameter usage
- ✅ CPU warning system
- ✅ Progress tracking with `verbose=True`
- ✅ Speedup measurements
- ✅ Results consistency validation

## Setup and Data Loading

In [None]:
import sys
import time
import warnings

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

# py-tidymodels imports
from py_workflows import workflow
from py_parsnip import linear_reg, rand_forest
from py_rsample import vfold_cv, initial_split, training, testing
from py_yardstick import metric_set, rmse, mae, r_squared
from py_tune import fit_resamples, tune_grid, grid_regular, tune
from py_tune.parallel_utils import get_cpu_count, validate_n_jobs

print("All imports successful!")

In [None]:
# Read the raw CSV once and keep it untouched; the working frame is a copy
# whose 'date' column has been converted to datetimes.
raw_data = pd.read_csv('__data/preem.csv')
df = raw_data.assign(date=pd.to_datetime(raw_data['date']))

first_date, last_date = df['date'].min(), df['date'].max()
print(f"Data shape: {df.shape}")
print(f"Date range: {first_date} to {last_date}")
display(df.head())

In [None]:
# Seeded split (prop=0.75), then pull out the two partitions
split = initial_split(df, prop=0.75, seed=123)
train_data, test_data = training(split), testing(split)

# Report the row count of each partition
for label, part in (("Training set", train_data), ("Test set", test_data)):
    print(f"{label}: {part.shape[0]} rows")

In [None]:
# Model formula and the metric set reused by every evaluation in this notebook
FORMULA = "target ~ ."
metrics = metric_set(rmse, mae, r_squared)

print("Formula: " + FORMULA)
print("Metrics: rmse, mae, r_squared")

## System Information

In [None]:
# Check system resources: core count as reported by py_tune, plus the platform string
cpu_count = get_cpu_count()
print(f"‚úì Detected {cpu_count} CPU cores")
print(f"‚úì Joblib backend: loky (multiprocessing)")
# An f-string replacement field may only contain an expression, so `sys` is
# imported with the other modules at the top of the notebook, not inline here.
print(f"‚úì Platform: {sys.platform}")
print(f"\nThis system can efficiently run up to {cpu_count} parallel jobs.")

## Part 1: Parallel fit_resamples() Demonstration

We'll compare sequential vs parallel execution for CV fold evaluation.

In [None]:
# Workflow: shared formula + default linear regression spec
wf = (
    workflow()
    .add_formula(FORMULA)
    .add_model(linear_reg())
)

# Seeded 5-fold cross-validation over the training partition
folds = vfold_cv(train_data, v=5, seed=123)

print(f"Workflow: {wf}", f"CV folds: {len(folds)}", sep="\n")

### Sequential Execution (Baseline)

In [None]:
# Baseline run: n_jobs=1, timed with wall-clock time
print("Running SEQUENTIAL fit_resamples...")
t0 = time.time()
results_seq = fit_resamples(wf, folds, metrics=metrics, n_jobs=1, verbose=True)
seq_time = time.time() - t0

print(f"\n‚úì Sequential execution completed in {seq_time:.2f} seconds")

In [None]:
# Collect the metrics table from the sequential run and render it;
# `metrics_seq` is reused by the consistency check further down.
metrics_seq = results_seq.collect_metrics()

display(metrics_seq)

### Parallel Execution with 2 Cores

In [None]:
# Same evaluation as the baseline, this time with n_jobs=2
print("Running PARALLEL fit_resamples (n_jobs=2)...")
t0 = time.time()
results_par2 = fit_resamples(wf, folds, metrics=metrics, n_jobs=2, verbose=True)
par2_time = time.time() - t0

# Speedup is relative to the sequential baseline; efficiency is speedup per
# requested worker, expressed as a percentage.
speedup_2 = seq_time / par2_time
efficiency_2 = (speedup_2 / 2) * 100

print(
    f"\n‚úì Parallel execution (2 cores) completed in {par2_time:.2f} seconds",
    f"‚úì Speedup: {speedup_2:.2f}x",
    f"‚úì Efficiency: {efficiency_2:.1f}%",
    sep="\n",
)

### Parallel Execution with All Cores

In [None]:
# Same evaluation again with n_jobs=-1 (all cores)
print(f"Running PARALLEL fit_resamples (n_jobs=-1, using all {cpu_count} cores)...")
t0 = time.time()
results_par_all = fit_resamples(wf, folds, metrics=metrics, n_jobs=-1, verbose=True)
par_all_time = time.time() - t0

# Speedup versus the sequential baseline; efficiency divides by the detected core count
speedup_all = seq_time / par_all_time
efficiency_all = (speedup_all / cpu_count) * 100

print(
    f"\n‚úì Parallel execution (all cores) completed in {par_all_time:.2f} seconds",
    f"‚úì Speedup: {speedup_all:.2f}x",
    f"‚úì Efficiency: {efficiency_all:.1f}%",
    sep="\n",
)

### Results Consistency Check

In [None]:
# Verify that the parallel runs reproduce the sequential metrics
metrics_par2 = results_par2.collect_metrics()
metrics_par_all = results_par_all.collect_metrics()


def _mean_for(metric_table, metric_name):
    """Return the first 'mean' value recorded for `metric_name`, or None if it is absent."""
    matches = metric_table.loc[metric_table['metric'] == metric_name, 'mean']
    return matches.iloc[0] if len(matches) else None


# Compare metrics
print("Consistency Check:")
all_identical = True
# Use the metric labels actually present in the sequential results instead of
# hard-coded names, so a label mismatch cannot raise an IndexError.
for metric in metrics_seq['metric'].unique():
    seq_val = _mean_for(metrics_seq, metric)
    par2_val = _mean_for(metrics_par2, metric)
    par_all_val = _mean_for(metrics_par_all, metric)

    # A metric missing from either parallel run counts as a mismatch
    match = (
        par2_val is not None
        and par_all_val is not None
        and bool(np.allclose([seq_val, par2_val, par_all_val], seq_val, rtol=1e-10))
    )
    all_identical = all_identical and match
    status = "‚úì IDENTICAL" if match else "‚úó DIFFERENT"
    print(f"  {metric}: {status}")

# Only claim success when every metric actually matched
if all_identical:
    print("\n‚úì All parallel executions produce identical results to sequential!")
else:
    print("\nSome parallel results differ from sequential - see the per-metric status above.")

### Performance Comparison

In [None]:
# Timing summary for the three fit_resamples() runs, one row per configuration
perf_columns = {
    'Configuration': ['Sequential', 'Parallel (2 cores)', f'Parallel ({cpu_count} cores)'],
    'n_jobs': [1, 2, -1],
    'Time (s)': [seq_time, par2_time, par_all_time],
    'Speedup': [1.0, speedup_2, speedup_all],
    'Efficiency (%)': [100.0, efficiency_2, efficiency_all],
}
perf_df = pd.DataFrame(perf_columns)

display(perf_df)

# Bar chart of speedup per configuration, with a dashed reference line at 1x
fig, ax = plt.subplots(figsize=(10, 6))
ax.bar(perf_df['Configuration'], perf_df['Speedup'], color=['gray', 'blue', 'green'])
ax.set_ylabel('Speedup (x)')
ax.set_title('fit_resamples() Speedup: Sequential vs Parallel')
ax.axhline(y=1, color='r', linestyle='--', label='Baseline')
ax.legend()
ax.grid(axis='y', alpha=0.3)
fig.tight_layout()
plt.show()

## Part 2: CPU Warning Demonstrations

The parallel processing system includes intelligent warnings to help users avoid inefficient configurations.

### Warning 1: Oversubscription (n_jobs > available cores)

In [None]:
# Trigger oversubscription warning by asking for more workers than detected cores
print(f"System has {cpu_count} cores. Requesting {cpu_count + 4} cores...\n")

with warnings.catch_warnings(record=True) as w:
    # Record every warning, including ones already shown earlier in the session
    warnings.simplefilter("always")

    results_over = fit_resamples(
        wf,
        folds,
        metrics=metrics,
        n_jobs=cpu_count + 4,  # Request more cores than available
        verbose=False
    )

    if w:
        print("‚ö†Ô∏è  WARNING TRIGGERED:")
        # More than one warning may be captured, so show each distinct message
        # (in order) rather than only the first one.
        for message in dict.fromkeys(str(caught.message) for caught in w):
            print(f"    {message}")
        # f-string so the detected core count is interpolated into the advice
        print(f"\nüí° Recommendation: Use n_jobs=-1 or n_jobs={cpu_count} instead")
    else:
        print("No warning was raised for this configuration.")

### Warning 2: Inefficiency (n_jobs > task count)

In [None]:
# Create smaller CV with only 3 folds, then request more workers than there are folds
folds_3 = vfold_cv(train_data, v=3, seed=123)
print(f"Created {len(folds_3)} CV folds. Requesting 10 workers...\n")

with warnings.catch_warnings(record=True) as w:
    # Record every warning, including ones already shown earlier in the session
    warnings.simplefilter("always")

    results_ineff = fit_resamples(
        wf,
        folds_3,
        metrics=metrics,
        n_jobs=10,  # More workers than tasks
        verbose=False
    )

    if w:
        print("‚ö†Ô∏è  WARNING TRIGGERED:")
        # n_jobs=10 may also exceed the core count on some machines, so more
        # than one warning can be captured; show each distinct message in order.
        for message in dict.fromkeys(str(caught.message) for caught in w):
            print(f"    {message}")
        # Fold count is read from folds_3 so the advice tracks the CV setup above
        print(f"\nüí° Recommendation: Use n_jobs={len(folds_3)} (number of folds) instead")
    else:
        print("No warning was raised for this configuration.")

## Part 3: Parallel Grid Search with tune_grid()

Grid search benefits significantly from parallel execution since it involves many model fits (configs × folds).

In [None]:
# Create tunable workflow: penalty and mixture are both marked with tune()
spec_tunable = linear_reg(
    penalty=tune(),
    mixture=tune()
).set_engine("sklearn")

wf_tune = workflow().add_formula(FORMULA).add_model(spec_tunable)

# Define parameter space
param_info = {
    'penalty': {'range': (0.001, 1.0), 'trans': 'log'},
    'mixture': {'range': (0, 1)}
}

# Create grid (5 levels per parameter; 5 x 5 = 25 configurations expected)
grid = grid_regular(param_info, levels=5)
n_configs = len(grid)
# Read the fold count from the `folds` object passed to tune_grid() below, so the
# reported number of fits stays correct if the CV setup is changed.
n_folds = len(folds)
total_fits = n_configs * n_folds

print(f"Grid search configuration:")
print(f"  Configurations: {n_configs}")
print(f"  CV folds: {n_folds}")
print(f"  Total fits: {total_fits}")
print(f"\nThis is a good candidate for parallel execution!")

### Sequential Grid Search (Baseline)

In [None]:
# Baseline grid search: every configuration/fold fit with n_jobs=1
print(f"Running SEQUENTIAL grid search ({total_fits} fits)...")
t0 = time.time()
tune_results_seq = tune_grid(wf_tune, folds, grid=grid, metrics=metrics, n_jobs=1, verbose=True)
tune_seq_time = time.time() - t0

# Total wall-clock time plus the average time per fit
print(
    f"\n‚úì Sequential grid search completed in {tune_seq_time:.2f} seconds",
    f"  ({tune_seq_time / total_fits:.2f} seconds per fit)",
    sep="\n",
)

### Parallel Grid Search

In [None]:
# Same grid search with n_jobs=-1 (all cores)
print(f"Running PARALLEL grid search ({total_fits} fits, n_jobs=-1)...")
t0 = time.time()
tune_results_par = tune_grid(wf_tune, folds, grid=grid, metrics=metrics, n_jobs=-1, verbose=True)
tune_par_time = time.time() - t0

# Speedup versus the sequential grid search; efficiency divides by the detected core count
tune_speedup = tune_seq_time / tune_par_time
tune_efficiency = (tune_speedup / cpu_count) * 100

print(
    f"\n‚úì Parallel grid search completed in {tune_par_time:.2f} seconds",
    f"  ({tune_par_time / total_fits:.2f} seconds per fit)",
    f"‚úì Speedup: {tune_speedup:.2f}x",
    f"‚úì Efficiency: {tune_efficiency:.1f}%",
    sep="\n",
)

### Grid Search Results

In [None]:
# Show the five best configurations by RMSE.
# RMSE is an error metric (lower is better), so rank it the same way as the
# select_best() call below, which passes maximize=False.
# NOTE(review): assumes show_best() accepts the same `maximize` flag as select_best() — confirm against py_tune.
best_results = tune_results_par.show_best(metric='rmse', n=5, maximize=False)
display(best_results)

# Get best parameters (lowest RMSE)
best_params = tune_results_par.select_best(metric='rmse', maximize=False)
print(f"\nBest parameters:")
print(f"  penalty: {best_params['penalty']:.4f}")
print(f"  mixture: {best_params['mixture']:.4f}")

## Summary and Recommendations

In [None]:
# Build the whole report as a list of lines and emit it with a single print;
# joining with "\n" yields the same output as one print() call per line.
rule = "=" * 80
summary_lines = [
    rule,
    "PARALLEL PROCESSING PERFORMANCE SUMMARY",
    rule,
    f"\nSystem: {cpu_count} CPU cores",
    # Timings measured in the fit_resamples() cells
    "\n1. fit_resamples() - 5-fold CV",
    f"   Sequential: {seq_time:.2f}s",
    f"   Parallel (2 cores): {par2_time:.2f}s (speedup: {speedup_2:.2f}x)",
    f"   Parallel (all cores): {par_all_time:.2f}s (speedup: {speedup_all:.2f}x)",
    # Timings measured in the tune_grid() cells
    f"\n2. tune_grid() - {total_fits} fits",
    f"   Sequential: {tune_seq_time:.2f}s",
    f"   Parallel (all cores): {tune_par_time:.2f}s (speedup: {tune_speedup:.2f}x)",
    "\n" + rule,
    "RECOMMENDATIONS",
    rule,
    "\n‚úÖ Use parallel execution (n_jobs=-1) for:",
    "   - Grid search with many configurations (>10)",
    "   - CV with many folds (>5) and complex models",
    "   - Any task taking >30 seconds total",
    "\n‚ö†Ô∏è  Use sequential execution (n_jobs=1) for:",
    "   - Quick tasks (<10 seconds)",
    "   - Simple models with few folds",
    "   - Debugging (easier to trace errors)",
    "\nüí° Tips:",
    "   - Always use verbose=True to monitor progress",
    "   - Watch for CPU warnings - they help optimize performance",
    # Suggest leaving one core free, but never fewer than one worker
    f"   - On shared machines, use n_jobs={max(1, cpu_count-1)} to leave cores free",
    rule,
]
print("\n".join(summary_lines))