# 🧬 Algoritma Genetika untuk Penentuan Kelompok KKM Reguler
## UIN Malang - Kaggle Version

---

**Platform**: Kaggle Notebook  
**Dataset**: Upload `master_data.csv` ke Kaggle Dataset

**Tujuan**: Mengelompokkan mahasiswa ke dalam kelompok-kelompok KKM Reguler yang optimal dengan mempertimbangkan:
- ✅ Keberadaan anggota HTQ
- ✅ Heterogenitas jurusan
- ✅ Proporsi jenis kelamin
- ✅ Jumlah anggota per kelompok

**Metode**: Genetic Algorithm dengan PMX Crossover dan Reciprocal Exchange Mutation

---

### 📋 Langkah Setup di Kaggle:
1. Upload dataset `master_data.csv` ke Kaggle Dataset
2. Add dataset ke notebook ini
3. Run all cells
4. Download hasil dari Output section

## 1. Import Libraries & Setup Environment

In [None]:
import pandas as pd
import numpy as np
import random
from datetime import datetime
import os
import glob

# Log the library versions in use.
print("‚úÖ Libraries imported successfully!")
print("   Pandas: " + pd.__version__)
print("   Numpy: " + np.__version__)

# The Kaggle runtime is recognised by the presence of its input mount point;
# anything else is treated as a local checkout.
KAGGLE_MODE = os.path.exists('/kaggle/input')
print("üåê Running on KAGGLE environment" if KAGGLE_MODE
      else "üíª Running on LOCAL environment")

# Input/output locations depend on where the notebook runs.
if KAGGLE_MODE:
    INPUT_DIR, OUTPUT_DIR = '/kaggle/input', '/kaggle/working'
else:
    INPUT_DIR, OUTPUT_DIR = '../data', '../pengujian/output'

print(f"   Input directory: {INPUT_DIR}")
print(f"   Output directory: {OUTPUT_DIR}")

# Make sure the output location exists before anything is written to it.
os.makedirs(OUTPUT_DIR, exist_ok=True)

print("   üìÅ Output directory created")


## 2. Load and Validate Data

In [None]:
# Locate the dataset: auto-detect on Kaggle, fixed relative path locally.
if KAGGLE_MODE:
    # glob returns files in filesystem order, which is not guaranteed to be
    # stable; sort so the same file is chosen on every run.
    csv_files = sorted(glob.glob(f'{INPUT_DIR}/**/*.csv', recursive=True))
    if not csv_files:
        raise FileNotFoundError("No CSV file found in Kaggle input. Please add dataset!")
    # When several CSV files are attached, prefer the documented dataset name.
    preferred_csv = [p for p in csv_files if os.path.basename(p) == 'master_data.csv']
    csv_path = preferred_csv[0] if preferred_csv else csv_files[0]
    print(f"üìÅ Found dataset: {csv_path}")
else:
    csv_path = f'{INPUT_DIR}/master_data.csv'

# Load data
df = pd.read_csv(csv_path)

# Validate required columns. Explicit raises are used instead of `assert`
# so the checks still run when Python is started with -O.
required_cols = ['ID', 'Jenis Kelamin', 'Jurusan', 'HTQ']
missing_cols = [col for col in required_cols if col not in df.columns]
if missing_cols:
    raise ValueError(
        f"Missing columns! Required: {required_cols} (not found: {missing_cols})"
    )

# Check missing values in the required columns
missing_count = df[required_cols].isnull().sum().sum()
if missing_count != 0:
    raise ValueError(f"Found {missing_count} missing values!")

# Check duplicate IDs (each student must appear exactly once)
dup_count = df['ID'].duplicated().sum()
if dup_count != 0:
    raise ValueError(f"Found {dup_count} duplicate IDs!")

print("="*80)
print("‚úÖ DATA VALIDATION PASSED")
print("="*80)
print(f"Total Mahasiswa: {len(df)}")
print(f"Jumlah Jurusan: {df['Jurusan'].nunique()}")
print(f"\nDistribusi Jenis Kelamin:")
print(df['Jenis Kelamin'].value_counts())
print(f"\nDistribusi HTQ:")
print(df['HTQ'].value_counts())
print(f"\nTop 5 Jurusan:")
print(df['Jurusan'].value_counts().head())
print("\nSample Data:")
# Last expression of the cell: rendered by the notebook as a table preview.
df.head(10)

## 3. Data Preprocessing

In [None]:
def preprocess_data(df, jumlah_kelompok):
    """Clean the student table and derive the statistics the GA needs.

    Args:
        df: DataFrame with at least the columns 'HTQ' and 'Jenis Kelamin'.
            The input is not modified; a copy is cleaned and returned.
        jumlah_kelompok: Number of groups (K) to split the students into.
            Must satisfy 1 <= K <= number of rows.

    Returns:
        dict with keys:
            'df_clean': copy of ``df`` with 'HTQ' normalized to 0/1,
            'N': number of students, 'L' / 'P': rows whose 'Jenis Kelamin'
            equals 'LK' / 'PR', 'K': number of groups,
            'PL' / 'PP': L/N and P/N,
            'A': base group size (N // K), 'sisa': remainder (N % K),
            'expected_sizes': list of K sizes, the first ``sisa`` groups
            getting one extra member,
            'max_fitness': K * 4 (four constraints per group).

    Raises:
        ValueError: if ``df`` is empty or ``jumlah_kelompok`` is outside
            1..N (a zero-size group cannot be evaluated).
    """
    N = len(df)
    K = jumlah_kelompok

    if N == 0:
        raise ValueError("Cannot preprocess an empty dataset")
    if not 1 <= K <= N:
        raise ValueError(
            f"jumlah_kelompok must be between 1 and the number of students ({N}), got {K}"
        )

    df_clean = df.copy()

    # Normalize HTQ to binary. Values are compared case-insensitively and
    # with surrounding whitespace removed, so ' Lulus ' counts as passed.
    htq_true_values = ['ya', 'lulus', '1']
    df_clean['HTQ'] = (
        df_clean['HTQ'].astype(str).str.strip().str.lower()
        .isin(htq_true_values)
        .astype(int)
    )

    # Aggregate gender counts
    L = (df_clean['Jenis Kelamin'] == 'LK').sum()
    P = (df_clean['Jenis Kelamin'] == 'PR').sum()

    # Overall gender proportions, used as the per-group target
    PL = L / N
    PP = P / N

    # Spread N students over K groups as evenly as possible
    A = N // K
    sisa = N % K
    expected_sizes = [A + 1 if i < sisa else A for i in range(K)]

    # Each group can score at most 4 (one point per constraint C1..C4)
    max_fitness = K * 4

    return {
        'df_clean': df_clean, 'N': N, 'L': L, 'P': P, 'K': K,
        'PL': PL, 'PP': PP, 'A': A, 'sisa': sisa,
        'expected_sizes': expected_sizes, 'max_fitness': max_fitness
    }

# Number of groups to form - ADJUST THIS VALUE
JUMLAH_KELOMPOK = 190

# Run the preprocessing step and keep the cleaned table handy for later cells.
preprocessed = preprocess_data(df, JUMLAH_KELOMPOK)
df_clean = preprocessed['df_clean']

# Summary report, emitted as a single multi-line print.
print("\n".join([
    "="*80,
    "‚úÖ PREPROCESSING COMPLETE",
    "="*80,
    f"Total Mahasiswa (N): {preprocessed['N']}",
    f"Laki-laki (L): {preprocessed['L']} ({preprocessed['PL']:.2%})",
    f"Perempuan (P): {preprocessed['P']} ({preprocessed['PP']:.2%})",
    f"Jumlah Kelompok (K): {preprocessed['K']}",
    f"Base size: {preprocessed['A']}, Sisa: {preprocessed['sisa']}",
    f"Expected sizes: {preprocessed['expected_sizes'][:5]}... (first 5)",
    f"Max Fitness: {preprocessed['max_fitness']}",
]))

## 4. Constraint Evaluation Functions

In [None]:
def evaluate_C1(group_df):
    """C1: the 'HTQ' column of the group sums to at least 1.

    Returns 1 when the constraint holds, otherwise 0.
    """
    has_htq_member = group_df['HTQ'].sum() >= 1
    return int(has_htq_member)

def evaluate_C2(group_df):
    """C2: the number of distinct 'Jurusan' values exceeds half the group size.

    Returns 1 when the constraint holds, otherwise 0.
    """
    distinct_majors = group_df['Jurusan'].nunique()
    half_size = 0.5 * len(group_df)
    if distinct_majors > half_size:
        return 1
    return 0

def evaluate_C3(group_df, PL, PP, tolerance=0.1):
    """C3: the group's gender proportions stay within ±tolerance of the targets.

    Args:
        group_df: DataFrame of the group's members with a 'Jenis Kelamin'
            column; rows equal to 'LK' / 'PR' are counted.
        PL: target proportion of 'LK' members.
        PP: target proportion of 'PR' members.
        tolerance: maximum allowed absolute deviation (inclusive) of each
            proportion from its target. Defaults to 0.1.

    Returns:
        1 when both deviations are within the tolerance, otherwise 0.
        An empty group never satisfies the constraint.
    """
    n_group = len(group_df)
    if n_group == 0:
        # No members: proportions are undefined, so the constraint fails
        # (and we avoid a 0/0 division warning).
        return 0

    gender = group_df['Jenis Kelamin']
    lk_dev = abs((gender == 'LK').sum() / n_group - PL)
    pr_dev = abs((gender == 'PR').sum() / n_group - PP)

    # Binary floating point cannot represent most decimal fractions exactly
    # (0.4 - 0.3 evaluates to 0.10000000000000003), so a deviation of exactly
    # `tolerance` would be rejected without a small slack.
    limit = tolerance + 1e-9

    return 1 if (lk_dev <= limit and pr_dev <= limit) else 0

def evaluate_C4(group_df, expected_size):
    """C4: the group has exactly ``expected_size`` rows.

    Returns 1 when the constraint holds, otherwise 0.
    """
    size_matches = len(group_df) == expected_size
    return int(size_matches)

print("‚úÖ Constraint functions defined (C1, C2, C3, C4)")

## 5. Fitness Calculation

In [None]:
def decode_kromosom(kromosom, df_clean, expected_sizes):
    """Split a permutation chromosome into per-group DataFrames.

    The chromosome is cut into consecutive slices whose lengths are given by
    ``expected_sizes``; for each slice, the rows of ``df_clean`` whose 'ID'
    is in that slice are returned as an independent copy. Rows keep the
    order they have in ``df_clean``.

    Returns:
        list of DataFrames, one per entry of ``expected_sizes``.
    """
    id_column = df_clean['ID']
    decoded = []
    offset = 0

    for group_size in expected_sizes:
        member_ids = kromosom[offset:offset + group_size]
        decoded.append(df_clean[id_column.isin(member_ids)].copy())
        offset += group_size

    return decoded

def calculate_fitness(kromosom, df_clean, expected_sizes, PL, PP):
    """Score a chromosome: number of satisfied constraints over all groups.

    The chromosome is decoded into groups and every group contributes one
    point for each of C1..C4 it satisfies.
    """
    decoded_groups = decode_kromosom(kromosom, df_clean, expected_sizes)

    return sum(
        evaluate_C1(members)
        + evaluate_C2(members)
        + evaluate_C3(members, PL, PP)
        + evaluate_C4(members, wanted_size)
        for members, wanted_size in zip(decoded_groups, expected_sizes)
    )

print("‚úÖ Fitness calculation functions defined")

## 6. Population Initialization

In [None]:
def initialize_population(df_clean, popsize):
    """Build ``popsize`` chromosomes, each a random permutation of the 'ID' column.

    Uses NumPy's global random state, so seeding it makes the result
    reproducible.
    """
    all_ids = df_clean['ID'].values
    return [np.random.permutation(all_ids) for _ in range(popsize)]

print("‚úÖ Population initialization function defined")

## 7. Parent Selection

In [None]:
def select_parents_for_crossover(population, cr):
    """Randomly pair up individuals for crossover.

    Args:
        population: list of chromosomes.
        cr: crossover rate; ``int(len(population) * cr)`` individuals are
            selected, rounded up to an even number.

    Returns:
        list of ``(parent_a, parent_b)`` tuples. Individuals are drawn
        without replacement, so nobody appears in two pairs. Empty when
        fewer than two individuals can be selected.
    """
    pop_count = len(population)

    num_crossover = int(pop_count * cr)
    if num_crossover % 2 != 0:
        num_crossover += 1

    # Cannot select more individuals than exist...
    num_crossover = min(num_crossover, pop_count)
    # ...and clamping to an odd population size would leave one parent
    # without a partner, so drop it to keep the count even.
    num_crossover -= num_crossover % 2

    # Need at least 2 individuals for crossover
    if num_crossover < 2:
        return []

    indices = np.random.choice(pop_count, num_crossover, replace=False)
    return [
        (population[first], population[second])
        for first, second in zip(indices[0::2], indices[1::2])
    ]

def select_parents_for_mutation(population, mr):
    """Randomly pick the individuals that will be mutated.

    ``int(len(population) * mr)`` individuals (capped at the population
    size) are drawn without replacement. Returns an empty list when that
    count is zero or the population is empty.
    """
    pool_size = len(population)
    wanted = int(pool_size * mr)

    # Nothing to select
    if pool_size == 0 or wanted == 0:
        return []

    wanted = min(wanted, pool_size)
    picks = np.random.choice(pool_size, wanted, replace=False)
    return [population[pick] for pick in picks]

print("‚úÖ Parent selection functions defined")

## 8. PMX Crossover

In [None]:
def _pmx_repair(child, received, displaced, start, end):
    """Resolve duplicates outside ``child[start:end]`` after a PMX segment swap.

    ``received`` is the segment the child took from the other parent and
    ``displaced`` is the segment of its own parent that was overwritten
    (same positions). A value outside the segment that also occurs in
    ``received`` is replaced by following received -> displaced until a
    value not present in the segment is reached.
    """
    # received value -> value it displaced at the same position.
    # setdefault keeps the first occurrence if the input has duplicates.
    mapping = {}
    for taken, lost in zip(received, displaced):
        mapping.setdefault(taken, lost)

    for i in range(len(child)):
        # Only positions outside the crossover segment can conflict
        if start <= i < end:
            continue

        value = child[i]
        if value not in mapping:
            continue

        # Follow the mapping chain; `seen` stops cycles, which can only
        # occur when the parents are not valid permutations of each other.
        seen = set()
        while value in mapping and value not in seen:
            seen.add(value)
            value = mapping[value]

        if value not in mapping:
            child[i] = value
        # else: keep the original value (not reachable for valid permutations)


def pmx_crossover(parent1, parent2):
    """Partially Mapped Crossover (PMX) for permutation chromosomes.

    Two cut points are drawn from NumPy's global random state, the segment
    between them is swapped between the parents, and duplicates created
    outside the segment are repaired through the segment mapping.

    Args:
        parent1, parent2: equally long sequences supporting ``.copy()`` and
            slice assignment (NumPy arrays or lists) that hold the same set
            of genes.

    Returns:
        ``(child1, child2)``; the parents are left unmodified.

    Raises:
        ValueError: if the parents are empty or differ in length.
    """
    size = len(parent1)
    if size == 0 or size != len(parent2):
        raise ValueError("PMX needs two non-empty parents of equal length")

    # Choose two random cut points
    cx_point1 = np.random.randint(0, size)
    cx_point2 = np.random.randint(0, size)
    if cx_point1 > cx_point2:
        cx_point1, cx_point2 = cx_point2, cx_point1

    # Ensure we have at least one gene to swap
    if cx_point1 == cx_point2:
        cx_point2 = min(cx_point1 + 1, size)

    # Initialize offspring as copies
    child1 = parent1.copy()
    child2 = parent2.copy()

    segment1 = parent1[cx_point1:cx_point2]
    segment2 = parent2[cx_point1:cx_point2]

    # Swap middle segments
    child1[cx_point1:cx_point2] = segment2
    child2[cx_point1:cx_point2] = segment1

    # Repair duplicates outside the swapped segment
    _pmx_repair(child1, segment2, segment1, cx_point1, cx_point2)
    _pmx_repair(child2, segment1, segment2, cx_point1, cx_point2)

    return child1, child2

print("‚úÖ PMX Crossover function defined (fixed infinite loop bug)")

## 9. Reciprocal Exchange Mutation

In [None]:
def reciprocal_exchange_mutation(parent):
    """Return a copy of ``parent`` with two distinct, randomly chosen genes swapped.

    Positions are drawn from NumPy's global random state; the parent itself
    is left untouched.
    """
    mutant = parent.copy()
    first, second = np.random.choice(len(mutant), 2, replace=False)
    held = mutant[first]
    mutant[first] = mutant[second]
    mutant[second] = held
    return mutant

print("‚úÖ Reciprocal Exchange Mutation function defined")

## 10. Elitism Replacement Strategy

In [None]:
def elitism_replacement(population, offspring, df_clean, expected_sizes, PL, PP, popsize):
    """Keep the ``popsize`` fittest individuals of parents plus offspring.

    Fitness is recomputed for every individual in the combined pool.

    Returns:
        ``(new_population, new_fitness)``, both ordered from best to worst.
    """
    pool = population + offspring

    # Score every candidate in the pool
    scores = []
    for candidate in pool:
        scores.append(calculate_fitness(candidate, df_clean, expected_sizes, PL, PP))

    # Indices of the pool ranked by descending fitness, cut to PopSize
    ranking = np.argsort(scores)[::-1][:popsize]

    survivors = [pool[rank] for rank in ranking]
    survivor_scores = [scores[rank] for rank in ranking]

    return survivors, survivor_scores

def elitism_replacement_optimized(population, population_fitness, offspring, 
                                   df_clean, expected_sizes, PL, PP, popsize):
    """Elitist survivor selection that reuses cached parent fitness.

    Only the offspring are evaluated; ``population_fitness`` supplies the
    scores of the current population (same order as ``population``).

    Returns:
        ``(new_population, new_fitness)``: the ``popsize`` best individuals
        of the combined pool and their scores, best first. Ties keep their
        pool order (parents before offspring).
    """
    # Evaluate ONLY the newcomers
    child_scores = []
    for child in offspring:
        child_scores.append(calculate_fitness(child, df_clean, expected_sizes, PL, PP))

    # Merge individuals and their scores in matching order
    pool = population + offspring
    pool_scores = population_fitness + child_scores

    # Rank pool positions by descending score and keep the top PopSize
    ranking = sorted(
        range(len(pool)),
        key=lambda position: pool_scores[position],
        reverse=True,
    )[:popsize]

    survivors = [pool[position] for position in ranking]
    survivor_scores = [pool_scores[position] for position in ranking]

    return survivors, survivor_scores

print("‚úÖ Elitism Replacement functions defined (standard & optimized)")

## 11. Define GA Runner Function

In [None]:
import time

def _count_constraint_satisfaction(groups, expected_sizes, PL, PP):
    """Count, per constraint, how many groups satisfy it, plus fully valid groups."""
    stats = {
        'C1_satisfied': 0, 'C2_satisfied': 0, 
        'C3_satisfied': 0, 'C4_satisfied': 0, 
        'perfect_groups': 0
    }

    for group_df, expected_size in zip(groups, expected_sizes):
        c1 = evaluate_C1(group_df)
        c2 = evaluate_C2(group_df)
        c3 = evaluate_C3(group_df, PL, PP)
        c4 = evaluate_C4(group_df, expected_size)

        stats['C1_satisfied'] += c1
        stats['C2_satisfied'] += c2
        stats['C3_satisfied'] += c3
        stats['C4_satisfied'] += c4

        if c1 + c2 + c3 + c4 == 4:
            stats['perfect_groups'] += 1

    return stats


def run_ga_single(df_clean, preprocessed, popsize, max_gen, cr, mr, seed, target_fitness=1.0):
    """
    Run a single GA experiment with the given parameters.

    Args:
        df_clean: cleaned student table (as produced by ``preprocess_data``).
        preprocessed: dict from ``preprocess_data``; the keys 'K', 'PL',
            'PP', 'expected_sizes' and 'max_fitness' are read.
        popsize: number of individuals kept per generation.
        max_gen: maximum number of generations.
        cr: crossover rate passed to ``select_parents_for_crossover``.
        mr: mutation rate passed to ``select_parents_for_mutation``.
        seed: seed applied to both NumPy's and Python's global RNG.
        target_fitness: fraction of ``max_fitness`` at which the run stops
            early.

    Returns:
        dict: best fitness (absolute and as a fraction of the maximum),
        generation counters, timing, per-constraint satisfaction counts and
        ratios for the best solution, and per-generation best/average
        fitness histories.
    """
    # Set seed for reproducibility
    np.random.seed(seed)
    random.seed(seed)

    # Extract preprocessed data
    K = preprocessed['K']
    PL = preprocessed['PL']
    PP = preprocessed['PP']
    expected_sizes = preprocessed['expected_sizes']
    max_fitness = preprocessed['max_fitness']

    # Initialize
    start_time = time.time()
    population = initialize_population(df_clean, popsize)

    # Calculate initial fitness (timed separately)
    init_fitness_start = time.time()
    population_fitness = [
        calculate_fitness(kromosom, df_clean, expected_sizes, PL, PP)
        for kromosom in population
    ]
    init_fitness_time = time.time() - init_fitness_start

    # Track best solution. The initial population's best is recorded here,
    # before any generation runs, so the reported improvement is measured
    # against the true starting point rather than the state after generation 1.
    best_fitness_history = []
    avg_fitness_history = []
    initial_best_fitness = max(population_fitness)
    best_overall_fitness = initial_best_fitness
    best_overall_solution = population[population_fitness.index(best_overall_fitness)].copy()

    # Main GA Loop
    generation = 0
    for generation in range(1, max_gen + 1):
        # Crossover
        offspring_cx = []
        for p1, p2 in select_parents_for_crossover(population, cr):
            offspring_cx.extend(pmx_crossover(p1, p2))

        # Mutation
        parents_mut = select_parents_for_mutation(population, mr)
        offspring_mut = [reciprocal_exchange_mutation(p) for p in parents_mut]

        # Combine offspring
        offspring = offspring_cx + offspring_mut

        # Replacement (population comes back sorted best-first)
        population, population_fitness = elitism_replacement_optimized(
            population, population_fitness, offspring, 
            df_clean, expected_sizes, PL, PP, popsize
        )

        # Track statistics
        best_fitness = population_fitness[0]
        avg_fitness = np.mean(population_fitness)
        best_fitness_history.append(best_fitness)
        avg_fitness_history.append(avg_fitness)

        # Update best overall
        if best_fitness > best_overall_fitness:
            best_overall_fitness = best_fitness
            best_overall_solution = population[0].copy()

        # Check termination
        if best_fitness >= target_fitness * max_fitness:
            break

    # Final results
    total_time = time.time() - start_time

    # Calculate constraint satisfaction of the best solution found
    best_groups = decode_kromosom(best_overall_solution, df_clean, expected_sizes)
    constraint_stats = _count_constraint_satisfaction(best_groups, expected_sizes, PL, PP)

    return {
        'seed': seed,
        'best_fitness': best_overall_fitness,
        'best_fitness_pct': best_overall_fitness / max_fitness,
        'final_generation': generation,
        'initial_best_fitness': initial_best_fitness,
        'fitness_improvement': best_overall_fitness - initial_best_fitness,
        'total_runtime_sec': total_time,
        'total_runtime_min': total_time / 60,
        'avg_time_per_gen': total_time / generation if generation > 0 else 0,
        'init_fitness_time': init_fitness_time,
        'target_reached': 'Yes' if best_overall_fitness >= target_fitness * max_fitness else 'No',
        # First generation whose best equals the overall best (1-based)
        'generations_to_best': best_fitness_history.index(best_overall_fitness) + 1 if best_overall_fitness in best_fitness_history else generation,
        'avg_final_fitness': avg_fitness_history[-1] if avg_fitness_history else 0,
        'C1_satisfied': constraint_stats['C1_satisfied'],
        'C2_satisfied': constraint_stats['C2_satisfied'],
        'C3_satisfied': constraint_stats['C3_satisfied'],
        'C4_satisfied': constraint_stats['C4_satisfied'],
        'perfect_groups': constraint_stats['perfect_groups'],
        'C1_pct': constraint_stats['C1_satisfied'] / K,
        'C2_pct': constraint_stats['C2_satisfied'] / K,
        'C3_pct': constraint_stats['C3_satisfied'] / K,
        'C4_pct': constraint_stats['C4_satisfied'] / K,
        'perfect_groups_pct': constraint_stats['perfect_groups'] / K,
        'best_fitness_history': best_fitness_history,
        'avg_fitness_history': avg_fitness_history
    }

print("‚úÖ GA Runner Function defined")

## 12. Define Test Scenarios - PHASE 3: Cr & Mr Combination Testing

In [None]:
# PHASE 3: Test Cr & Mr Combinations
# Population size and generation budget are fixed (see PHASE3_POPSIZE and
# PHASE3_GENERATION below); only Cr and Mr vary between scenarios.
# Goal: Find optimal combination of Crossover Rate (Cr) and Mutation Rate (Mr)

# ========================================
# CHANGE THE Cr AND Mr COMBINATIONS HERE:
# ========================================
# Format: [(Cr1, Mr1), (Cr2, Mr2), ...]
# Example: [(0.5, 0.5), (0.7, 0.3), (0.8, 0.2)]
# Cr and Mr values must be between 0.0 and 1.0

CR_MR_COMBINATIONS = [
    (0.1, 0.9),  # EDIT HERE to change the combinations to test
]

# Fixed parameters shared by every Phase 3 scenario. Defined once so the
# scenario table and the printed summary cannot drift apart.
PHASE3_POPSIZE = 70
PHASE3_GENERATION = 400

# ========================================

scenarios_cr_mr = []
for i, (cr, mr) in enumerate(CR_MR_COMBINATIONS, start=1):
    scenarios_cr_mr.append({
        'scenario_id': i,
        'scenario_name': f'S{i:02d}_Cr{cr}_Mr{mr}',
        'phase': 'Phase 3: Cr & Mr Test',
        'popsize': PHASE3_POPSIZE,
        'generation': PHASE3_GENERATION,
        'cr': cr,
        'mr': mr
    })

# Use only Phase 3 scenarios
all_scenarios = scenarios_cr_mr

print("="*80)
print("üìã PHASE 3: Cr & Mr COMBINATION TEST SCENARIOS")
print("="*80)
print(f"Total Scenarios: {len(all_scenarios)}")
print(f"Cr & Mr Combinations: {CR_MR_COMBINATIONS}")
print(f"Fixed Parameters:")
print(f"  - PopSize: {PHASE3_POPSIZE}")
print(f"  - Generation: {PHASE3_GENERATION}")
print(f"\nEach scenario will be run 10 times with different seeds")
print(f"Total GA runs: {len(all_scenarios)} √ó 10 = {len(all_scenarios) * 10} runs")
print(f"Estimated time: ~{len(all_scenarios) * 0.5:.1f} hours (3 min/run average)")
print("="*80)

# Display all scenarios
print("\nüìä All Phase 3 Scenarios:")
for s in scenarios_cr_mr:
    print(f"  {s['scenario_name']}: PopSize={s['popsize']}, Gen={s['generation']}, Cr={s['cr']}, Mr={s['mr']}")


## 13. Run All Test Scenarios

In [None]:
# Configuration
NUM_RUNS_PER_SCENARIO = 10
TARGET_FITNESS = 1.0

# Clear previous results
all_results = []

print("="*80)
print("üöÄ STARTING COMPREHENSIVE GA TESTING")
print("="*80)
print(f"Total Scenarios: {len(all_scenarios)}")
print(f"Runs per Scenario: {NUM_RUNS_PER_SCENARIO}")
print(f"Total Runs: {len(all_scenarios) * NUM_RUNS_PER_SCENARIO}")
print(f"Random Seed: RANDOM (based on system time)")
print("="*80)

# Overall timing
overall_start = time.time()

# Run all scenarios
for scenario_idx, scenario in enumerate(all_scenarios, start=1):
    scenario_id = scenario['scenario_id']
    scenario_name = scenario['scenario_name']
    phase = scenario['phase']
    popsize = scenario['popsize']
    generation = scenario['generation']
    cr = scenario['cr']
    mr = scenario['mr']
    
    print(f"\n{'='*80}")
    print(f"üìç SCENARIO {scenario_idx}/{len(all_scenarios)}: {scenario_name}")
    print(f"{'='*80}")
    print(f"Phase: {phase}")
    print(f"Parameters: PopSize={popsize}, Gen={generation}, Cr={cr}, Mr={mr}")
    print(f"Running {NUM_RUNS_PER_SCENARIO} independent runs...")
    
    scenario_start = time.time()
    scenario_results = []
    
    # Each run gets its own seed derived from the current time plus the run
    # counter; the seed is printed and stored in the result for replay.
    for run_id in range(1, NUM_RUNS_PER_SCENARIO + 1):
        seed = int(time.time() * 1000000) % (2**31) + run_id
        
        print(f"\n  Run {run_id}/{NUM_RUNS_PER_SCENARIO} (Seed={seed})...", end=" ")
        run_start = time.time()
        
        try:
            result = run_ga_single(
                df_clean, preprocessed, 
                popsize, generation, cr, mr, 
                seed, TARGET_FITNESS
            )
            
            # Add scenario info to result
            result['scenario_id'] = scenario_id
            result['scenario_name'] = scenario_name
            result['phase'] = phase
            result['run_id'] = run_id
            result['popsize'] = popsize
            result['max_generation'] = generation
            result['cr'] = cr
            result['mr'] = mr
            
            scenario_results.append(result)
            all_results.append(result)
            
            run_time = time.time() - run_start
            print(f"‚úÖ Done in {run_time:.1f}s | Fitness: {result['best_fitness']:.0f}/{preprocessed['max_fitness']} ({result['best_fitness_pct']:.2%}) | Gen: {result['final_generation']}")
            
        except Exception as e:
            # Best effort: a failed run is reported and skipped so the
            # remaining runs of a long experiment still execute.
            print(f"‚ùå FAILED: {str(e)}")
            continue
    
    # Calculate scenario summary statistics (only if at least one run succeeded)
    if scenario_results:
        scenario_time = time.time() - scenario_start
        
        best_fitnesses = [r['best_fitness'] for r in scenario_results]
        runtimes = [r['total_runtime_sec'] for r in scenario_results]
        final_gens = [r['final_generation'] for r in scenario_results]
        
        print(f"\n{'‚îÄ'*80}")
        print(f"üìä SCENARIO {scenario_name} SUMMARY:")
        print(f"{'‚îÄ'*80}")
        print(f"  Best Fitness (mean ¬± std): {np.mean(best_fitnesses):.2f} ¬± {np.std(best_fitnesses):.2f}")
        print(f"  Best Fitness (min-max): {np.min(best_fitnesses):.2f} - {np.max(best_fitnesses):.2f}")
        print(f"  Runtime (mean ¬± std): {np.mean(runtimes):.1f}s ¬± {np.std(runtimes):.1f}s")
        print(f"  Final Generation (mean): {np.mean(final_gens):.1f}")
        print(f"  Scenario Total Time: {scenario_time/60:.2f} minutes")
        
        # ETA: average wall time per finished scenario times scenarios left
        elapsed_total = time.time() - overall_start
        remaining_scenarios = len(all_scenarios) - scenario_idx
        avg_time_per_scenario = elapsed_total / scenario_idx
        eta_seconds = remaining_scenarios * avg_time_per_scenario
        eta_hours = eta_seconds / 3600
        
        print(f"\n‚è±Ô∏è  Progress: {scenario_idx}/{len(all_scenarios)} scenarios completed")
        print(f"  Elapsed Time: {elapsed_total/3600:.2f} hours")
        print(f"  ETA: {eta_hours:.2f} hours")
        print(f"{'‚îÄ'*80}")

print(f"\n{'='*80}")
print("‚úÖ ALL SCENARIOS COMPLETED")
print(f"{'='*80}")
total_time = time.time() - overall_start
print(f"Total Runs: {len(all_results)}")
print(f"Total Time: {total_time/3600:.2f} hours")
# Guard the average: every run may have failed (or there were no scenarios),
# in which case there is nothing to divide by.
if all_results:
    print(f"Average Time per Run: {total_time/len(all_results):.1f} seconds")
else:
    print("Average Time per Run: n/a (no successful runs)")
print(f"{'='*80}")


## 14. Export All Results to CSV

In [None]:
# Export SIMPLIFIED results - Separate file per Cr & Mr combination with normalized fitness
print("="*80)
print("üíæ EXPORTING SIMPLIFIED RESULTS (SEPARATE FILES PER Cr & Mr COMBINATION)")
print("="*80)

timestamp = datetime.now().strftime('%Y%m%d_%H%M%S')
max_fitness = preprocessed['max_fitness']

# Group results by Cr & Mr combination and create separate files
print("\nüìä Creating separate CSV files per Cr & Mr combination...")
exported_files = []

for cr, mr in CR_MR_COMBINATIONS:
    # Filter results for this Cr & Mr combination
    combo_results = [r for r in all_results if r['cr'] == cr and r['mr'] == mr]
    
    # Nothing to export when every run of this combination failed
    if not combo_results:
        continue
    
    # Create simplified records with normalized fitness
    combo_data = []
    for result in combo_results:
        combo_data.append({
            # PARAMETERS
            'Phase': result['phase'],
            'Scenario_Name': result['scenario_name'],
            'Run': result['run_id'],
            'PopSize': result['popsize'],
            'Generation': result['max_generation'],
            'Cr': result['cr'],
            'Mr': result['mr'],
            
            # NORMALIZED FITNESS (0.0 - 1.0)
            'Fitness': result['best_fitness'] / max_fitness
        })
    
    # Create DataFrame and save
    df_combo = pd.DataFrame(combo_data)
    filename = f'{OUTPUT_DIR}/cr_{cr}_mr_{mr}_results_{timestamp}.csv'
    df_combo.to_csv(filename, index=False)
    exported_files.append(filename)
    
    # Calculate statistics for this combination
    fitness_values = df_combo['Fitness'].values
    print(f"\n   ‚úÖ Cr={cr}, Mr={mr}:")
    print(f"      File: cr_{cr}_mr_{mr}_results_{timestamp}.csv")
    print(f"      Runs: {len(df_combo)}")
    print(f"      Fitness Mean: {np.mean(fitness_values):.4f}")
    print(f"      Fitness Std: {np.std(fitness_values):.4f}")
    print(f"      Fitness Range: {np.min(fitness_values):.4f} - {np.max(fitness_values):.4f}")

print("\n" + "="*80)
print("‚úÖ EXPORT COMPLETE")
print("="*80)
print(f"Total files created: {len(exported_files)}")
print(f"Location: {OUTPUT_DIR}/")
print("\nüìã Files Created:")
for i, filepath in enumerate(exported_files, 1):
    filename = os.path.basename(filepath)
    print(f"   {i}. {filename}")

print("\nüìä File Format:")
print(f"   Columns: Phase, Scenario_Name, Run, PopSize, Generation, Cr, Mr, Fitness")
print(f"   Fitness Range: 0.0 (worst) to 1.0 (perfect)")
print(f"   Rows per file: {NUM_RUNS_PER_SCENARIO} (one per run)")
print("="*80)

# Display comparison summary
print("\nüìä COMPARISON SUMMARY (All Cr & Mr Combinations):")
comparison_data = []
for cr, mr in CR_MR_COMBINATIONS:
    combo_results = [r for r in all_results if r['cr'] == cr and r['mr'] == mr]
    # Skip combinations without any successful run: min/max of an empty
    # sequence raises, and mean/std would only yield NaN.
    if not combo_results:
        continue
    fitness_values = [r['best_fitness'] / max_fitness for r in combo_results]
    
    comparison_data.append({
        'Cr': cr,
        'Mr': mr,
        'Combination': f'Cr={cr}, Mr={mr}',
        'Fitness_Mean': np.mean(fitness_values),
        'Fitness_Std': np.std(fitness_values),
        'Fitness_Min': np.min(fitness_values),
        'Fitness_Max': np.max(fitness_values),
        'Num_Runs': len(fitness_values)
    })

comparison_df = pd.DataFrame(comparison_data)

if comparison_data:
    print(comparison_df.to_string(index=False))

    print("\n\nüèÜ Best Cr & Mr Combination:")
    best_idx = comparison_df['Fitness_Mean'].idxmax()
    best_combo = comparison_df.loc[best_idx]
    print(f"   Combination: Cr={best_combo['Cr']}, Mr={best_combo['Mr']}")
    print(f"   Mean Fitness: {best_combo['Fitness_Mean']:.4f} ¬± {best_combo['Fitness_Std']:.4f}")
    print(f"   Range: {best_combo['Fitness_Min']:.4f} - {best_combo['Fitness_Max']:.4f}")
else:
    # No combination produced a result, so there is no best one to report.
    print("   (no successful runs to compare)")
