# In [None]:
import pandas as pd
import time
import os
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from skelm import ELMClassifier
from sklearn.metrics import accuracy_score
import matplotlib.pyplot as plt

# Function for scikit-elm OS-ELM tuning and plotting
def tune_and_plot_os_elm_lib(file_path, neuron_range, batch_sizes, random_seed=42):
    """
    Tune a scikit-elm ELMClassifier trained batch-by-batch and plot the results.

    The CSV at ``file_path`` is read with its last column as the label and all
    preceding columns as features. The data is split 80/20 into train/test
    sets and the features are standardized with statistics from the training
    split. For every combination of batch size and hidden-neuron count a fresh
    classifier is trained incrementally, one batch at a time, then scored on
    the held-out test split. One figure per batch size shows accuracy and
    training time against the number of hidden neurons.

    Args:
        file_path: Path to a CSV file readable by ``pandas.read_csv``.
        neuron_range: Iterable of hidden-neuron counts to evaluate.
        batch_sizes: Iterable of positive integer batch sizes to evaluate.
        random_seed: Seed passed to the train/test split and to the classifier.

    Returns:
        None. Progress is printed and the figures are shown with ``plt.show()``.

    Raises:
        ValueError: If a batch size is not positive.
    """
    filename = os.path.basename(file_path)
    print(f"--- Starting scikit-elm OS-ELM Tuning for {filename} ---")

    # 1. Load and prepare data once
    df = pd.read_csv(file_path)
    if df.isnull().values.any():
        # numeric_only=True keeps the imputation from failing when the file
        # contains non-numeric columns; only numeric columns are mean-filled.
        df.fillna(df.mean(numeric_only=True), inplace=True)

    X = df.iloc[:, :-1].values
    y = df.iloc[:, -1].values
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=random_seed)

    scaler = StandardScaler()
    X_train_scaled = scaler.fit_transform(X_train)
    X_test_scaled = scaler.transform(X_test)

    # Incremental training needs the full label set up front: a single batch
    # is not guaranteed to contain every class.
    known_classes = pd.unique(y)
    n_train = len(X_train_scaled)

    # 2. Nested tuning loop
    all_results = []
    for batch_size in batch_sizes:
        if batch_size <= 0:
            # A non-positive step would either crash range() or silently
            # produce no batches, leaving the model untrained.
            raise ValueError(f"batch_size must be a positive integer, got {batch_size!r}")

        print(f"\nTesting with Batch Size: {batch_size}")
        results_for_size = []
        for n_neurons in neuron_range:
            model = ELMClassifier(classes=known_classes, n_neurons=n_neurons, random_state=random_seed)

            start_time = time.perf_counter()
            # Sequential training: partial_fit accumulates each batch into the
            # existing model, whereas fit() would discard everything learned
            # from earlier batches and keep only the last one.
            for start in range(0, n_train, batch_size):
                stop = start + batch_size
                model.partial_fit(X_train_scaled[start:stop], y_train[start:stop])
            training_time = time.perf_counter() - start_time

            predictions = model.predict(X_test_scaled)
            accuracy = accuracy_score(y_test, predictions)

            results_for_size.append({'neurons': n_neurons, 'accuracy': accuracy, 'training_time': training_time})
            print(f"  Neurons: {n_neurons:5d} | Accuracy: {accuracy*100:6.2f}% | Time: {training_time:.2f}s")

        all_results.append({'batch_size': batch_size, 'data': pd.DataFrame(results_for_size)})

    # 3. Plotting results for each batch_size
    for result_group in all_results:
        batch_size = result_group['batch_size']
        df_results = result_group['data']

        # Nothing to draw when neuron_range was empty.
        if df_results.empty:
            continue

        fig, (ax1, ax2) = plt.subplots(2, 1, figsize=(12, 10))
        fig.suptitle(f'scikit-elm OS-ELM Tuning for {filename} (Batch Size: {batch_size})', fontsize=16)

        ax1.plot(df_results['neurons'], df_results['accuracy'], marker='o')
        ax1.set_title('Accuracy vs. Number of Hidden Neurons')
        ax1.set_xlabel('Number of Hidden Neurons')
        ax1.set_ylabel('Accuracy')
        ax1.grid(True)

        ax2.plot(df_results['neurons'], df_results['training_time'], marker='o', color='r')
        ax2.set_title('Training Time vs. Number of Hidden Neurons')
        ax2.set_xlabel('Number of Hidden Neurons')
        ax2.set_ylabel('Training Time (seconds)')
        ax2.grid(True)

        # Leave room at the top of the figure for the suptitle.
        plt.tight_layout(rect=[0, 0.03, 1, 0.95])
        plt.show()

    print(f"--- Finished scikit-elm OS-ELM Tuning for {filename} ---\n")

# --- Main Script ---
# Runs the tuning routine once for every dataset file found in DATASET_PATH.
DATASET_PATH = '../Datasets'
# os.listdir also returns sub-directories and hidden entries (for example
# Jupyter's .ipynb_checkpoints), which cannot be read as CSV; keep only
# visible regular files, sorted so runs are processed in a stable order.
dataset_files = [
    os.path.join(DATASET_PATH, entry)
    for entry in sorted(os.listdir(DATASET_PATH))
    if not entry.startswith('.') and os.path.isfile(os.path.join(DATASET_PATH, entry))
]

# Define hyperparameter ranges to test
neuron_counts = [500, 550, 600, 650, 700, 750, 800, 850, 900, 950, 1000, 1100, 1200, 1300, 1400]
batch_sizes_to_test = [5000, 6000, 7000, 8000, 9000, 10000]  # Batch sizes from 5000 to 10000 in steps of 1000

print("Master scikit-elm OS-ELM tuning script started.\n")

for file in dataset_files:
    tune_and_plot_os_elm_lib(
        file_path=file,
        neuron_range=neuron_counts,
        batch_sizes=batch_sizes_to_test
    )