In [None]:
import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.metrics import Precision, Recall
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, balanced_accuracy_score, precision_score, recall_score, f1_score, classification_report, confusion_matrix
from imblearn.over_sampling import SMOTE
import tensorflow.keras.backend as K

In [None]:
def build_mlp(input_dim, num_layers, neurons_list, learning_rate):
    """Assemble and compile a fully connected binary classifier.

    Architecture: an input of width ``input_dim``, sigmoid hidden layers sized
    ``neurons_list[0]`` followed by ``neurons_list[1] .. neurons_list[num_layers - 1]``,
    and a single sigmoid output unit.

    The model is compiled with Adam at ``learning_rate``, binary cross-entropy
    loss, and accuracy / precision / recall metrics.

    Note: ``neurons_list[0]`` is always used, even if ``num_layers`` is below 1.
    """
    network = Sequential()
    network.add(tf.keras.Input(shape=(input_dim,)))

    # The first hidden layer is unconditional; the remaining ones follow num_layers.
    hidden_units = [neurons_list[0]]
    hidden_units.extend(neurons_list[idx] for idx in range(1, num_layers))
    for units in hidden_units:
        network.add(Dense(units, activation='sigmoid'))

    # Single sigmoid unit -> probability of the positive class.
    network.add(Dense(1, activation='sigmoid'))

    network.compile(
        optimizer=Adam(learning_rate=learning_rate),
        loss='binary_crossentropy',
        metrics=['accuracy', Precision(), Recall()],
    )
    return network

In [None]:
def fitness_fn(params, X_train, y_train, X_val, y_val, class_weight_dict):
    bounds = [
        (1, 2),           #number of layers
        (8, 128),        # neurons in layer 1
        (8, 128),        # neurons in layer 2
        (-4, -1),        # log10 learning rate
        (20, 200)        # epochs
    ]

    # Clip parameters
    params = np.array([np.clip(p, bounds[i][0], bounds[i][1]) for i, p in enumerate(params)])

    # Check for NaN or inf
    if np.any(np.isnan(params)) or np.any(np.isinf(params)):
        print(f"Invalid parameters detected: {params}")
        return float('inf')


    batch_size = 75


    num_layers=int(params[0])
    neurons = [int(params[i]) for i in range(1, 1 + num_layers)]
    learning_rate = 10 ** params[3]
    epochs = int(params[4])


    epochs = max(1, min(epochs, 200))


    print(f"  Testing config: layers={num_layers}, neurons={neurons}, lr={learning_rate:.6f}, batch={batch_size}, epochs={epochs}")

    try:
        model = build_mlp(X_train.shape[1], num_layers, neurons, learning_rate)
        history = model.fit(X_train, y_train, batch_size=batch_size, epochs=epochs, class_weight=class_weight_dict,
                           verbose=0, validation_data=(X_val, y_val))


        y_pred = model.predict(X_val, verbose=0)
        y_pred_binary = (y_pred > 0.5).astype(int).flatten()


        sse = np.sum((y_val - y_pred.flatten()) ** 2)

        lambda_reg = 0.001


        l1_penalty = 0
        for layer in model.layers:
            if hasattr(layer, 'kernel'):
                l1_penalty += np.sum(np.abs(layer.kernel.numpy()))

        sse_lasso = sse + lambda_reg * l1_penalty


        print(f"    SSE: {sse:.4f}, L1_penalty: {l1_penalty:.4f}, SSE_lasso: {sse_lasso:.4f}")

        # Clean memory
        del model
        tf.keras.backend.clear_session()

        return sse_lasso

    except Exception as e:
        print(f"Error in fitness function: {e}")
        return float('inf')

In [None]:
'''def m_mrfo_optimize(X_train, y_train, X_val, y_val, class_weight_dict, population_size=20, max_iter=30):

    import math

    dim = 5  # five search dimensions: num_layers, neurons_1, neurons_2, log10(lr), epochs
    bounds = [
        (1, 2),          #num of layers
        (8, 128),        # neurons 1
        (8, 128),        # neurons 2
        (-4, -1),        # log10 learning rate
        (20, 200)        # epochs
    ]

    # Initialize population
    population = np.random.uniform([b[0] for b in bounds], [b[1] for b in bounds], (population_size, dim))
    fitness = [fitness_fn(ind, X_train, y_train, X_val, y_val, class_weight_dict) for ind in population]
    best_idx = np.argmin(fitness)
    best_pos = population[best_idx].copy()

    beta = 0.005  # spiral coefficient
    alpha = 2     # somersault factor

    print(f"Initial best fitness: {fitness[best_idx]:.4f}")

    for t in range(max_iter):
        print(f"\nIteration {t+1}/{max_iter}")
        a = math.exp(-t / max_iter)  # adaptive coefficient

        for i in range(population_size):
            print(f"  Evaluating individual {i+1}/{population_size}:")

            r1, r2 = np.random.rand(), np.random.rand()
            new_pos = population[i].copy()

            if r1 < 0.5:  # Chain foraging
                leader = best_pos if r2 < 0.5 else population[(i - 1) % population_size]
                new_pos += r1 * (leader - population[i])
            else:  # Cyclone foraging
                if r2 < 0.5:
                    new_pos += beta * np.random.rand(dim) * (best_pos - population[i])
                else:
                    theta = 2 * np.pi * np.random.rand()
                    spiral = beta * np.exp(a * theta) * np.cos(theta)
                    new_pos += spiral * (best_pos - population[i])

            # Somersault foraging
            somersault = population[i] + alpha * (np.random.rand(dim) * best_pos - np.random.rand(dim) * population[i])
            new_pos = (new_pos + somersault) / 2.0

            # Bound new position
            for j in range(dim):
                new_pos[j] = np.clip(new_pos[j], bounds[j][0], bounds[j][1])

            score = fitness_fn(new_pos, X_train, y_train, X_val, y_val, class_weight_dict)

            if score < fitness[i]:
                population[i] = new_pos
                fitness[i] = score
                print(f"  --> IMPROVED: {score:.4f}")

                if score < fitness[best_idx]:
                    best_idx = i
                    best_pos = new_pos.copy()
                    print(f"  --> NEW GLOBAL BEST: {score:.4f}")
            else:
                print(f"  --> No improvement: {score:.4f} vs {fitness[i]:.4f}")

        print(f"Iteration {t+1}/{max_iter}, Best SSE_Lasso: {fitness[best_idx]:.4f}")

    return best_pos, fitness[best_idx]'''

In [None]:
def AOA_optimize(X_train, y_train, X_val, y_val, class_weight_dict, population_size=20, max_iter=30):
    """
    Arithmetic Optimization Algorithm (AOA) search over the MLP hyper-parameters.

    Each candidate is a 5-vector
    ``[num_layers, neurons_1, neurons_2, log10(learning_rate), epochs]`` that is
    scored with ``fitness_fn`` (lower is better). Candidates are generated from
    the current global best with one of four arithmetic operators per dimension
    and accepted greedily (a candidate replaces population member ``i`` only if
    it scores strictly lower).

    Parameters
    ----------
    X_train, y_train, X_val, y_val, class_weight_dict
        Forwarded unchanged to ``fitness_fn``.
    population_size : int
        Number of candidates kept in the population; must be >= 1.
    max_iter : int
        Number of update sweeps over the population.

    Returns
    -------
    (best_position, best_fitness)
        The best 5-vector found (NumPy array) and its fitness.

    Raises
    ------
    ValueError
        If ``population_size`` is smaller than 1.
    """
    if population_size < 1:
        raise ValueError("population_size must be at least 1")

    bounds = [
        (1, 2),          # num of layers
        (8, 128),        # neurons 1
        (8, 128),        # neurons 2
        (-4, -1),        # log10 learning rate
        (20, 200)        # epochs
    ]
    dim = len(bounds)

    # Layer count, neuron counts and epochs are integers; only log10(lr) is continuous.
    discrete_params = (0, 1, 2, 4)

    # AOA parameters
    alpha = 5     # sharpness of the MOP decay
    mu = 0.499    # fraction of the range used to scale a step

    # Per-dimension step scale used by all four operators (loop-invariant).
    step_scales = [(ub - lb) * mu + lb for lb, ub in bounds]

    def evaluate(candidate):
        """Score a candidate, mapping NaN/inf to +inf.

        NaN would be picked by np.argmin and could never be beaten by a '<'
        comparison, freezing the search on a diverged model.
        """
        score = fitness_fn(candidate, X_train, y_train, X_val, y_val, class_weight_dict)
        return score if np.isfinite(score) else float('inf')

    # Initialize population uniformly inside the bounds
    population = np.zeros((population_size, dim))
    for i in range(population_size):
        for j in range(dim):
            lb, ub = bounds[j]
            population[i][j] = np.random.uniform(lb, ub)
            if j in discrete_params:
                population[i][j] = round(population[i][j])

    # Evaluate initial population
    fitness_values = []
    print("Evaluating initial population...")
    for i in range(population_size):
        print(f"Individual {i+1}/{population_size}:")
        fitness = evaluate(population[i])
        fitness_values.append(fitness)
        print(f"  Final fitness = {fitness:.4f}")

    # Initial best solution
    best_idx = int(np.argmin(fitness_values))
    best_fitness = fitness_values[best_idx]
    best_position = population[best_idx].copy()

    print(f"Initial best fitness: {best_fitness:.4f}")

    # Main optimization loop
    for iteration in range(max_iter):
        print(f"\nIteration {iteration+1}/{max_iter}")

        # MOA falls linearly from 1 (first sweep) to 1/max_iter (last sweep).
        # Because the additive branch needs r1 > MOA, early sweeps almost
        # always take the multiply/divide branch (large jumps) and late sweeps
        # mostly take the add/subtract branch (small steps around the best).
        MOA = 1.0 - (iteration / max_iter)

        # MOP shrinks the step size and is exactly 0 on the final sweep.
        MOP = 1.0 - ((iteration + 1) / max_iter) ** (1.0 / alpha)

        for i in range(population_size):
            print(f"  Evaluating individual {i+1}/{population_size}:")

            new_solution = population[i].copy()

            for j in range(dim):
                lb, ub = bounds[j]
                scale = step_scales[j]

                r1 = np.random.rand()
                r2 = np.random.rand()
                r3 = np.random.rand()

                if r1 > MOA:
                    # Additive operators: small moves around the global best
                    # (dominant in late sweeps).
                    if r2 > 0.5:
                        # Addition operator (A)
                        new_solution[j] = best_position[j] + MOP * scale * r3
                    else:
                        # Subtraction operator (S)
                        new_solution[j] = best_position[j] - MOP * scale * r3
                else:
                    # Multiplicative operators: large jumps (dominant in early sweeps).
                    if r3 > 0.5:
                        # Multiplication operator (M)
                        new_solution[j] = best_position[j] * MOP * scale
                    else:
                        # Division operator (D)
                        divisor = MOP * scale
                        if abs(divisor) > 1e-10:  # Avoid division by zero (MOP hits 0 on the last sweep)
                            new_solution[j] = best_position[j] / divisor
                        else:
                            new_solution[j] = best_position[j]

                # Apply bounds
                new_solution[j] = np.clip(new_solution[j], lb, ub)

                # Handle discrete parameters
                if j in discrete_params:
                    new_solution[j] = round(new_solution[j])

            # Evaluate new solution
            new_fitness = evaluate(new_solution)

            # Update if better (greedy selection)
            if new_fitness < fitness_values[i]:
                population[i] = new_solution
                fitness_values[i] = new_fitness
                print(f"  --> IMPROVED: {new_fitness:.4f}")

                # Update global best
                if new_fitness < best_fitness:
                    best_fitness = new_fitness
                    best_position = new_solution.copy()
                    print(f"  --> NEW GLOBAL BEST: {best_fitness:.4f}")
            else:
                print(f"  --> No improvement: {new_fitness:.4f} vs {fitness_values[i]:.4f}")

        print(f"Iteration {iteration+1}/{max_iter}, Best Fitness: {best_fitness:.4f}")

    return best_position, best_fitness


In [None]:
'''def SCSO_optimize(X_train, y_train, X_val, y_val, class_weight_dict, population_size=20, max_iter=30):
    """
    SCSO optimization with reduced parameter space
    """
    dim = 5  # [num_layers, neurons_1, neurons_2, lr(log), epochs]

    bounds = [
        (1 ,2),          #num of layers
        (8, 128),        # neurons 1
        (8, 128),        # neurons 2
        (-4, -1),        # log10 learning rate
        (20, 200)        # epochs
    ]

    # Initialize population
    population = np.zeros((population_size, dim))
    for i in range(population_size):
        for j in range(dim):
            population[i, j] = np.random.uniform(bounds[j][0], bounds[j][1])

    # Evaluate initial population
    fitness_values = []
    print("Evaluating initial population...")
    for i in range(population_size):
        print(f"Individual {i+1}/{population_size}:")
        fitness = fitness_fn(population[i], X_train, y_train, X_val, y_val, class_weight_dict)
        fitness_values.append(fitness)
        print(f"  Final fitness = {fitness:.4f}")

    # Find best solution
    best_idx = np.argmin(fitness_values)
    best_fitness = fitness_values[best_idx]
    best_position = population[best_idx].copy()

    print(f"Initial best fitness: {best_fitness:.4f}")

    # Main optimization loop
    for iteration in range(max_iter):
        print(f"\nIteration {iteration+1}/{max_iter}")

        for i in range(population_size):
            # Current position
            current_pos = population[i].copy()

            # Random values for SCSO equations
            r = np.random.rand()
            R = np.random.rand()

            # Update pos
            if R <= 0.5:
                # Exploitation phase
                gamma = 2 * np.random.rand() - 1  # Random coefficient [-1, 1]
                new_pos = best_position + gamma * np.random.rand(dim) * (best_position - current_pos)
            else:
                # Exploration phase
                if np.random.rand() < 0.5:
                    # Random exploration around best solution
                    new_pos = best_position + np.random.randn(dim) * 0.1 * (bounds[0][1] - bounds[0][0])
                else:
                    new_pos = np.array([np.random.uniform(bounds[j][0], bounds[j][1]) for j in range(dim)])

            # Apply bounds and handle NaN/inf
            for d in range(dim):
                if np.isnan(new_pos[d]) or np.isinf(new_pos[d]):
                    new_pos[d] = np.random.uniform(bounds[d][0], bounds[d][1])
                new_pos[d] = np.clip(new_pos[d], bounds[d][0], bounds[d][1])

            # Evaluate new position
            print(f"  Evaluating individual {i+1}/{population_size}:")
            new_fitness = fitness_fn(new_pos, X_train, y_train, X_val, y_val, class_weight_dict)

            # Update if better
            if new_fitness < fitness_values[i]:
                population[i] = new_pos
                fitness_values[i] = new_fitness
                print(f"  --> IMPROVED: {new_fitness:.4f}")

                # Update global best
                if new_fitness < best_fitness:
                    best_fitness = new_fitness
                    best_position = new_pos.copy()
                    best_idx = i
                    print(f"  --> NEW GLOBAL BEST: {best_fitness:.4f}")
            else:
                print(f"  --> No improvement: {new_fitness:.4f} vs {fitness_values[i]:.4f}")

        print(f"Iteration {iteration+1}/{max_iter}, Best Fitness: {best_fitness:.4f}")

    return best_position, best_fitness'''

In [None]:
def evaluate_best_model(X_train, y_train, X_test, y_test, best_params, class_weight_dict):
    """Retrain the MLP with the optimised configuration and score it on a test set.

    Parameters
    ----------
    X_train, y_train : data the final model is fitted on.
    X_test, y_test : held-out features / binary labels used for every metric.
    best_params : sequence of 5 numbers
        ``[num_layers, neurons_1, neurons_2, log10(learning_rate), epochs]``.
    class_weight_dict : dict
        Forwarded to ``model.fit(class_weight=...)``.

    Returns
    -------
    dict
        Keys ``balanced_acc``, ``accuracy``, ``precision``, ``recall``,
        ``f1_score``, ``sse_lasso`` (SSE on predicted probabilities plus
        0.001 * L1 norm of the kernels), ``model`` (the fitted model) and
        ``history`` (the ``history`` dict returned by ``fit``).
    """
    print("Evaluating model based on optimised config:")

    # Fixed parameters
    batch_size = 75
    lambda_reg = 0.001  # same L1 weight as the fitness function uses

    # Variable parameters
    num_layers = int(best_params[0])
    neurons = [int(best_params[i]) for i in range(1, 1 + num_layers)]
    learning_rate = 10 ** best_params[3]
    epochs = max(1, min(int(best_params[4]), 200))

    print("Configuration:")
    print(f"  Number of layers: {num_layers}")
    print(f"  Neurons per layer: {neurons}")
    print(f"  Learning rate: {learning_rate:.6f}")
    print(f"  Batch size: {batch_size} (fixed)")
    print(f"  Epochs: {epochs}")

    model = build_mlp(X_train.shape[1], num_layers, neurons, learning_rate)

    print("\nTraining final model...")
    history = model.fit(X_train, y_train,
                        batch_size=batch_size,
                        epochs=epochs,
                        class_weight=class_weight_dict,
                        verbose=1)

    # Flatten predictions and labels once so every metric below sees 1-D
    # arrays; a column-vector or pandas y_test would otherwise broadcast into
    # an (n, n) matrix (or align by index) in the SSE computation.
    y_prob = np.asarray(model.predict(X_test), dtype=float).ravel()
    y_true = np.asarray(y_test).ravel()
    y_pred = (y_prob > 0.5).astype(int)

    # Calculate metrics
    balanced_acc = balanced_accuracy_score(y_true, y_pred)
    accuracy = accuracy_score(y_true, y_pred)
    precision = precision_score(y_true, y_pred, average='binary')
    recall = recall_score(y_true, y_pred, average='binary')
    f1 = f1_score(y_true, y_pred, average='binary')

    print("Final Model Performance Metrics")
    print(f"Balanced Accuracy: {balanced_acc:.4f}")
    print(f"Accuracy:  {accuracy:.4f}")
    print(f"Precision: {precision:.4f}")
    print(f"Recall:    {recall:.4f}")
    print(f"F1 Score:  {f1:.4f}")

    # Final SSE_lasso, computed the same way as the search objective so the
    # two numbers are comparable (note: on the test set, not the validation set).
    sse = np.sum((y_true.astype(float) - y_prob) ** 2)
    l1_penalty = 0
    for layer in model.layers:
        if hasattr(layer, 'kernel'):
            l1_penalty += np.sum(np.abs(layer.kernel.numpy()))
    sse_lasso = sse + lambda_reg * l1_penalty

    print(f"\nFinal SSE_lasso: {sse_lasso:.4f}")

    return {
        'balanced_acc': balanced_acc,
        'accuracy': accuracy,
        'precision': precision,
        'recall': recall,
        'f1_score': f1,
        'sse_lasso': sse_lasso,
        'model': model,
        'history': history.history
    }

In [None]:
def main(data_path='/content/Post_LDA_dataset.csv', seed=42):
    """Run the full pipeline: load data, search hyper-parameters with AOA, evaluate.

    Parameters
    ----------
    data_path : str
        CSV file containing the feature columns and a ``target`` column.
        Defaults to the original Colab location.
    seed : int or None
        Seed applied to NumPy's global RNG and TensorFlow before the search so
        that the stochastic optimisation is repeatable. Pass ``None`` to leave
        both generators unseeded.

    Returns
    -------
    (best_params, best_fitness, results)
        The best parameter vector and fitness from ``AOA_optimize`` and the
        metrics dict from ``evaluate_best_model``.
    """
    if seed is not None:
        # The population initialisation / AOA draws use np.random and the
        # network initialisation uses TensorFlow's RNG; seed both.
        np.random.seed(seed)
        tf.random.set_seed(seed)

    # Load dataset
    df = pd.read_csv(data_path)
    X = df.drop(columns=["target"])
    y = df['target'].values
    X_lda = X.values

    print(f"Dataset shape: {X_lda.shape}")
    print(f"Target distribution: {np.bincount(y)}")

    # 70 / 15 / 15 train / validation / test split. Stratify both splits so
    # the minority class keeps the same share in every subset.
    X_train, X_temp, y_train, y_temp = train_test_split(
        X_lda, y, test_size=0.3, random_state=42, stratify=y)
    X_val, X_test, y_val, y_test = train_test_split(
        X_temp, y_temp, test_size=0.5, random_state=42, stratify=y_temp)

    print(f"\nOriginal training set distribution: {np.bincount(y_train)}")

    class_weight_dict = {0: 1.0, 1: 1.5}
    print(f"Class weights: {class_weight_dict}")

    best_params, best_fitness = AOA_optimize(X_train, y_train, X_val, y_val, class_weight_dict, population_size=20, max_iter=30)

    print("Optimization results:")
    print(f"Best Parameters Found: {best_params}")
    print(f"Best Fitness (SSE_lasso): {best_fitness:.4f}")

    # Fixed parameters
    batch_size = 75

    # Variable parameters, decoded the same way the fitness function does
    num_layers = int(best_params[0])
    neurons = [int(best_params[i]) for i in range(1, 1 + num_layers)]
    learning_rate = 10 ** best_params[3]
    epochs = int(best_params[4])

    print("\nDecoded Parameters:")
    print(f"  Number of layers: {num_layers}")
    print(f"  Neurons per layer: {neurons}")
    print(f"  Learning rate: {learning_rate:.6f}")
    print(f"  Batch size: {batch_size} (fixed)")
    print(f"  Epochs: {epochs}")

    results = evaluate_best_model(X_train, y_train, X_test, y_test, best_params, class_weight_dict)

    return best_params, best_fitness, results

# Entry point: run the whole pipeline when this cell/script is executed
# directly (__name__ is "__main__"), but not when the code is imported.
if __name__ == "__main__":
    main()

Dataset shape: (4240, 1)
Target distribution: [3596  644]

Original training set distribution: [2519  449]
Class weights: {0: 1.0, 1: 1.5}
Evaluating initial population...
Individual 1/20:
  Testing config: layers=1, neurons=[109], lr=0.000866, batch=75, epochs=42
    SSE: 108.0669, L1_penalty: 41.1284, SSE_lasso: 108.1080
  Final fitness = 108.1080
Individual 2/20:
  Testing config: layers=1, neurons=[24], lr=0.029141, batch=75, epochs=22
    SSE: 109.2506, L1_penalty: 19.1701, SSE_lasso: 109.2698
  Final fitness = 109.2698
Individual 3/20:
  Testing config: layers=1, neurons=[96], lr=0.063093, batch=75, epochs=163
    SSE: 94.6975, L1_penalty: 99.5889, SSE_lasso: 94.7971
  Final fitness = 94.7971
Individual 4/20:
  Testing config: layers=1, neurons=[109], lr=0.000737, batch=75, epochs=183
    SSE: 111.2303, L1_penalty: 39.3292, SSE_lasso: 111.2696
  Final fitness = 111.2696
Individual 5/20:
  Testing config: layers=2, neurons=[48, 26], lr=0.004014, batch=75, epochs=68
    SSE: 108.37