In [1]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, confusion_matrix, classification_report
import time

# ------------------------------------------------------
# 1. Preprocess (same as BLS, INN, LSLC)
# ------------------------------------------------------

# Load the merged spreadsheet and substitute 0 for every NaN cell.
df = pd.read_excel('merged_df.xlsx').replace({np.nan: 0})

# The target is the 'label' column (cast to float); every other column is a feature.
y = df['label'].values.astype(float)
X = df.drop('label', axis=1).values

# Stratified 80/20 train/test split with a fixed seed.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    stratify=y,
    random_state=42,
)

# ------------------------------------------------------
# 2. Basis: Φ = [1, |x|]
# ------------------------------------------------------

def build_basis(X):
    """Return the design matrix [1, |x|] for a 2-D feature array.

    The result has one more column than ``X``: a leading column of ones
    (bias term) followed by the element-wise absolute values of ``X``.
    """
    n_rows = X.shape[0]
    bias_column = np.ones((n_rows, 1))
    magnitudes = np.abs(X)
    return np.hstack((bias_column, magnitudes))

# Expand both splits with the same basis so train and test share a column layout.
Phi_train, Phi_test = map(build_basis, (X_train, X_test))

# Width of the expanded design matrix (bias column + one column per raw feature).
n_features = Phi_train.shape[1]


# ------------------------------------------------------
# 3. Cost function = Mean Squared Error
# ------------------------------------------------------

def cost_function(w, Phi, y):
    """Mean squared error of the linear model ``Phi @ w`` against targets ``y``."""
    residuals = Phi @ w - y
    return np.mean(residuals * residuals)


# ------------------------------------------------------
# 4. ICA (Imperialist Competitive Algorithm) implementation, simplified variant
# ------------------------------------------------------

def ICA(
    Phi, y,
    n_countries=50,
    n_imperialists=5,
    assimilation_coeff=1.5,
    revolution_rate=0.1,
    max_iter=200,
    random_state=None,
    cost_fn=None,
):
    """Search for a weight vector with an imperialist-competitive heuristic.

    A population of random weight vectors ("countries") is drawn uniformly
    from [-1, 1]. The ``n_imperialists`` lowest-cost countries become
    imperialists and the rest become their colonies. Each iteration:

    1. every colony moves toward its imperialist by a random fraction of the
       gap, scaled by ``assimilation_coeff`` (assimilation);
    2. ``int(revolution_rate * n_colonies)`` randomly picked colonies are
       re-drawn uniformly from [-1, 1] (revolution);
    3. a colony that is cheaper than its imperialist trades places with it;
    4. one colony is moved from the empire with the highest noisy imperialist
       cost to the empire with the lowest.

    No empire is ever removed, so the number of imperialists stays constant.

    Parameters
    ----------
    Phi : ndarray of shape (n_samples, n_features)
        Design matrix handed to the cost function.
    y : ndarray
        Targets handed to the cost function.
    n_countries : int
        Population size.
    n_imperialists : int
        Number of empires; must satisfy ``1 <= n_imperialists <= n_countries``.
    assimilation_coeff : float
        Upper bound on the fraction of the colony-imperialist gap covered per
        step (values above 1 allow overshooting the imperialist).
    revolution_rate : float
        Fraction of colonies re-initialised each iteration.
    max_iter : int
        Number of iterations.
    random_state : None, int or numpy.random.RandomState
        ``None`` draws from the global ``np.random`` state (the historical
        behaviour); an int seeds a private generator for reproducible runs.
    cost_fn : callable or None
        ``cost_fn(w, Phi, y) -> float``. Defaults to the module-level
        ``cost_function``.

    Returns
    -------
    ndarray of shape (n_features,)
        A copy of the lowest-cost imperialist after the last iteration.

    Raises
    ------
    ValueError
        If ``n_imperialists`` is outside ``[1, n_countries]``.
    """
    if cost_fn is None:
        cost_fn = cost_function
    if not 1 <= n_imperialists <= n_countries:
        raise ValueError(
            "n_imperialists must satisfy 1 <= n_imperialists <= n_countries "
            f"(got n_imperialists={n_imperialists}, n_countries={n_countries})"
        )

    # The legacy RandomState exposes the same functions as the np.random
    # module, so both code paths below share identical calls.
    if random_state is None:
        rng = np.random
    elif isinstance(random_state, np.random.RandomState):
        rng = random_state
    else:
        rng = np.random.RandomState(random_state)

    n_features = Phi.shape[1]

    # --- Initialize population (countries = random weight vectors)
    countries = rng.uniform(-1, 1, size=(n_countries, n_features))
    costs = np.array([cost_fn(w, Phi, y) for w in countries])

    # Cheapest countries first.
    order = np.argsort(costs)
    countries = countries[order]
    costs = costs[order]

    # Copies, not slices: slices would alias `countries`, and a tuple swap
    # of two row views then leaves both rows holding the same weights.
    imperialists = countries[:n_imperialists].copy()
    imperialist_costs = costs[:n_imperialists].copy()
    colonies = countries[n_imperialists:].copy()
    colony_costs = costs[n_imperialists:].copy()

    # Assign colonies to imperialists with probability proportional to power
    # (cost gap to the worst imperialist; the epsilon keeps the sum positive).
    n_colonies = len(colonies)
    powers = np.max(imperialist_costs) - imperialist_costs + 1e-9
    probabilities = powers / np.sum(powers)
    colony_assignment = rng.choice(n_imperialists, size=n_colonies, p=probabilities)

    # --- Main ICA loop ---
    for _ in range(max_iter):

        # 1. Assimilation: one random pull factor per colony.
        pull = rng.rand(n_colonies, 1)
        colonies += assimilation_coeff * pull * (imperialists[colony_assignment] - colonies)

        # 2. Revolution: re-draw a few colonies (indices may repeat).
        n_revolutions = int(revolution_rate * n_colonies)
        for _rev in range(n_revolutions):
            idx = rng.randint(0, n_colonies)
            colonies[idx] = rng.uniform(-1, 1, size=n_features)

        # 3. Recalculate costs
        colony_costs = np.array([cost_fn(w, Phi, y) for w in colonies])
        imperialist_costs = np.array([cost_fn(w, Phi, y) for w in imperialists])

        # 4. Colony/imperialist exchange (if colony becomes stronger)
        for i, imp_idx in enumerate(colony_assignment):
            if colony_costs[i] < imperialist_costs[imp_idx]:
                # Save the imperialist's weights before overwriting its row.
                displaced = imperialists[imp_idx].copy()
                imperialists[imp_idx] = colonies[i]
                colonies[i] = displaced
                imperialist_costs[imp_idx], colony_costs[i] = (
                    colony_costs[i],
                    imperialist_costs[imp_idx],
                )

        # 5. Imperialistic competition: rank empires by imperialist cost plus
        #    uniform noise, then hand one colony from the worst to the best.
        total_costs = imperialist_costs + 0.1 * rng.rand(len(imperialists))
        weakest = np.argmax(total_costs)
        strongest = np.argmin(total_costs)

        if n_colonies > 0:
            owned_by_weakest = np.flatnonzero(colony_assignment == weakest)
            if len(owned_by_weakest) > 0:
                colony_assignment[owned_by_weakest[0]] = strongest

    # Best imperialist is the final solution (copied so callers own it).
    best_idx = np.argmin(imperialist_costs)
    return imperialists[best_idx].copy()


# ------------------------------------------------------
# 5. Train ICA
# ------------------------------------------------------

def _report_split(title, Phi, y_true, w, threshold=0.5):
    """Score one data split with weights ``w`` and print its metrics.

    The continuous output ``Phi @ w`` is turned into 0/1 labels by comparing
    against ``threshold``. Accuracy, precision, recall, F1, the confusion
    matrix and the classification report are printed under ``title``.

    Returns ``(scores, labels, (acc, prec, rec, f1, cm))`` so the caller can
    keep the same module-level variables the inline version defined.
    """
    scores = Phi @ w
    labels = (scores >= threshold).astype(int)

    acc = accuracy_score(y_true, labels)
    prec = precision_score(y_true, labels, zero_division=0)
    rec = recall_score(y_true, labels, zero_division=0)
    f1 = f1_score(y_true, labels, zero_division=0)
    cm = confusion_matrix(y_true, labels)

    print(title)
    print(f"Accuracy:  {acc:.4f}")
    print(f"Precision: {prec:.4f}")
    print(f"Recall:    {rec:.4f}")
    print(f"F1 Score:  {f1:.4f}")
    print("\nConfusion Matrix:")
    print(cm)
    print("\nClassification Report:")
    print(classification_report(y_true, labels, target_names=['Benign', 'Malignant'], zero_division=0))

    return scores, labels, (acc, prec, rec, f1, cm)


# perf_counter is a monotonic, high-resolution clock meant for measuring
# elapsed intervals; time.time() can jump if the system clock is adjusted.
t0 = time.perf_counter()
w_ica = ICA(Phi_train, y_train, max_iter=200)
t1 = time.perf_counter()

print(f"ICA training time: {t1 - t0:.8f} seconds")


y_pred_train_cont, y_pred_train, (acc, prec, rec, f1, cm) = _report_split(
    "========= ICA Results (Train Set) =========", Phi_train, y_train, w_ica
)

# ------------------------------------------------------
# 6. Evaluate ICA on Test Set
# ------------------------------------------------------

# The metric variables are rebound here, so after this point they describe
# the test split (as in the original inline code).
y_pred_cont, y_pred, (acc, prec, rec, f1, cm) = _report_split(
    "========= ICA Results (Test Set) =========", Phi_test, y_test, w_ica
)


ICA training time: 0.04268527 seconds
Accuracy:  0.8833
Precision: 0.9255
Recall:    0.9255
F1 Score:  0.9255

Confusion Matrix:
[[19  7]
 [ 7 87]]

Classification Report:
              precision    recall  f1-score   support

      Benign       0.73      0.73      0.73        26
   Malignant       0.93      0.93      0.93        94

    accuracy                           0.88       120
   macro avg       0.83      0.83      0.83       120
weighted avg       0.88      0.88      0.88       120

Accuracy:  0.9333
Precision: 1.0000
Recall:    0.9167
F1 Score:  0.9565

Confusion Matrix:
[[ 6  0]
 [ 2 22]]

Classification Report:
              precision    recall  f1-score   support

      Benign       0.75      1.00      0.86         6
   Malignant       1.00      0.92      0.96        24

    accuracy                           0.93        30
   macro avg       0.88      0.96      0.91        30
weighted avg       0.95      0.93      0.94        30

