In [4]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, confusion_matrix, classification_report

# ------------------------------------------------------
# 1. Preprocess (same as BLS & INN)
# ------------------------------------------------------

# Load the merged dataset and swap every NaN for 0 (paper behavior).
df = pd.read_excel('merged_df.xlsx').copy()
df = df.replace({np.nan: 0})

# The 'label' column is the target (cast to float); all other columns are features.
y = df['label'].values.astype(float)
X = df.drop(columns='label').values

# Stratified split holding out 20% of the rows for testing, with a fixed seed.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    stratify=y,
    random_state=42,
)

# ------------------------------------------------------
# 2. Basis: Φ = [1, |x|]
# ------------------------------------------------------

def build_basis(X):
    """Build the design matrix Phi = [1, |x|].

    Parameters
    ----------
    X : array-like of shape (n_samples, n_features)
        Raw feature matrix. Any 2-D array-like (ndarray, nested list, ...)
        is accepted; it is converted with ``np.asarray``.

    Returns
    -------
    numpy.ndarray of shape (n_samples, n_features + 1)
        A leading column of ones followed by the element-wise absolute
        values of ``X``.

    Raises
    ------
    ValueError
        If ``X`` is not two-dimensional.
    """
    X = np.asarray(X)
    if X.ndim != 2:
        # Fail early with a readable message instead of an opaque
        # concatenation error further down.
        raise ValueError(
            f"X must be 2-D (n_samples, n_features); got shape {X.shape}"
        )

    ones = np.ones((X.shape[0], 1))
    return np.hstack([ones, np.abs(X)])

# Expand the train and test feature matrices with build_basis.
Phi_train, Phi_test = (build_basis(part) for part in (X_train, X_test))

# ------------------------------------------------------
# 3. LSLC Training (paper-accurate)
# ------------------------------------------------------

def train_LSLC(Phi, y):
    """Fit LSLC weights: least squares subject to sum(w) = 1.

    Solves ``min_w ||Phi w - y||^2`` subject to ``sum(w) = 1`` through the
    KKT linear system::

        [ 2 Phi^T Phi   1 ] [ w      ]   [ 2 Phi^T y ]
        [ 1^T           0 ] [ lambda ] = [ 1         ]

    Parameters
    ----------
    Phi : array-like of shape (n_samples, n_features)
        Design matrix.
    y : array-like of shape (n_samples,) or (n_samples, 1)
        Target values; flattened to 1-D before use.

    Returns
    -------
    numpy.ndarray of shape (n_features,)
        The weight vector ``w``. The Lagrange multiplier is discarded.

    Notes
    -----
    When the KKT matrix is exactly singular (for example when ``Phi`` has
    duplicated or all-zero columns) the minimiser is not unique and
    ``np.linalg.solve`` raises ``LinAlgError``. In that case the
    minimum-norm solution of the same system from ``np.linalg.lstsq`` is
    returned instead.
    """
    B = np.asarray(Phi, dtype=float)
    # Accept column-vector targets as well as flat ones.
    y = np.asarray(y, dtype=float).ravel()
    n_features = B.shape[1]

    BtB = B.T @ B
    Bty = B.T @ y

    # Constraint: sum(w) = 1
    C = np.ones((1, n_features))
    d = np.array([1.0])

    # Build block matrix [[2 B^T B, C^T], [C, 0]]
    top = np.hstack([2 * BtB, C.T])
    bottom = np.hstack([C, np.zeros((1, 1))])
    A = np.vstack([top, bottom])

    rhs = np.hstack([2 * Bty, d])  # RHS = [2B^Ty ; d]

    try:
        sol = np.linalg.solve(A, rhs)
    except np.linalg.LinAlgError:
        # Singular KKT matrix: fall back to the minimum-norm solution.
        sol = np.linalg.lstsq(A, rhs, rcond=None)[0]

    return sol[:-1]  # last element is lambda

w_lslc = train_LSLC(Phi_train, y_train)

# Continuous LSLC outputs at or above this value are labelled 1, otherwise 0.
DECISION_THRESHOLD = 0.5


def _evaluate_split(split_name, Phi, y_true, w):
    """Score one data split with the given weights and print its metrics.

    Parameters
    ----------
    split_name : str
        Name inserted into the report header (e.g. "Train" or "Test").
    Phi : numpy.ndarray
        Design matrix of the split.
    y_true : array-like
        True labels of the split.
    w : numpy.ndarray
        Weight vector applied to ``Phi``.

    Returns
    -------
    tuple
        ``(y_cont, y_hat, (acc, prec, rec, f1, cm))``: the continuous
        outputs ``Phi @ w``, the thresholded integer labels, and the
        computed metrics (accuracy, precision, recall, F1, confusion matrix).
    """
    y_cont = Phi @ w
    y_hat = (y_cont >= DECISION_THRESHOLD).astype(int)

    acc = accuracy_score(y_true, y_hat)
    prec = precision_score(y_true, y_hat, zero_division=0)
    rec = recall_score(y_true, y_hat, zero_division=0)
    f1 = f1_score(y_true, y_hat, zero_division=0)
    cm = confusion_matrix(y_true, y_hat)

    print(f"========= LSLC Results ({split_name} Set) =========")
    print(f"Accuracy:  {acc:.4f}")
    print(f"Precision: {prec:.4f}")
    print(f"Recall:    {rec:.4f}")
    print(f"F1 Score:  {f1:.4f}")
    print("\nConfusion Matrix:")
    print(cm)
    print("\nClassification Report:")
    print(classification_report(y_true, y_hat, target_names=['Benign', 'Malignant'], zero_division=0))

    return y_cont, y_hat, (acc, prec, rec, f1, cm)


# Train-set report.
y_pred_train_cont, y_pred_train, _ = _evaluate_split("Train", Phi_train, y_train, w_lslc)

# ------------------------------------------------------
# 4. Evaluation
# ------------------------------------------------------

# Test-set report; the module-level metric names end up holding the test values.
y_pred_cont, y_pred, (acc, prec, rec, f1, cm) = _evaluate_split("Test", Phi_test, y_test, w_lslc)


Accuracy:  0.9083
Precision: 0.9109
Recall:    0.9787
F1 Score:  0.9436

Confusion Matrix:
[[17  9]
 [ 2 92]]

Classification Report:
              precision    recall  f1-score   support

      Benign       0.89      0.65      0.76        26
   Malignant       0.91      0.98      0.94        94

    accuracy                           0.91       120
   macro avg       0.90      0.82      0.85       120
weighted avg       0.91      0.91      0.90       120

Accuracy:  0.8667
Precision: 0.9167
Recall:    0.9167
F1 Score:  0.9167

Confusion Matrix:
[[ 4  2]
 [ 2 22]]

Classification Report:
              precision    recall  f1-score   support

      Benign       0.67      0.67      0.67         6
   Malignant       0.92      0.92      0.92        24

    accuracy                           0.87        30
   macro avg       0.79      0.79      0.79        30
weighted avg       0.87      0.87      0.87        30

