# In [None]:
# =====================================================
# 🧠 Lung Cancer Detection using PSO + CNN Feature Selection (Fixed Version)
# =====================================================

import os
import pickle

import matplotlib.pyplot as plt
import numpy as np
import seaborn as sns
from pyswarms.single import GlobalBestPSO
from sklearn.metrics import (
    accuracy_score, precision_score, recall_score, f1_score,
    classification_report, confusion_matrix
)
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.svm import SVC
from tensorflow.keras.applications import VGG16, ResNet50, MobileNetV2
from tensorflow.keras.models import Model
from tensorflow.keras.preprocessing.image import ImageDataGenerator

# =====================================================
# Step 1: Dataset Setup
# =====================================================
train_dir = "balanced_processed/train"
test_dir = "balanced_processed/test"

# Every image is multiplied by 1/255 on load; no augmentation is configured.
# NOTE(review): the Keras application models each provide their own
# preprocess_input; a plain 1/255 rescale may not match what the ImageNet
# weights were trained with -- confirm this is intended.
datagen = ImageDataGenerator(rescale=1./255)

# Both splits are read with identical settings. shuffle=False keeps the
# iteration order fixed, which extract_features relies on when it pairs
# predictions with generator.classes.
_flow_options = dict(
    target_size=(224,224),
    batch_size=32,
    class_mode='categorical',
    shuffle=False,
)

train_gen = datagen.flow_from_directory(train_dir, **_flow_options)
test_gen = datagen.flow_from_directory(test_dir, **_flow_options)

# =====================================================
# Step 2: Feature Extraction Function
# =====================================================
def extract_features(base_model, generator, layer_index=-2):
    """Return flattened intermediate activations and labels for a generator.

    A sub-model is built that maps ``base_model.input`` to the output of
    ``base_model.layers[layer_index]``; every batch of ``generator`` is run
    through it and the activations are flattened to one row per sample.

    Args:
        base_model: Keras model to tap for features.
        generator: Image iterator; must expose ``classes`` and should iterate
            in a fixed order (the callers in this file pass iterators created
            with ``shuffle=False``).
        layer_index: Index into ``base_model.layers`` of the layer whose
            output is used as the feature map. Defaults to ``-2`` (the
            original hard-coded value).
            NOTE(review): the backbones in this file are built with
            ``include_top=False``, so ``-2`` appears to tap one layer before
            the model's final output; pass ``-1`` if the final feature map
            is what is wanted -- confirm.

    Returns:
        Tuple ``(features, labels)`` where ``features`` is a 2-D array of
        shape ``(n_samples, n_flat_features)`` and ``labels`` is
        ``generator.classes``.
    """
    feature_model = Model(
        inputs=base_model.input,
        outputs=base_model.layers[layer_index].output,
    )

    # The same generator object is reused for several backbones. Rewind it so
    # prediction always starts at the first batch and the returned rows line
    # up with generator.classes.
    reset = getattr(generator, "reset", None)
    if callable(reset):
        reset()

    features = feature_model.predict(generator, verbose=1)
    labels = generator.classes
    return features.reshape(features.shape[0], -1), labels

# Backbones compared in this experiment, in evaluation order. Each is
# instantiated with ImageNet weights, without its classification head, for
# 224x224 RGB input.
_backbone_builders = (
    ("VGG16", VGG16),
    ("ResNet50", ResNet50),
    ("MobileNetV2", MobileNetV2),
)

cnn_models = {
    label: build(weights='imagenet', include_top=False, input_shape=(224,224,3))
    for label, build in _backbone_builders
}

# =====================================================
# Step 3: PSO Feature Selection (with Cross-Validation)
# =====================================================
def fitness_function(mask, X, y):
    """Score one candidate feature mask; lower is better.

    Columns of ``X`` whose mask entry is strictly greater than 0.5 are kept.
    A linear-kernel SVM is then evaluated on those columns with 3-fold
    cross-validation, and the cost is ``1 - mean accuracy``.

    Args:
        mask: 1-D array with one value per column of ``X``.
        X: 2-D feature matrix (samples x features).
        y: Class labels, one per row of ``X``.

    Returns:
        ``1`` when the mask keeps no columns (worst possible cost), otherwise
        one minus the mean cross-validated accuracy.
    """
    chosen = X[:, mask > 0.5]

    # Nothing selected: there is nothing to train on, so report the worst cost.
    if not chosen.shape[1]:
        return 1

    svm = SVC(kernel='linear')
    # Cross-validate (3 folds) so the cost reflects generalization rather
    # than fit on the training rows.
    fold_accuracy = cross_val_score(svm, chosen, y, cv=3, scoring='accuracy')
    return 1 - np.mean(fold_accuracy)

def run_pso(X, y, n_particles=10, n_iterations=10):
    """Search for a feature mask over the columns of ``X`` with global-best PSO.

    Each particle position has one coordinate per column of ``X`` and is
    scored by ``fitness_function``.

    Args:
        X: 2-D feature matrix (samples x features).
        y: Class labels, one per row of ``X``.
        n_particles: Swarm size.
        n_iterations: Number of optimizer iterations.

    Returns:
        The best position found by the optimizer. It is returned as-is;
        callers threshold it at 0.5 to obtain the selected columns.
    """
    def swarm_cost(positions):
        # The optimizer passes the whole swarm in one call; score each
        # particle (row) separately and hand back one cost per particle.
        costs = [fitness_function(particle, X, y) for particle in positions]
        return np.array(costs)

    hyperparams = {'c1': 0.5, 'c2': 0.3, 'w': 0.9}
    swarm = GlobalBestPSO(
        n_particles=n_particles,
        dimensions=X.shape[1],
        options=hyperparams,
    )

    # Only the best position is needed; its cost is discarded.
    _, best_pos = swarm.optimize(swarm_cost, iters=n_iterations)
    return best_pos

# =====================================================
# Step 4: Train & Evaluate Models
# =====================================================
# Test-set metrics per backbone, keyed by model name; read by the comparison
# chart in Step 5.
metrics_summary = {}

for name, cnn in cnn_models.items():
    print(f"\nüîπ Extracting features using {name}...")
    X_train, y_train = extract_features(cnn, train_gen)
    X_test, y_test = extract_features(cnn, test_gen)

    print(f"‚öôÔ∏è Running PSO feature selection for {name}...")
    best_mask = run_pso(X_train, y_train)

    # Threshold the PSO position once so train and test are sliced with the
    # exact same boolean column mask.
    selected = np.asarray(best_mask) > 0.5
    if not selected.any():
        # An empty selection would hand SVC.fit a zero-column matrix and
        # crash; fall back to using every feature instead.
        print(f"PSO selected no features for {name}; falling back to all features")
        selected[:] = True
    X_train_sel = X_train[:, selected]
    X_test_sel = X_test[:, selected]
    print(f"‚úÖ Selected {X_train_sel.shape[1]} features")

    # Final SVM training on the selected columns only
    clf = SVC(kernel='linear')
    clf.fit(X_train_sel, y_train)
    y_pred = clf.predict(X_test_sel)

    # Weighted averages account for per-class support.
    acc = accuracy_score(y_test, y_pred)
    prec = precision_score(y_test, y_pred, average='weighted')
    rec = recall_score(y_test, y_pred, average='weighted')
    f1 = f1_score(y_test, y_pred, average='weighted')

    print(f"\nüèÜ {name} + PSO Results:")
    print(f"Accuracy: {acc:.4f}, Precision: {prec:.4f}, Recall: {rec:.4f}, F1-score: {f1:.4f}")
    print("\nClassification Report:\n", classification_report(y_test, y_pred))

    # Confusion Matrix
    cm = confusion_matrix(y_test, y_pred)
    plt.figure(figsize=(5,4))
    sns.heatmap(cm, annot=True, fmt="d", cmap="Purples")
    plt.title(f"{name} + PSO Confusion Matrix")
    plt.xlabel("Predicted")
    plt.ylabel("Actual")
    plt.show()

    # Save model. Previously only the message was printed and nothing was
    # written to disk. The trained SVM is persisted together with the feature
    # mask it was trained on, since the classifier is unusable without it.
    # The file is a pickle, hence ".pkl" rather than the Keras-style ".h5".
    model_path = f"{name}_PSO_model.pkl"
    with open(model_path, "wb") as model_file:
        pickle.dump({"classifier": clf, "feature_mask": selected}, model_file)
    print(f"üíæ Model saved as: {model_path}")

    metrics_summary[name] = {"Accuracy": acc, "Precision": prec, "Recall": rec, "F1-score": f1}

# =====================================================
# Step 5: Combined Model Comparison
# =====================================================
# Grouped bar chart: one group per backbone, one bar per metric.
plt.figure(figsize=(8,6))
model_names = list(metrics_summary.keys())
x = np.arange(len(model_names))
width = 0.2

# (metric key, horizontal shift of that metric's bar from the group centre).
# The key doubles as the legend label.
bar_layout = (
    ("Accuracy", -width*1.5),
    ("Precision", -width/2),
    ("Recall", width/2),
    ("F1-score", width*1.5),
)

for metric, shift in bar_layout:
    heights = [metrics_summary[model][metric] for model in model_names]
    plt.bar(x + shift, heights, width, label=metric)

plt.xticks(x, model_names)
plt.ylabel("Score")
plt.title("üìä PSO + CNN Model Performance Comparison (Fixed)")
plt.legend()
plt.show()

print("\n‚úÖ PSO-based feature selection and evaluation complete!")
