In [None]:
import tensorflow as tf
from tensorflow.keras.preprocessing import image_dataset_from_directory
from tensorflow.keras.applications import MobileNetV2
from tensorflow.keras.applications.mobilenet_v2 import preprocess_input
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score
import numpy as np
import random
import matplotlib.pyplot as plt
import ipywidgets as widgets
from IPython.display import display, clear_output # For displaying widgets and clearing output

In [None]:
# Mount Google Drive at /content/drive (Colab-only: relies on the google.colab package).
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:

# --- 1. Configuration and Data Loading ---
# Root folder of the image dataset on the mounted Drive; this path is what the
# dataset-loading cell hands to image_dataset_from_directory.
data_dir = "/content/drive/My Drive/Dataset/archive"

In [None]:
# Image size & batch
IMG_SIZE = 160
BATCH_SIZE = 32

# Load dataset with validation split.
# Both subsets are requested with the same 80/20 split fraction and the same
# seed, so the two calls differ only in which subset they return.
print(f"Loading data from: {data_dir}")
_split_options = {
    "validation_split": 0.2,
    "seed": 123,
    "image_size": (IMG_SIZE, IMG_SIZE),
    "batch_size": BATCH_SIZE,
}
train_ds = image_dataset_from_directory(data_dir, subset="training", **_split_options)
val_ds = image_dataset_from_directory(data_dir, subset="validation", **_split_options)


Loading data from: /content/drive/My Drive/Dataset/archive
Found 1000 files belonging to 2 classes.
Using 800 files for training.
Found 1000 files belonging to 2 classes.
Using 200 files for validation.


In [None]:
# Preprocessing for performance
AUTOTUNE = tf.data.AUTOTUNE
# Training stream: cache -> shuffle (buffer of 1000) -> prefetch.
# NOTE(review): the datasets were created with batch_size set, so shuffle here
# presumably reorders whole batches rather than individual images — confirm.
train_ds = train_ds.cache().shuffle(1000).prefetch(buffer_size=AUTOTUNE)
# Validation stream: cached and prefetched only; no shuffle is applied.
val_ds = val_ds.cache().prefetch(buffer_size=AUTOTUNE)
print("Data loading and preprocessing complete.")

Data loading and preprocessing complete.


In [None]:
# --- 2. Feature Extractor (MobileNetV2 Embeddings) ---
# Load MobileNetV2 pre-trained on ImageNet without the top classification layer.
# 'pooling=avg' adds a GlobalAveragePooling2D layer, outputting a 1280-feature vector.
# base_model is only ever called for forward passes in the cells shown here;
# no cell in this notebook trains or fine-tunes it.
print("Loading MobileNetV2 base model...")
base_model = MobileNetV2(
    weights='imagenet',        # Use pre-trained ImageNet weights
    include_top=False,         # Exclude the classifier at the top
    pooling='avg',             # Apply global average pooling
    input_shape=(IMG_SIZE, IMG_SIZE, 3) # Input image dimensions (height, width, channels)
)
print("MobileNetV2 loaded. Extracting embeddings...")

def extract_embeddings(dataset):
    """
    Run every batch of `dataset` through base_model and collect the results.

    `dataset` is iterated as (images, labels) pairs. Each image batch is passed
    through MobileNetV2's preprocess_input before being fed to base_model.

    Returns a tuple (embeddings, labels): the per-batch embeddings stacked
    row-wise and the per-batch labels concatenated into one array.
    """
    embedding_chunks = []
    label_chunks = []
    for images, targets in dataset:
        # preprocess_input applies the input scaling MobileNetV2 expects.
        features = base_model(preprocess_input(images))
        embedding_chunks.append(features.numpy())
        label_chunks.append(targets.numpy())
    return np.vstack(embedding_chunks), np.hstack(label_chunks)

# Extract embeddings for training and validation datasets.
# X_train, y_train, X_val, y_val and num_features are module-level globals:
# evaluate_features reads the four arrays, and the search functions read
# num_features to size their feature masks.
X_train, y_train = extract_embeddings(train_ds)
X_val, y_val = extract_embeddings(val_ds)
num_features = X_train.shape[1] # Number of features (1280 for MobileNetV2 avg pooling)
print(f"Extracted {num_features} features per image. X_train shape: {X_train.shape}, X_val shape: {X_val.shape}")

Loading MobileNetV2 base model...
Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/mobilenet_v2/mobilenet_v2_weights_tf_dim_ordering_tf_kernels_1.0_160_no_top.h5
[1m9406464/9406464[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 0us/step
MobileNetV2 loaded. Extracting embeddings...
Extracted 1280 features per image. X_train shape: (800, 1280), X_val shape: (200, 1280)


In [None]:
# --- 3. Helper Function: Evaluate a Feature Subset ---
def evaluate_features(feature_mask):
    """
    Train a Logistic Regression classifier on the feature columns selected by
    `feature_mask` and return its accuracy on the validation embeddings.

    Reads the module-level arrays X_train, y_train, X_val and y_val.

    Parameters
    ----------
    feature_mask : array-like of 0/1 (or bool)
        One entry per feature column; entries equal to 1 (True) are kept.
        NumPy arrays, lists and tuples are all accepted.

    Returns
    -------
    float
        Validation accuracy, or 0.0 when the mask selects no feature.
    """
    # Normalise to a boolean NumPy mask once. Comparing a plain list with
    # `== 1` yields a single bool, which would mis-index the arrays below, and
    # deriving both the emptiness check and the column selection from the same
    # mask keeps the two from disagreeing.
    selected = np.asarray(feature_mask) == 1

    # Nothing selected -> nothing to train on.
    if not selected.any():
        return 0.0

    # Fixed random_state so repeated evaluations of the same mask agree.
    clf = LogisticRegression(max_iter=500, solver='liblinear', random_state=42)
    clf.fit(X_train[:, selected], y_train)

    # Score on the validation split using the same column subset.
    predictions = clf.predict(X_val[:, selected])
    return accuracy_score(y_val, predictions)


In [None]:
# --- 4. Feature Selection Search Algorithms ---

# a. Random Search
# -------------------------
def random_search(iters=20):
    """
    Draw `iters` random 0/1 feature masks and keep the best-scoring one.

    Each mask has num_features entries and is scored with evaluate_features.
    Returns (best_accuracy, best_mask); best_mask is None when no mask scored
    above 0 (including when iters is 0).
    """
    best_acc = 0
    best_mask = None
    for _ in range(iters):
        candidate = np.random.randint(0, 2, num_features)
        score = evaluate_features(candidate)
        if not score > best_acc:
            continue
        # Strict improvement only: on a tie the earlier mask is kept.
        best_acc = score
        best_mask = candidate
    return best_acc, best_mask


In [None]:
# b. Greedy Search (Forward Selection)
# -------------------------
def greedy_search():
    """
    Greedy forward selection over the num_features embedding columns.

    Starting from an empty mask, each pass scores every not-yet-selected
    feature added on its own to the current mask (via evaluate_features) and
    permanently adds the one giving the highest accuracy, provided it is
    strictly better than the current best. The search stops at the first pass
    in which no single addition improves the accuracy.

    Returns (best_accuracy, mask) where mask is an int array of 0/1 flags.
    """
    selected = np.zeros(num_features, dtype=int)
    best_acc = 0

    while True:
        winner = None
        winner_acc = best_acc

        # Candidates are visited in ascending index order; on ties the
        # lowest index wins because only strict improvements replace it.
        for idx in np.flatnonzero(selected == 0):
            candidate = selected.copy()
            candidate[idx] = 1
            score = evaluate_features(candidate)
            if score > winner_acc:
                winner, winner_acc = idx, score

        if winner is None:
            # No single addition beat the current accuracy.
            break

        selected[winner] = 1
        best_acc = winner_acc

    return best_acc, selected

In [None]:
# c. Genetic Algorithm
# -------------------------
def genetic_algorithm(pop_size=10, generations=5, mutation_rate=0.1):
    """
    Search for a good feature mask with a simple genetic algorithm.

    Each individual is a 0/1 mask of length num_features, scored with
    evaluate_features. Every generation keeps the best-scoring individuals
    (the elite) unchanged and refills the population with children produced by
    single-point crossover of two elite parents, each child having one random
    bit flipped with probability `mutation_rate`.

    Parameters
    ----------
    pop_size : int
        Number of individuals; must be at least 1. The elite is pop_size // 2
        individuals, but never fewer than two when pop_size allows it, because
        crossover needs two parents.
    generations : int
        Number of selection/crossover rounds.
    mutation_rate : float
        Probability that a child gets a single bit flipped.

    Returns
    -------
    (float, numpy.ndarray)
        Best accuracy in the final population and the corresponding mask.

    Raises
    ------
    ValueError
        If pop_size is smaller than 1.
    """
    if pop_size < 1:
        raise ValueError("pop_size must be at least 1")

    # pop_size // 2 drops below 2 for pop_size < 4, which would make
    # random.sample(..., 2) fail; clamp so two parents are always available.
    n_elite = min(pop_size, max(2, pop_size // 2))
    n_children = pop_size - n_elite

    # Initialize population and score it once.
    population = [np.random.randint(0, 2, num_features) for _ in range(pop_size)]
    scores = [evaluate_features(ind) for ind in population]

    for _ in range(generations):
        # Stable descending sort on score: ties keep their current order.
        ranking = sorted(range(len(population)), key=scores.__getitem__, reverse=True)[:n_elite]
        elites = [population[i] for i in ranking]
        elite_scores = [scores[i] for i in ranking]

        children = []
        while len(children) < n_children:
            # Select two parents randomly from the current elite.
            p1, p2 = random.sample(elites, 2)

            # Crossover: head of one parent, tail of the other.
            cut_point = random.randint(0, num_features - 1)
            child = np.concatenate([p1[:cut_point], p2[cut_point:]])

            # Mutation: flip a single randomly chosen bit.
            if random.random() < mutation_rate:
                mutation_idx = random.randint(0, num_features - 1)
                child[mutation_idx] = 1 - child[mutation_idx]
            children.append(child)

        # Elite masks are unchanged, so their scores are carried over and
        # only the new children need to be evaluated.
        population = elites + children
        scores = elite_scores + [evaluate_features(child) for child in children]

    best_idx = int(np.argmax(scores))
    return scores[best_idx], population[best_idx]


# --- 5. Interactive Front End (ipywidgets) ---

# Define a function that runs the algorithms and plots the results
def run_analysis(rs_iters, ga_pop_size, ga_generations, ga_mutation_rate):
    """
    Run the three feature-selection searches and plot their accuracies.

    Everything printed or plotted is directed into the module-level
    `output_area` widget, which is cleared first.

    Parameters
    ----------
    rs_iters : iteration count passed to random_search.
    ga_pop_size, ga_generations, ga_mutation_rate : passed positionally to
        genetic_algorithm. greedy_search takes no parameters.
    """
    with output_area:
        # Drop the previous run's text and figure before writing new output.
        clear_output(wait=True)
        print("--- Starting Algorithm Runs ---")

        # Dict literals evaluate in order, so the searches run as
        # random -> greedy -> genetic; only the accuracy of each is kept.
        results = {
            'Random Search': random_search(rs_iters)[0],
            'Greedy Search': greedy_search()[0],
            'Genetic Algorithm': genetic_algorithm(ga_pop_size, ga_generations, ga_mutation_rate)[0],
        }

        print("--- Algorithm Runs Complete ---")

        print("\n--- Results ---")
        for label, score in results.items():
            print(f"{label} Accuracy: {score:.4f}")

        # --- Visualization ---
        print("\n--- Generating Visualization ---")
        labels = list(results.keys())
        scores = list(results.values())

        plt.figure(figsize=(10, 6))
        bars = plt.bar(labels, scores, color=['skyblue', 'lightcoral', 'lightgreen'])

        plt.title('Comparison of Feature Selection Algorithm Accuracies', fontsize=14, fontweight='bold')
        plt.xlabel('Feature Selection Algorithm', fontsize=12)
        plt.ylabel('Validation Accuracy', fontsize=12)

        # Zoom the y-axis around the observed accuracies, but never let the
        # upper limit fall below 0.1.
        lowest = min(scores)
        highest = max(scores)
        if highest > 0:
            upper = highest * 1.1
        else:
            upper = 0.1
        plt.ylim(lowest * 0.9, upper)
        if plt.ylim()[1] < 0.1:
            plt.ylim(0, 0.1)

        # Write each bar's value just above it.
        for rect in bars:
            height = rect.get_height()
            x_center = rect.get_x() + rect.get_width()/2
            plt.text(x_center, height + 0.005, f'{height:.4f}', ha='center', va='bottom', fontsize=10, color='black')

        plt.grid(axis='y', linestyle='--', alpha=0.7)
        plt.axhline(y=0.5, color='gray', linestyle=':', linewidth=1, label='Baseline 0.5 (Random Guess)')
        plt.legend()  # Shows the baseline entry
        plt.tight_layout()
        plt.show()
        print("Visualization generated successfully.")


# Create widgets for parameters.
# The descriptions are longer than the default label width and would be
# truncated, so let the label column size itself to the text.
_slider_style = {'description_width': 'initial'}
rs_iters_slider = widgets.IntSlider(value=20, min=10, max=100, step=5, description='Random Search Iters:', style=_slider_style)
ga_pop_size_slider = widgets.IntSlider(value=10, min=5, max=50, step=5, description='GA Pop Size:', style=_slider_style)
ga_generations_slider = widgets.IntSlider(value=5, min=1, max=20, step=1, description='GA Generations:', style=_slider_style)
# step=0.01 keeps both the default (0.1) and the maximum (0.5) on the slider
# grid; with min=0.01 a step of 0.05 would only reach 0.01, 0.06, 0.11, ...
ga_mutation_rate_slider = widgets.FloatSlider(value=0.1, min=0.01, max=0.5, step=0.01, description='GA Mutation Rate:', style=_slider_style)
run_button = widgets.Button(description="Run Analysis")
output_area = widgets.Output() # Output area that run_analysis writes into

# Display widgets: the controls stacked vertically, with the output area below.
print("\n--- Adjust Parameters and Run Analysis ---")
display(widgets.VBox([
    rs_iters_slider,
    ga_pop_size_slider,
    ga_generations_slider,
    ga_mutation_rate_slider,
    run_button
]), output_area)

# Define the action for the button click
def on_button_clicked(b):
    """
    Run the analysis with the values currently set on the sliders.

    `b` is the argument ipywidgets passes to click handlers; it is not used.
    """
    run_analysis(
        rs_iters_slider.value,
        ga_pop_size_slider.value,
        ga_generations_slider.value,
        ga_mutation_rate_slider.value
    )

run_button.on_click(on_button_clicked)

# Optional: run the analysis once on startup with the default slider values.
# run_analysis already directs its output into output_area, so the handler is
# invoked directly instead of being wrapped in a second `with output_area:`.
on_button_clicked(run_button)



--- Adjust Parameters and Run Analysis ---


VBox(children=(IntSlider(value=20, description='Random Search Iters:', min=10, step=5), IntSlider(value=10, de…

Output()