<a href="https://colab.research.google.com/github/shahdcode/Credit-Card-Approval-Prediction/blob/main/Genetic_Feature_Selection.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Particle Swarm Optimization (PSO)

In [1]:
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from sklearn.tree import DecisionTreeClassifier


In [2]:
def fitness_function(features, X, y, classifier):
    """
    Evaluate the fitness of a particle.

    Args:
        features (array): Binary array representing selected features.
        X (ndarray): Feature matrix.
        y (ndarray): Target array.
        classifier: A machine learning model (e.g., DecisionTreeClassifier).

    Returns:
        float: The fitness value (lower is better for minimization tasks).
    """
    # Select features based on the binary array
    selected_features = X[:, features == 1]

    # If no features are selected, return a high fitness value
    if selected_features.shape[1] == 0:
        return float('inf')

    # Perform a simple train-test split (80%-20%)
    X_train, X_test, y_train, y_test = train_test_split(selected_features, y, test_size=0.2, random_state=42)

    # Train the classifier
    classifier.fit(X_train, y_train)

    # Predict and calculate accuracy
    y_pred = classifier.predict(X_test)
    accuracy = accuracy_score(y_test, y_pred)

    # Return the negative accuracy as fitness (minimization task)
    return -accuracy


In [3]:
def initialize_swarm(n_particles, n_features, bounds, fitness_function, X, y, classifier):
    """
    Initialize the swarm for PSO.

    Args:
        n_particles (int): Number of particles.
        n_features (int): Number of features.
        bounds (tuple): Bounds for the feature mask (binary: 0 or 1).
        fitness_function (callable): The fitness function.
        X (ndarray): Feature matrix.
        y (ndarray): Target array.
        classifier: A machine learning model.

    Returns:
        positions, velocities, pbest_positions, pbest_fitness, gbest_position, gbest_fitness
    """
    lower_bounds, upper_bounds = bounds

    # Randomly initialize positions (binary: 0 or 1) and velocities
    positions = np.random.randint(lower_bounds, upper_bounds + 1, size=(n_particles, n_features))
    velocities = np.random.uniform(-1, 1, (n_particles, n_features))

    # Evaluate initial fitness
    fitness = np.array([fitness_function(positions[i], X, y, classifier) for i in range(n_particles)])

    # Set personal bests (pbest)
    pbest_positions = positions.copy()
    pbest_fitness = fitness.copy()

    # Set global best (gbest)
    gbest_index = np.argmin(pbest_fitness)
    gbest_position = pbest_positions[gbest_index]
    gbest_fitness = pbest_fitness[gbest_index]

    return positions, velocities, pbest_positions, pbest_fitness, gbest_position, gbest_fitness


In [4]:
def return_best_solution(gbest_position, gbest_fitness, feature_names):
    """
    Return the best solution found by the swarm.

    Args:
        gbest_position (array): The best feature mask.
        gbest_fitness (float): The best fitness value.
        feature_names (list): List of feature names.

    Returns:
        dict: A dictionary containing selected features and their fitness.
    """
    selected_features = [feature_names[i] for i in range(len(gbest_position)) if gbest_position[i] == 1]
    return {
        "Selected Features": selected_features,
        "Best Fitness": -gbest_fitness  # Accuracy is the positive value of fitness
    }


In [6]:
# Main script
if __name__ == "__main__":
    # Load the dataset
    application_record = pd.read_csv("path/to/application_record.csv")
    credit_record = pd.read_csv("path/to/credit_record.csv")

    # Merge datasets on 'ID'
    dataset = pd.merge(application_record, credit_record, on="ID")

    # Preprocess dataset
    X = dataset.drop(columns=["ID", "STATUS"]).values  # Features
    y = dataset["STATUS"].map({"C": 0, "X": 0, "0": 1, "1": 1, "2": 1, "3": 1, "4": 1, "5": 1}).values  # Target

    # Parameters (Adjust these based on your dataset)
    n_particles = 30  # Number of particles (Try increasing for better exploration)
    n_features = X.shape[1]  # Automatically detects the number of features
    bounds = (0, 1)  # Binary bounds for feature selection
    classifier = DecisionTreeClassifier(random_state=42)  # ML model used for evaluation

    # Swarm initialization
    positions, velocities, pbest_positions, pbest_fitness, gbest_position, gbest_fitness = initialize_swarm(
        n_particles=n_particles,
        n_features=n_features,
        bounds=bounds,
        fitness_function=fitness_function,
        X=X,
        y=y,
        classifier=classifier
    )

    # Return the best solution
    feature_names = dataset.drop(columns=["ID", "STATUS"]).columns.tolist()
    best_solution = return_best_solution(gbest_position, gbest_fitness, feature_names)

    # Output the best solution
    print("Best Solution Found:")
    print(best_solution)


NameError: name 'pd' is not defined



---

# Genetic Algorithm Implementation

In [None]:
def set_algorithm_parameters():
    """Set the parameters for the Genetic Algorithm."""
    return {
        "pop_size": 50,
        "num_generations": 100,
        "crossover_rate": 0.8,
        "mutation_rate": 0.01
    }

In [None]:
def select_parents(population, fitness_values):
    """Select parents using Tournament Selection."""
    parents = []
    for _ in range(len(population)):
        # Tournament selection: Randomly pick 3 and select the best
        candidates_idx = np.random.choice(len(population), 3, replace=False)
        best_candidate = max(candidates_idx, key=lambda idx: fitness_values[idx])
        parents.append(population[best_candidate])
    return np.array(parents)


In [None]:
def crossover(parents, crossover_rate):
    """Apply single-point crossover."""
    offspring = []
    for i in range(0, len(parents), 2):
        parent1, parent2 = parents[i], parents[(i + 1) % len(parents)]
        if np.random.rand() < crossover_rate:
            point = np.random.randint(1, len(parent1))
            child1 = np.concatenate([parent1[:point], parent2[point:]])
            child2 = np.concatenate([parent2[:point], parent1[point:]])
            offspring.extend([child1, child2])
        else:
            offspring.extend([parent1, parent2])
    return np.array(offspring)


In [None]:
def mutate(offspring, mutation_rate):
    """Apply bit-flip mutation."""
    for i in range(len(offspring)):
        for j in range(len(offspring[i])):
            if np.random.rand() < mutation_rate:
                offspring[i][j] = 1 - offspring[i][j]  # Flip bit
    return offspring


In [None]:
def populate_next_generation(offspring, fitness_values):
    """Replace population with offspring and apply elitism."""
    elite_idx = np.argmax(fitness_values)
    next_generation = offspring
    next_generation[0] = offspring[elite_idx]  # Ensure elite survives
    return next_generation