In [28]:
import numpy as np
from sklearn.datasets import load_iris

# Load Iris dataset
data = load_iris()
X = data.data  # Feature matrix
y_true = data.target  # True labels (for validation)

# Parameters
population_size = 100
num_features = X.shape[1]
num_objects = X.shape[0]
num_classes = 3
threshold = 0.98
generations = 50

# Step 1: Distance matrix
def calculate_distance_matrix(X):
    return np.linalg.norm(X[:, np.newaxis] - X, axis=2)

distance_matrix = calculate_distance_matrix(X)

# Step 2: Initialize population
def initialize_population(population_size, num_objects, threshold):
    population = np.random.rand(population_size, num_objects) > threshold
    for individual in population:
        if not np.any(individual):  # Ensure at least one medoid
            individual[np.random.randint(num_objects)] = 1
    return population

population = initialize_population(population_size, num_objects, threshold)

# Step 3: Assign non-medoids
def assign_to_medoids(individual, distance_matrix):
    medoids = np.where(individual == 1)[0]
    if len(medoids) == 0:  # Ensure at least one medoid
        medoids = [np.random.randint(len(individual))]
        individual[medoids[0]] = 1
    assignments = np.argmin(distance_matrix[:, medoids], axis=1)
    non_medoids = np.setdiff1d(np.arange(len(individual)), medoids)
    return assignments, medoids, non_medoids

# Step 4: Calculate scores
def calculate_score(assignments, medoids, distance_matrix):
    score = 0
    for medoid in medoids:
        cluster_indices = np.where(assignments == medoid)[0]
        cluster_center = np.mean(X[cluster_indices], axis=0)
        score += np.sum(np.linalg.norm(X[cluster_indices] - cluster_center, axis=1))
    return score / (num_objects * num_features)

# Step 5-7: Genetic algorithm loop
def genetic_algorithm(population, distance_matrix, generations):
    for generation in range(generations):
        scores = []
        classifications = []
        
        for individual in population:
            assignments, medoids, non_medoids = assign_to_medoids(individual, distance_matrix)
            classifications.append(assignments)
            scores.append(calculate_score(assignments, medoids, distance_matrix))
        
        # Print score first for each generation
        print(f"Generation {generation + 1} - Best Score: {min(scores)}")
        
        # Sort by scores
        sorted_indices = np.argsort(scores)
        population = population[sorted_indices]
        classifications = np.array(classifications)[sorted_indices]
        
        # Select top 50% for reproduction
        top_individuals = population[:len(population) // 2]
        
        # Reproduce (crossover + mutation)
        new_population = np.array([
            np.bitwise_xor(top_individuals[np.random.randint(len(top_individuals))],
                           top_individuals[np.random.randint(len(top_individuals))])
            for _ in range(len(population) // 2)
        ])
        
        # Ensure new individuals have at least one medoid
        for individual in new_population:
            if not np.any(individual):
                individual[np.random.randint(len(individual))] = 1
        
        # Replace bottom 50% with new individuals
        population[len(population) // 2:] = new_population
    
    # After the final generation, get the best solution
    final_assignments, final_medoids, final_non_medoids = assign_to_medoids(population[0], distance_matrix)
    final_medoids_non_medoids = np.zeros(num_objects)
    final_medoids_non_medoids[final_medoids] = 1  # Mark medoids with 1, non-medoids with 0
    
    # Display the results for medoids and non-medoids
    print("Medoids Indices:", final_medoids)
    print("Non-Medoids Indices:", final_non_medoids)
    print("Medoids Array (1 for Medoids, 0 for Non-Medoids):")
    print(final_medoids_non_medoids)

    return final_medoids_non_medoids  # Return the final array

# Run the genetic algorithm
final_medoids_non_medoids = genetic_algorithm(population, distance_matrix, generations)


Generation 1 - Best Score: 0.0
Generation 2 - Best Score: 0.0
Generation 3 - Best Score: 0.0
Generation 4 - Best Score: 0.0
Generation 5 - Best Score: 0.0
Generation 6 - Best Score: 0.0
Generation 7 - Best Score: 0.0
Generation 8 - Best Score: 0.0
Generation 9 - Best Score: 0.0
Generation 10 - Best Score: 0.0
Generation 11 - Best Score: 0.0
Generation 12 - Best Score: 0.0
Generation 13 - Best Score: 0.0
Generation 14 - Best Score: 0.0
Generation 15 - Best Score: 0.0
Generation 16 - Best Score: 0.0
Generation 17 - Best Score: 0.0
Generation 18 - Best Score: 0.0
Generation 19 - Best Score: 0.0
Generation 20 - Best Score: 0.0
Generation 21 - Best Score: 0.0
Generation 22 - Best Score: 0.0
Generation 23 - Best Score: 0.0
Generation 24 - Best Score: 0.0
Generation 25 - Best Score: 0.0
Generation 26 - Best Score: 0.0
Generation 27 - Best Score: 0.0
Generation 28 - Best Score: 0.0
Generation 29 - Best Score: 0.0
Generation 30 - Best Score: 0.0
Generation 31 - Best Score: 0.0
Generation 32 - B