In [1]:
import numpy as np
from sklearn import datasets
from sklearn import svm
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
import random
import operator

# Fetch the scikit-learn diabetes dataset and pull out the feature matrix
# and the regression target
diabetes = datasets.load_diabetes()
X, y = diabetes.data, diabetes.target

# Hold out 20% of the samples for testing; the fixed random_state keeps the
# split identical from run to run
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Genetic-algorithm settings
population_size = 100  # individuals kept in every generation
generations = 100  # number of evolution rounds

# Probability that a freshly created child gets one gene flipped
mutation_probability = 0.1

# Number of candidates drawn for each tournament when choosing a parent
tournament_size = 5

# Define the fitness function
def fitness(individual):
    """Score a feature subset by the inverse MSE of an SVR trained on it.

    Args:
        individual: Sequence of 0/1 genes, one per feature column. A truthy
            gene keeps the corresponding column.

    Returns:
        ``1 / mse`` of the model's predictions on ``X_test`` (higher is
        better), or ``0.0`` when no feature is selected.
    """
    # Create a boolean mask for the features
    mask = np.asarray(individual, dtype=bool)

    # An individual with no selected feature would hand the model a
    # zero-column matrix, which cannot be fitted. Every real score is
    # positive (1 / mse), so 0.0 ranks the empty subset below all others
    # and lets selection discard it instead of aborting the whole run.
    if not mask.any():
        return 0.0

    # Select the features
    X_train_selected = X_train[:, mask]
    X_test_selected = X_test[:, mask]

    # Train the SVM model
    model = svm.SVR()
    model.fit(X_train_selected, y_train)

    # Predict the target values
    # NOTE(review): the score is computed on the test split, which the final
    # evaluation reuses, so the reported error is optimistically biased.
    # Consider scoring on a validation split or with cross-validation on the
    # training data instead.
    y_pred = model.predict(X_test_selected)

    # Calculate the mean squared error
    mse = np.mean((y_pred - y_test) ** 2)

    # Return the fitness (1 / mse)
    return 1 / mse

# Initialize the population: one list of random 0/1 genes per individual,
# with one gene for every feature column of X
population = [[random.randint(0, 1) for _ in range(X.shape[1])] for _ in range(population_size)]

# Fitness values already computed, keyed by gene tuple. The same gene
# patterns recur constantly across individuals and generations (there are
# only 2 ** n_features distinct masks), so caching avoids refitting a model
# for a pattern that has been scored before.
# Assumes fitness() is deterministic for a given individual; drop the cache
# if a stochastic model or resampled data is substituted.
fitness_cache = {}

# Evolve the population
for generation in range(generations):
    # Evaluate the fitness of each individual, reusing cached scores
    fitnesses = []
    for individual in population:
        genes = tuple(individual)
        if genes not in fitness_cache:
            fitness_cache[genes] = fitness(individual)
        fitnesses.append(fitness_cache[genes])

    # Pair every individual with its score once per generation rather than
    # once per tournament; the pairing does not change inside a generation
    scored_population = list(zip(population, fitnesses))

    # Select the parents using tournament selection: draw tournament_size
    # candidates without replacement and keep the fittest one
    parents = []
    for _ in range(population_size):
        tournament = random.sample(scored_population, tournament_size)
        winner = max(tournament, key=operator.itemgetter(1))[0]
        parents.append(winner)

    # Crossover (single-point crossover). randint is inclusive on both ends,
    # so the cut lies in 1..len-1 and each parent contributes at least one
    # gene. Slicing and concatenating builds a new list, so children never
    # alias their parents.
    offspring = []
    for _ in range(population_size):
        parent1, parent2 = random.sample(parents, 2)
        crossover_point = random.randint(1, len(parent1) - 1)
        child = parent1[:crossover_point] + parent2[crossover_point:]
        offspring.append(child)

    # Mutate the offspring: with probability mutation_probability, flip one
    # randomly chosen gene of the child in place
    for child in offspring:
        if random.random() < mutation_probability:
            mutation_point = random.randint(0, len(child) - 1)
            child[mutation_point] = 1 - child[mutation_point]

    # Replace the population with the offspring
    population = offspring

# Score every member of the last generation
fitnesses = list(map(fitness, population))

# Keep the highest-scoring individual (the first one if several tie)
best_individual = population[np.argmax(fitnesses)]

# Turn its genes into a boolean column mask
mask = list(map(bool, best_individual))

# Restrict both splits to the chosen feature columns
X_train_selected, X_test_selected = X_train[:, mask], X_test[:, mask]

# Fit a fresh SVR on the selected training features
model = svm.SVR()
model.fit(X_train_selected, y_train)

# Predict the held-out targets and measure the mean squared error
y_pred = model.predict(X_test_selected)
squared_errors = (y_pred - y_test) ** 2
mse = np.mean(squared_errors)

# Report the winning gene vector, the indices of the kept columns, and the error
print("Best individual:", best_individual)
print("Selected features:", [column for column, keep in enumerate(mask) if keep])
print("Mean squared error:", mse)


Best individual: [0, 0, 1, 0, 0, 0, 0, 0, 1, 0]
Selected features: [2, 8]
Mean squared error: 3620.950819473882
