In [1]:
import numpy as np
import random
from sklearn.datasets import load_breast_cancer
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import cross_val_score
from sklearn.metrics import accuracy_score

In [2]:
# Load the breast-cancer dataset and pull out the two arrays used below.
data = load_breast_cancer()
X, y = data.data, data.target  # feature matrix, target vector


In [16]:
def fitness(chromosome, X, y):
    """Score a feature subset by cross-validated random-forest accuracy.

    Args:
        chromosome: 1-D sequence of flags; non-zero entries mark the columns
            of ``X`` that are kept.
        X: feature matrix, indexed as ``X[:, columns]``.
        y: target values handed to ``cross_val_score``.

    Returns:
        The mean accuracy over 5 cross-validation folds, or ``0`` when the
        chromosome selects no feature at all (the lowest possible score).
    """
    chosen_columns = np.nonzero(chromosome)[0]
    if chosen_columns.size > 0:
        # Fixed random_state keeps the evaluation model itself deterministic.
        classifier = RandomForestClassifier(random_state=42)
        fold_accuracies = cross_val_score(
            classifier, X[:, chosen_columns], y, cv=5, scoring='accuracy'
        )
        return np.mean(fold_accuracies)
    # An empty feature subset cannot be evaluated.
    return 0

def initialize_population(pop_size, chromosome_length):
    """Build the initial population of random bit-list chromosomes.

    Args:
        pop_size: number of chromosomes to create.
        chromosome_length: number of genes (0 or 1) in each chromosome.

    Returns:
        A list of ``pop_size`` lists, each holding ``chromosome_length``
        integers drawn with ``random.randint(0, 1)``.
    """
    population = []
    for _ in range(pop_size):
        genes = []
        for _ in range(chromosome_length):
            genes.append(random.randint(0, 1))
        population.append(genes)
    return population

def select_parents(population, fitness_values):
    """Pick two parents by fitness-proportionate (roulette-wheel) selection.

    Args:
        population: list of chromosomes.
        fitness_values: one non-negative score per chromosome, in the same
            order as ``population``.

    Returns:
        A list of two chromosomes drawn with ``random.choices``. Sampling is
        with replacement, so both entries may be the same individual.
    """
    total_fitness = sum(fitness_values)
    if total_fitness == 0:
        # Every individual scored 0 (e.g. no chromosome selects any feature):
        # normalising would divide by zero, so draw uniformly instead.
        return random.choices(population, k=2)
    probabilities = [fitness_value / total_fitness for fitness_value in fitness_values]
    return random.choices(population, weights=probabilities, k=2)

def crossover(parent1, parent2):
    """Recombine two parents with single-point crossover.

    A cut point is drawn from ``1 .. len(parent1) - 1`` so that each child
    receives at least one gene from each parent.

    Args:
        parent1: first parent chromosome (sliceable sequence, e.g. a list).
        parent2: second parent chromosome, same length as ``parent1``.

    Returns:
        A tuple ``(child1, child2)`` of new sequences; the parents are not
        modified. Chromosomes shorter than two genes have no interior cut
        point and are returned as unchanged copies.
    """
    if len(parent1) < 2:
        # random.randint(1, 0) would raise ValueError; nothing to swap anyway.
        return parent1[:], parent2[:]
    crossover_point = random.randint(1, len(parent1) - 1)
    child1 = parent1[:crossover_point] + parent2[crossover_point:]
    child2 = parent2[:crossover_point] + parent1[crossover_point:]
    return child1, child2

def mutate(chromosome, mutation_rate):
    """Apply bit-flip mutation to a chromosome in place.

    Args:
        chromosome: mutable sequence of 0/1 genes; it is modified directly.
        mutation_rate: per-gene probability of flipping (0 <-> 1).

    Returns:
        The same ``chromosome`` object that was passed in, after mutation.
    """
    for position, gene in enumerate(chromosome):
        # One independent random draw per gene decides whether it flips.
        flip = random.random() < mutation_rate
        if flip:
            chromosome[position] = 1 - gene
    return chromosome

def genetic_algorithm(X, y, pop_size, chromosome_length, generations, mutation_rate):
    """Evolve feature-selection chromosomes with a simple genetic algorithm.

    Each generation builds a full replacement population via roulette-wheel
    selection, single-point crossover and bit-flip mutation, then prints the
    best individual of that new population. There is no elitism: the previous
    population is discarded entirely, so the best score may drop between
    generations.

    Args:
        X: feature matrix forwarded to ``fitness``.
        y: target values forwarded to ``fitness``.
        pop_size: number of chromosomes kept per generation.
        chromosome_length: number of genes per chromosome.
        generations: number of generations to run.
        mutation_rate: per-gene flip probability passed to ``mutate``.

    Returns:
        The population (list of chromosomes) of the last generation.
    """
    population = initialize_population(pop_size, chromosome_length)
    if generations <= 0:
        # Nothing to evolve; skip the costly fitness evaluation altogether.
        return population

    # Fitness is an expensive cross-validation, so every population is scored
    # exactly once and the scores are reused for selection and reporting.
    fitness_values = [fitness(chromosome, X, y) for chromosome in population]

    for generation in range(generations):
        # Selection, crossover, mutation
        new_population = []
        while len(new_population) < pop_size:
            parent1, parent2 = select_parents(population, fitness_values)
            child1, child2 = crossover(parent1, parent2)
            child1 = mutate(child1, mutation_rate)
            child2 = mutate(child2, mutation_rate)
            new_population.extend([child1, child2])

        # Children are added in pairs; trim the surplus when pop_size is odd.
        population = new_population[:pop_size]
        fitness_values = [fitness(chromosome, X, y) for chromosome in population]

        # Report the best individual of this generation (first one on ties).
        best_index = max(range(len(population)), key=fitness_values.__getitem__)
        best_chromosome = population[best_index]
        best_fitness = fitness_values[best_index]
        print(f"Generation {generation}: Best Fitness = {best_fitness}, Best Chromosome = {best_chromosome}")

    return population

In [17]:
# Genetic-algorithm hyperparameters
chromosome_length = X.shape[1]  # one gene per feature column of X
pop_size = 20  # population size
generations = 50  # number of generations to run
mutation_rate = 0.01  # per-gene mutation rate

In [18]:
# Run the genetic algorithm.
# Seed Python's RNG first so that re-running this cell reproduces the same
# evolution (the GA operators all draw from the `random` module).
random.seed(42)
final_population = genetic_algorithm(X, y, pop_size, chromosome_length, generations, mutation_rate)

# Report the best individual of the final population. Each chromosome is
# scored once; the cross-validated fitness is too costly to recompute.
final_fitness_values = [fitness(chromosome, X, y) for chromosome in final_population]
best_index = max(range(len(final_population)), key=final_fitness_values.__getitem__)
best_chromosome = final_population[best_index]
best_fitness = final_fitness_values[best_index]
selected_features = np.where(best_chromosome)[0]
print(f"Final Best Fitness = {best_fitness}")
print(f"Selected Features = {selected_features}")
print(f"Number of Selected Features = {len(selected_features)}")

Generation 0: Best Fitness = 0.9630957925787922, Best Chromosome = [0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 1, 1, 0, 0, 1, 1, 1, 1, 1, 0, 1, 0, 1, 1, 1, 0]
Generation 1: Best Fitness = 0.9683589504735288, Best Chromosome = [1, 1, 1, 1, 1, 0, 0, 0, 1, 0, 0, 1, 0, 0, 1, 1, 0, 0, 1, 1, 1, 1, 1, 0, 1, 0, 1, 1, 1, 0]
Generation 2: Best Fitness = 0.9701288619779538, Best Chromosome = [0, 1, 1, 1, 1, 0, 0, 0, 1, 0, 0, 1, 0, 0, 1, 1, 0, 0, 1, 1, 1, 1, 1, 0, 1, 0, 1, 1, 1, 0]
Generation 3: Best Fitness = 0.9701599130569788, Best Chromosome = [1, 1, 1, 1, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 1, 1, 1, 0, 1, 0, 1, 1, 1, 0]
Generation 4: Best Fitness = 0.9683589504735288, Best Chromosome = [1, 1, 1, 1, 1, 0, 0, 0, 1, 0, 0, 1, 0, 0, 1, 1, 0, 0, 1, 1, 1, 1, 1, 0, 1, 0, 1, 1, 1, 0]
Generation 5: Best Fitness = 0.9666200900481291, Best Chromosome = [1, 1, 1, 1, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 1, 1, 1, 1, 1, 0, 1, 0, 1, 1, 1, 0]
Generation 6: Best Fitness = 0.9666200900481291, Bes