In [14]:
import numpy as np
import pandas as pd
from sklearn.svm import SVC
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score
from sklearn import metrics
# 加载Wine Quality数据集
url = "https://archive.ics.uci.edu/ml/machine-learning-databases/wine-quality/winequality-red.csv"
data = pd.read_csv(url, sep=';')

# 将质量等级分为三类：低质量（1-4）、中等质量（5-6）和高质量（7-10）
data['quality'] = data['quality'].apply(lambda x: 0 if x <= 4 else (1 if x <= 6 else 2))

# 分离特征和标签
X = data.drop('quality', axis=1)
y = data['quality']

# 划分训练集和测试集
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

# 数据标准化
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# 使用带有RBF核的SVM
svm = SVC(kernel='rbf', C=3, gamma=2)

# 训练模型
svm.fit(X_train, y_train)

# 在测试集上预测
y_pred = svm.predict(X_test)
labels = list(range(0, 3))  # 就是y中有多少种分类，就给多少个标签
# 计算准确率
test_accuracy = metrics.precision_score(y_test, y_pred, labels=labels, average='macro', zero_division=1)
print("Test Accuracy:", test_accuracy)

Test Accuracy: 0.9419780219780219


In [3]:
import numpy as np
from sklearn.svm import SVC
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.preprocessing import StandardScaler
from sklearn.feature_selection import RFE
import pandas as pd
from sklearn import metrics
def elite_retention(crows, fitness):
    # 找到当前最优解
    best_index = np.argmax(fitness)
    gbest = crows[best_index]
    print('最好的fitness是：',fitness[best_index])
    return gbest

def adaptive_step(current_iter, max_iter, fl_max, fl_min):
    # 计算当前迭代的飞行步长
    #fl = fl_max - (fl_max - fl_min) * (2 * (current_iter / max_iter) - (current_iter / max_iter)**2)
    fl = fl_min + (fl_max - fl_min) * (2 * (current_iter / max_iter) - (current_iter / max_iter)**2)
    
    return fl

def is_prime(n):
    """判断一个数是否为素数"""
    if n <= 1:
        return False
    for i in range(2, int(np.sqrt(n)) + 1):
        if n % i == 0:
            return False
    return True

def init_pop(pop_size, dimension, bounds):
    """使用佳点集方法生成初始种群"""
    p = np.zeros((pop_size, dimension))
    prime_number_min = dimension * 2 + 3
    # 找到(prime_number_min-3)/2>=dimension的最小素数prime_number_min
    while not is_prime(prime_number_min):
        prime_number_min += 1
    
    #由于随机生成的方式，可能会出现某些维度上的值并不满足预设的边界条件，尤其是当边界非常狭窄时（例如，你的第二个维度的范围是从 0.00001 到 1）。
    for i in range(pop_size):
        for j in range(dimension):
            r = np.mod(2 * np.cos(2 * np.pi * (j + 1) / prime_number_min) * (i + 1), 1)
            p[i, j] = bounds[j, 0] + r * (bounds[j, 1] - bounds[j, 0])
    return p

def OBL(crows,top,low,pop_size,dimension):
    # print(top + low )
    # print('开始前：',crows)
    print('开始前形状：', crows.shape)
    tempcrow = np.zeros((pop_size, dimension))
    for i in range(pop_size):
        tempcrow[i] = top + low - crows[i]
    
    # 将 tempcrow 添加到 crows 的末尾
    crows = np.vstack((crows, tempcrow))
    # 确保所有值都在合法范围内
    crows = np.clip(crows, bounds[:, 0], bounds[:, 1])
    # print('之后：',crows)
    print('开始前形状：', crows.shape)
    # exit(0)
    return crows
    # return top + low -crows
# 定义计算函数
def APFunction(t,T,AP_max , AP_min, n, p, alpha=0, beta=1):
    return AP_min + (AP_max-AP_min)*(1 - (t / T)**n * (1 + alpha * np.sin(np.pi * (t / T)**p) ** beta))
def crow_search_algorithm(num_crows, num_dimensions, bounds,max_iter, AP,AP_max,AP_min,alpha , fl_max, fl_min, X_train, y_train):
    # # 初始化乌鸦位置（修改初始化范围和采用对数均匀分布）
    # crows = np.zeros((num_crows, num_dimensions))
    # for i in range(num_crows):
    #     crows[i][0] = np.exp(np.random.uniform(np.log(1), np.log(100000)))
    #     crows[i][1] = np.exp(np.random.uniform(np.log(0.000001), np.log(1)))
    
    # # 定义种群大小
    # pop_size = 100
    # # 定义种群的取值范围
    # dimension_2 = 2
    # bounds_2 = np.array([[0, 100], [0.001, 1]])

    #二维、三维的佳点集种群
    crows = init_pop(num_crows, num_dimensions, bounds)
    
    crows = OBL(crows, np.array([100,100]), np.array([0,0]), num_crows, num_dimensions)
    
    # print(crows)
    # exit(0)
    # crows_temp = np.copy(crows)
    fitness = np.zeros(num_crows * 2)
    
    for i in range(num_crows * 2):
        # 计算每个乌鸦的适应度值
        # 使用径向基函数(rbf)作为核，初始化SVC模型
        # 其中C参数和gamma参数通过crows数组获取
        svc = SVC(kernel='rbf',C=crows[i][0], gamma=crows[i][1])
        svc.fit(X_train, y_train)
        y_test_pred = svc.predict(X_test)
        # 将结果存储在fitness数组的第i个位置
        fitness[i] = metrics.precision_score(y_test, y_test_pred, labels=labels, average='macro', zero_division=1)
    print('fitness is',fitness)
    
    # 精英保留
    gbest = elite_retention(crows, fitness)
    t = 0
    AP_n = 2
    AP_p = 1
    AP_alpha=0.5
    AP_beta=2
    for t in range(max_iter):
        fl = adaptive_step(t, max_iter, fl_max, fl_min)
        
        AP = APFunction(t, max_iter,AP_max,AP_min, AP_n, AP_p, AP_alpha, AP_beta)
        for i in range(num_crows * 2):
            r1 = np.random.rand()
            r2 = np.random.rand()
            # print('cros is ',crows[i])
            crows_temp = np.zeros(crows[i].shape)
            if r1 >= AP:
                # 更新乌鸦位置
                crows_temp = crows[i] + r2 * fl * (gbest - crows[i])
                # print('best cros_temp is ',crows_temp)
            else:
                crows_temp[0] = np.exp(np.random.uniform(np.log(bounds[0, 0]), np.log(bounds[0, 1])))
                crows_temp[1] = np.exp(np.random.uniform(np.log(bounds[1, 0]), np.log(bounds[1, 1])))
                # print('random cros_temp is ',crows_temp)
            crows_temp = np.clip(crows_temp, bounds[:, 0], bounds[:, 1])
            # print('cros_temp is ',crows_temp)
            # 计算新的适应度值
            svc = SVC(kernel='rbf',C=crows_temp[0] , gamma=crows_temp[1])
            # scores = cross_val_score(svc, X_train, y_train, cv=5, scoring='accuracy')
            svc.fit(X_train, y_train)
            y_test_pred = svc.predict(X_test)
            # 将结果存储在fitness数组的第i个位置
            new_fitness = metrics.precision_score(y_test, y_test_pred, labels=labels, average='macro', zero_division=1)
            
            # 如果新位置更好，则更新
            if new_fitness > fitness[i]:
                fitness[i] = new_fitness
                crows[i] = crows_temp
        
        # 更新全局最优解
        gbest = elite_retention(crows, fitness)
        print("C:", gbest[0])
        # t = t + 1
        print("γ:", gbest[1])
    # 返回最优参数
    best_params = {'kernel':'rbf','C': gbest[0], 'gamma': gbest[1]}
    
    return best_params


# 加载Wine Quality数据集
url = "https://archive.ics.uci.edu/ml/machine-learning-databases/wine-quality/winequality-red.csv"
data = pd.read_csv(url, sep=';')

# # 将质量等级分为三类：低质量（1-4）、中等质量（5-6）和高质量（7-10）
data['quality'] = data['quality'].apply(lambda x: 0 if x <= 4 else (1 if x <= 6 else 2))

# 分离特征和标签
X = data.drop('quality', axis=1)
y = data['quality']

# 划分训练集和测试集
#控制随机种子：random_state 参数用于设置随机数生成器的种子。这意味着每次运行代码时，如果 random_state 的值相同，数据集的分割结果将会一致。
# 可重复性：通过设置 random_state，可以确保实验的可重复性。这对于调试和验证模型非常重要，因为你可以确保每次运行代码时使用的训练集和测试集是相同的。
# 默认值：如果 random_state 未设置（即为 None），train_test_split 函数将使用一个随机的种子值，这会导致每次运行代码时数据集的分割结果不同。
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

# 数据标准化
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# # 特征选择
# svc = SVC(kernel='linear')
# selector = RFE(svc, n_features_to_select=5, step=1)
# selector = selector.fit(X_train, y_train)
# X_train_selected = selector.transform(X_train)
# X_test_selected = selector.transform(X_test)

# 参数设置
num_crows = 100
num_dimensions = 2  # C和gamma两个参数
bounds = np.array([[1e-10, 200], [1e-10, 200]]) #两个参数分别的范围,把0变成1e-10是为了防止对数np.log引发除零错误。

max_iter = 100
AP = 0.2
AP_max = 1
AP_min = 0.01

alpha = 0.5  # CSA 参数
fl_max = 2
fl_min = 0.01
labels = list(range(0, 3))  # 就是y中有多少种分类，就给多少个标签

# 运行乌鸦搜索算法
best_params = crow_search_algorithm(num_crows, num_dimensions, bounds, max_iter, AP, AP_max, AP_min, alpha, fl_max, fl_min, X_train, y_train)
# best_params = {'kernel':'rbf','C': 1, 'gamma': 100}
# 使用最优参数训练SVM模型
svc = SVC(**best_params)
svc.fit(X_train, y_train)
y_train_pred = svc.predict(X_train)
y_test_pred = svc.predict(X_test)


# print(y_test_pred)
# 评估模型
#train_scores = cross_val_score(svc, X_train, y_train, cv=5, scoring='accuracy') 这个方式准确率太低了，所以就算了
train_scores = metrics.precision_score(y_train, y_train_pred, labels=labels, average='macro', zero_division=1)
test_scores = metrics.precision_score(y_test, y_test_pred, labels=labels, average='macro', zero_division=1)

print("Optimized SVM training accuracy:", train_scores)
print("Optimized SVM testing accuracy:", test_scores)

开始前形状： (100, 2)
开始前形状： (200, 2)
fitness is [0.94921316 0.94921316 0.94921316 0.94921316 0.94921316 0.94921316
 0.94921316 0.94921316 0.94921316 0.94921316 0.94921316 0.94921316
 0.94921316 0.94921316 0.94921316 0.94921316 0.94921316 0.94921316
 0.94921316 0.94921316 0.94921316 0.94921316 0.94921316 0.94921316
 0.94921316 0.94921316 0.94921316 0.94921316 0.94921316 0.94921316
 0.94921316 0.94921316 0.94921316 0.94921316 0.94921316 0.94921316
 0.94921316 0.94921316 0.94921316 0.94921316 0.94921316 0.94921316
 0.94921316 0.94921316 0.94921316 0.94921316 0.94921316 0.94921316
 0.94921316 0.94921316 0.94921316 0.94921316 0.94921316 0.94921316
 0.94921316 0.94982079 0.94921316 0.94921316 0.94921316 0.94921316
 0.94921316 0.94921316 0.94921316 0.94921316 0.94982079 0.94921316
 0.94921316 0.94921316 0.94921316 0.94921316 0.94921316 0.94921316
 0.94921316 0.94982079 0.94921316 0.94921316 0.94921316 0.94921316
 0.94921316 0.94921316 0.94921316 0.94921316 0.94982079 0.94921316
 0.94921316 0.94921