# 支持向量机

In [20]:
import pandas as pd
from sklearn.model_selection import train_test_split, cross_val_score, StratifiedKFold
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC
from sklearn.metrics import precision_score, recall_score, f1_score
from sklearn.model_selection import cross_val_predict
import numpy as np

# 1. 加载数据
file_path = r"C:\Users\86178\Desktop\工作簿1.xlsx"
df = pd.read_excel(file_path)

# 2. 数据预处理
X = df.iloc[:, 1:].values  # 特征（去掉第一列）
y = df.iloc[:, 0].values   # 标签（第一列）

# 3. 特征标准化
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

# 4. 划分训练集和测试集（7:3）
X_train, X_test, y_train, y_test = train_test_split(X_scaled, y, test_size=0.3, random_state=42, stratify=y)

# 5. 创建支持向量机模型
model = SVC(kernel='linear', random_state=42)

# 6. 五折交叉验证评估模型
cv = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)

# 7. 使用交叉验证进行训练并预测
y_pred = cross_val_predict(model, X_train, y_train, cv=cv)

# 8. 计算精确率、召回率和F1分数
precision = precision_score(y_train, y_pred, average='weighted')
recall = recall_score(y_train, y_pred, average='weighted')
f1 = f1_score(y_train, y_pred, average='weighted')

# 输出评估指标
print(f"交叉验证的精确率: {precision:.4f}")
print(f"交叉验证的召回率: {recall:.4f}")
print(f"交叉验证的F1分数: {f1:.4f}")

# 9. 使用训练集训练模型并在测试集上评估
model.fit(X_train, y_train)
y_test_pred = model.predict(X_test)

# 在测试集上计算精确率、召回率和F1分数
test_precision = precision_score(y_test, y_test_pred, average='weighted')
test_recall = recall_score(y_test, y_test_pred, average='weighted')
test_f1 = f1_score(y_test, y_test_pred, average='weighted')

print("\n在测试集上的评估结果：")
print(f"精确率: {test_precision:.4f}")
print(f"召回率: {test_recall:.4f}")
print(f"F1分数: {test_f1:.4f}")


交叉验证的精确率: 0.8278
交叉验证的召回率: 0.8283
交叉验证的F1分数: 0.8278

在测试集上的评估结果：
精确率: 0.8425
召回率: 0.8425
F1分数: 0.8423


# 朴素贝叶斯

In [21]:
import pandas as pd
from sklearn.model_selection import train_test_split, StratifiedKFold
from sklearn.preprocessing import StandardScaler
from sklearn.naive_bayes import GaussianNB
from sklearn.metrics import precision_score, recall_score, f1_score
from sklearn.model_selection import cross_val_predict

# 1. 加载数据
file_path = r"C:\Users\86178\Desktop\工作簿1.xlsx"
df = pd.read_excel(file_path)

# 2. 数据预处理
X = df.iloc[:, 1:].values  # 特征（去掉第一列）
y = df.iloc[:, 0].values   # 标签（第一列）

# 3. 特征标准化
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

# 4. 划分训练集和测试集（7:3）
X_train, X_test, y_train, y_test = train_test_split(X_scaled, y, test_size=0.3, random_state=42, stratify=y)

# 5. 创建朴素贝叶斯模型
model = GaussianNB()

# 6. 五折交叉验证评估模型
cv = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)

# 7. 使用交叉验证进行训练并预测
y_pred = cross_val_predict(model, X_train, y_train, cv=cv)

# 8. 计算精确率、召回率和F1分数
precision = precision_score(y_train, y_pred, average='weighted')
recall = recall_score(y_train, y_pred, average='weighted')
f1 = f1_score(y_train, y_pred, average='weighted')

# 输出评估指标
print(f"交叉验证的精确率: {precision:.4f}")
print(f"交叉验证的召回率: {recall:.4f}")
print(f"交叉验证的F1分数: {f1:.4f}")

# 9. 使用训练集训练模型并在测试集上评估
model.fit(X_train, y_train)
y_test_pred = model.predict(X_test)

# 在测试集上计算精确率、召回率和F1分数
test_precision = precision_score(y_test, y_test_pred, average='weighted')
test_recall = recall_score(y_test, y_test_pred, average='weighted')
test_f1 = f1_score(y_test, y_test_pred, average='weighted')

print("\n在测试集上的评估结果：")
print(f"精确率: {test_precision:.4f}")
print(f"召回率: {test_recall:.4f}")
print(f"F1分数: {test_f1:.4f}")


交叉验证的精确率: 0.7579
交叉验证的召回率: 0.7232
交叉验证的F1分数: 0.7127

在测试集上的评估结果：
精确率: 0.7245
召回率: 0.6600
F1分数: 0.6490


# K临近

In [22]:
import pandas as pd
from sklearn.model_selection import train_test_split, StratifiedKFold
from sklearn.preprocessing import StandardScaler
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import precision_score, recall_score, f1_score
from sklearn.model_selection import cross_val_predict

# 1. 加载数据
file_path = r"C:\Users\86178\Desktop\工作簿1.xlsx"
df = pd.read_excel(file_path)

# 2. 数据预处理
X = df.iloc[:, 1:].values  # 特征（去掉第一列）
y = df.iloc[:, 0].values   # 标签（第一列）

# 3. 特征标准化
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

# 4. 划分训练集和测试集（7:3）
X_train, X_test, y_train, y_test = train_test_split(X_scaled, y, test_size=0.3, random_state=42, stratify=y)

# 5. 创建K-近邻模型
model = KNeighborsClassifier(n_neighbors=5)  # 设置k=5，您可以根据需要调整k值

# 6. 五折交叉验证评估模型
cv = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)

# 7. 使用交叉验证进行训练并预测
y_pred = cross_val_predict(model, X_train, y_train, cv=cv)

# 8. 计算精确率、召回率和F1分数
precision = precision_score(y_train, y_pred, average='weighted')
recall = recall_score(y_train, y_pred, average='weighted')
f1 = f1_score(y_train, y_pred, average='weighted')

# 输出评估指标
print(f"交叉验证的精确率: {precision:.4f}")
print(f"交叉验证的召回率: {recall:.4f}")
print(f"交叉验证的F1分数: {f1:.4f}")

# 9. 使用训练集训练模型并在测试集上评估
model.fit(X_train, y_train)
y_test_pred = model.predict(X_test)

# 在测试集上计算精确率、召回率和F1分数
test_precision = precision_score(y_test, y_test_pred, average='weighted')
test_recall = recall_score(y_test, y_test_pred, average='weighted')
test_f1 = f1_score(y_test, y_test_pred, average='weighted')

print("\n在测试集上的评估结果：")
print(f"精确率: {test_precision:.4f}")
print(f"召回率: {test_recall:.4f}")
print(f"F1分数: {test_f1:.4f}")


交叉验证的精确率: 0.7452
交叉验证的召回率: 0.7425
交叉验证的F1分数: 0.7415

在测试集上的评估结果：
精确率: 0.7714
召回率: 0.7600
F1分数: 0.7608


# 逻辑回归

In [23]:
import pandas as pd
from sklearn.model_selection import train_test_split, StratifiedKFold
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import precision_score, recall_score, f1_score
from sklearn.model_selection import cross_val_predict

# 1. 加载数据
file_path = r"C:\Users\86178\Desktop\工作簿1.xlsx"
df = pd.read_excel(file_path)

# 2. 数据预处理
X = df.iloc[:, 1:].values  # 特征（去掉第一列）
y = df.iloc[:, 0].values   # 标签（第一列）

# 3. 特征标准化
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

# 4. 划分训练集和测试集（7:3）
X_train, X_test, y_train, y_test = train_test_split(X_scaled, y, test_size=0.3, random_state=42, stratify=y)

# 5. 创建逻辑回归模型
model = LogisticRegression(max_iter=1000, random_state=42)

# 6. 五折交叉验证评估模型
cv = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)

# 7. 使用交叉验证进行训练并预测
y_pred = cross_val_predict(model, X_train, y_train, cv=cv)

# 8. 计算精确率、召回率和F1分数
precision = precision_score(y_train, y_pred, average='weighted')
recall = recall_score(y_train, y_pred, average='weighted')
f1 = f1_score(y_train, y_pred, average='weighted')

# 输出评估指标
print(f"交叉验证的精确率: {precision:.4f}")
print(f"交叉验证的召回率: {recall:.4f}")
print(f"交叉验证的F1分数: {f1:.4f}")

# 9. 使用训练集训练模型并在测试集上评估
model.fit(X_train, y_train)
y_test_pred = model.predict(X_test)

# 在测试集上计算精确率、召回率和F1分数
test_precision = precision_score(y_test, y_test_pred, average='weighted')
test_recall = recall_score(y_test, y_test_pred, average='weighted')
test_f1 = f1_score(y_test, y_test_pred, average='weighted')

print("\n在测试集上的评估结果：")
print(f"精确率: {test_precision:.4f}")
print(f"召回率: {test_recall:.4f}")
print(f"F1分数: {test_f1:.4f}")


交叉验证的精确率: 0.8412
交叉验证的召回率: 0.8412
交叉验证的F1分数: 0.8411

在测试集上的评估结果：
精确率: 0.8438
召回率: 0.8425
F1分数: 0.8419


# 分类回归树

In [24]:
import pandas as pd
from sklearn.model_selection import train_test_split, StratifiedKFold
from sklearn.preprocessing import StandardScaler
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import precision_score, recall_score, f1_score
from sklearn.model_selection import cross_val_predict

# 1. 加载数据
file_path = r"C:\Users\86178\Desktop\工作簿1.xlsx"
df = pd.read_excel(file_path)

# 2. 数据预处理
X = df.iloc[:, 1:].values  # 特征（去掉第一列）
y = df.iloc[:, 0].values   # 标签（第一列）

# 3. 特征标准化
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

# 4. 划分训练集和测试集（7:3）
X_train, X_test, y_train, y_test = train_test_split(X_scaled, y, test_size=0.3, random_state=42, stratify=y)

# 5. 创建决策树分类模型
model = DecisionTreeClassifier(random_state=42)

# 6. 五折交叉验证评估模型
cv = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)

# 7. 使用交叉验证进行训练并预测
y_pred = cross_val_predict(model, X_train, y_train, cv=cv)

# 8. 计算精确率、召回率和F1分数
precision = precision_score(y_train, y_pred, average='weighted')
recall = recall_score(y_train, y_pred, average='weighted')
f1 = f1_score(y_train, y_pred, average='weighted')

# 输出评估指标
print(f"交叉验证的精确率: {precision:.4f}")
print(f"交叉验证的召回率: {recall:.4f}")
print(f"交叉验证的F1分数: {f1:.4f}")

# 9. 使用训练集训练模型并在测试集上评估
model.fit(X_train, y_train)
y_test_pred = model.predict(X_test)

# 在测试集上计算精确率、召回率和F1分数
test_precision = precision_score(y_test, y_test_pred, average='weighted')
test_recall = recall_score(y_test, y_test_pred, average='weighted')
test_f1 = f1_score(y_test, y_test_pred, average='weighted')

print("\n在测试集上的评估结果：")
print(f"精确率: {test_precision:.4f}")
print(f"召回率: {test_recall:.4f}")
print(f"F1分数: {test_f1:.4f}")


交叉验证的精确率: 0.8016
交叉验证的召回率: 0.8015
交叉验证的F1分数: 0.8011

在测试集上的评估结果：
精确率: 0.7770
召回率: 0.7775
F1分数: 0.7768


# XGBOOST

In [25]:
import pandas as pd
from sklearn.model_selection import train_test_split, StratifiedKFold
from sklearn.preprocessing import StandardScaler, LabelEncoder
import xgboost as xgb
from sklearn.metrics import precision_score, recall_score, f1_score
from sklearn.model_selection import cross_val_predict

# 1. 加载数据
file_path = r"C:\Users\86178\Desktop\工作簿1.xlsx"
df = pd.read_excel(file_path)

# 2. 数据预处理
X = df.iloc[:, 1:].values  # 特征（去掉第一列）
y = df.iloc[:, 0].values   # 标签（第一列）

# 使用LabelEncoder将标签转换为整数
label_encoder = LabelEncoder()
y_encoded = label_encoder.fit_transform(y)

# 特征标准化
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

# 4. 划分训练集和测试集（7:3）
X_train, X_test, y_train, y_test = train_test_split(X_scaled, y_encoded, test_size=0.3, random_state=42, stratify=y_encoded)

# 5. 创建XGBoost分类模型
model = xgb.XGBClassifier(random_state=42, use_label_encoder=False, eval_metric='mlogloss')

# 6. 五折交叉验证评估模型
cv = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)

# 7. 使用交叉验证进行训练并预测
y_pred = cross_val_predict(model, X_train, y_train, cv=cv)

# 8. 计算精确率、召回率和F1分数
precision = precision_score(y_train, y_pred, average='weighted')
recall = recall_score(y_train, y_pred, average='weighted')
f1 = f1_score(y_train, y_pred, average='weighted')

# 输出评估指标
print(f"交叉验证的精确率: {precision:.4f}")
print(f"交叉验证的召回率: {recall:.4f}")
print(f"交叉验证的F1分数: {f1:.4f}")

# 9. 使用训练集训练模型并在测试集上评估
model.fit(X_train, y_train)
y_test_pred = model.predict(X_test)

# 在测试集上计算精确率、召回率和F1分数
test_precision = precision_score(y_test, y_test_pred, average='weighted')
test_recall = recall_score(y_test, y_test_pred, average='weighted')
test_f1 = f1_score(y_test, y_test_pred, average='weighted')

print("\n在测试集上的评估结果：")
print(f"精确率: {test_precision:.4f}")
print(f"召回率: {test_recall:.4f}")
print(f"F1分数: {test_f1:.4f}")


Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.



交叉验证的精确率: 0.8920
交叉验证的召回率: 0.8916
交叉验证的F1分数: 0.8911


Parameters: { "use_label_encoder" } are not used.




在测试集上的评估结果：
精确率: 0.9037
召回率: 0.9025
F1分数: 0.9025


# 随机森林

In [4]:
import pandas as pd
from sklearn.model_selection import train_test_split, StratifiedKFold
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import precision_score, recall_score, f1_score
from sklearn.model_selection import cross_val_predict

# 1. 加载数据
file_path = r"C:\Users\86178\Desktop\杜月明.xlsx"
df = pd.read_excel(file_path)

# 2. 数据预处理
X = df.iloc[:, 1:].values  # 特征（去掉第一列）
y = df.iloc[:, 0].values   # 标签（第一列）

# 使用LabelEncoder将标签转换为整数
label_encoder = LabelEncoder()
y_encoded = label_encoder.fit_transform(y)

# 特征标准化
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

# 4. 划分训练集和测试集（7:3）
X_train, X_test, y_train, y_test = train_test_split(X_scaled, y_encoded, test_size=0.3, random_state=42, stratify=y_encoded)

# 5. 创建随机森林分类模型
model = RandomForestClassifier(random_state=42)

# 6. 五折交叉验证评估模型
cv = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)

# 7. 使用交叉验证进行训练并预测
y_pred = cross_val_predict(model, X_train, y_train, cv=cv)

# 8. 计算精确率、召回率和F1分数
precision = precision_score(y_train, y_pred, average='weighted')
recall = recall_score(y_train, y_pred, average='weighted')
f1 = f1_score(y_train, y_pred, average='weighted')

# 输出评估指标
print(f"交叉验证的精确率: {precision:.4f}")
print(f"交叉验证的召回率: {recall:.4f}")
print(f"交叉验证的F1分数: {f1:.4f}")

# 9. 使用训练集训练模型并在测试集上评估
model.fit(X_train, y_train)
y_test_pred = model.predict(X_test)

# 在测试集上计算精确率、召回率和F1分数
test_precision = precision_score(y_test, y_test_pred, average='weighted')
test_recall = recall_score(y_test, y_test_pred, average='weighted')
test_f1 = f1_score(y_test, y_test_pred, average='weighted')

print("\n在测试集上的评估结果：")
print(f"精确率: {test_precision:.4f}")
print(f"召回率: {test_recall:.4f}")
print(f"F1分数: {test_f1:.4f}")


交叉验证的精确率: 0.8121
交叉验证的召回率: 0.8112
交叉验证的F1分数: 0.8093

在测试集上的评估结果：
精确率: 0.8186
召回率: 0.8125
F1分数: 0.8114
