In [None]:
# 引入必要的库
import numpy as np
import matplotlib.pyplot as plt
from sklearn.linear_model import LinearRegression

# Use the SimHei font so the Chinese axis labels and legend entries render.
plt.rcParams['font.sans-serif'] = ['SimHei']
# Companion setting for a CJK font: draw minus signs as ASCII hyphens so
# negative tick labels stay displayable (the later logistic-regression cell
# applies the same pair of settings).
plt.rcParams['axes.unicode_minus'] = False

# Sample measurements: spring deformation and the matching object weight.
spring_deformation = np.array([1.2, 2.0, 3.5, 4.1, 5.0])
object_weight = np.array([10, 20, 35, 40, 50])

# Reshape the 1-D deformations into a single-column 2-D feature matrix.
X = spring_deformation.reshape(-1, 1)

# Create the linear regression model and fit it to the measurements.
model = LinearRegression()
model.fit(X, object_weight)

# The fitted slope plays the role of the spring constant; also keep the intercept.
slope = model.coef_[0]
intercept = model.intercept_

print(f"弹性系数（斜率）：{slope}")
print(f"截距：{intercept}")

# Predict the object weight for a new deformation value.
new_deformation = np.array([3.0]).reshape(-1, 1)
predicted_weight = model.predict(new_deformation)

print(f"当弹簧变形为 {new_deformation[0][0]} 时，预测物体重量为 {predicted_weight[0]}")

# Plot the measurements, the fitted line and the new prediction.
plt.scatter(spring_deformation, object_weight, color='blue', label='实际数据')
plt.plot(spring_deformation, model.predict(X), color='red', linewidth=2, label='线性回归模型')
plt.scatter(new_deformation, predicted_weight, color='green', marker='x', s=100, label='预测数据')
plt.xlabel('弹簧变形')
plt.ylabel('物体重量')
plt.legend()
plt.show()



In [None]:
import numpy as np
import matplotlib.pyplot as plt
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, confusion_matrix

# Study time in hours.
study_hours = np.array([2, 3, 4, 5, 6, 7])

# Exam outcome for each student (1 = passed, 0 = failed).
pass_exam = np.array([0, 0, 0, 1, 1, 1])

# Reshape the 1-D hours into a single-column 2-D feature matrix.
X = study_hours.reshape(-1, 1)

# Create the logistic regression model.
model = LogisticRegression()

# Split into training and test sets.
X_train, X_test, y_train, y_test = train_test_split(X, pass_exam, test_size=0.2, random_state=42)

# Fit on the training portion and predict the held-out samples.
model.fit(X_train, y_train)
y_pred = model.predict(X_test)

# Accuracy and confusion matrix. With so few samples the test split may hold
# a single class, so pin the label set to keep the matrix 2x2 in every case.
accuracy = accuracy_score(y_test, y_pred)
conf_matrix = confusion_matrix(y_test, y_pred, labels=[0, 1])

print(f"准确度：{accuracy}")
print(f"混淆矩阵：\n{conf_matrix}")

# Plot the observations together with the predicted probability of passing.
plt.scatter(X, pass_exam, color='blue', marker='o', label='实际数据')
plt.plot(X, model.predict_proba(X)[:, 1], color='red', linewidth=2, label='逻辑回归模型')
plt.xlabel('学习时间（小时）')
plt.ylabel('是否通过考试')
plt.legend()
plt.savefig('logic.png',dpi=300)
plt.show()


In [None]:
# 引入必要的库
import numpy as np
import matplotlib.pyplot as plt
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, confusion_matrix

# Configure matplotlib for Chinese text: SimHei font plus ASCII minus signs.
plt.rcParams.update({
    'font.sans-serif': ['SimHei'],
    'axes.unicode_minus': False,
})

# Synthetic data: 100 ages and a 0/1 prior-conviction flag per person.
np.random.seed(42)
ages = np.random.randint(20, 60, 100)
prior_convictions = np.random.randint(0, 2, 100)  # 0 = no prior record, 1 = has a prior record
# Simplified labelling rule: exactly one of "under 30" / "has a prior record" holds.
labels = np.logical_xor(ages < 30, prior_convictions)

# Two-column feature matrix: age, prior-conviction flag.
X = np.c_[ages, prior_convictions]

# Hold out 20% of the samples for testing.
X_train, X_test, y_train, y_test = train_test_split(X, labels, test_size=0.2, random_state=42)

# Build the logistic regression model and fit it (fit returns the estimator).
model = LogisticRegression().fit(X_train, y_train)

# Evaluate on the held-out samples.
y_pred = model.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
conf_matrix = confusion_matrix(y_test, y_pred)

print(f"准确度：{accuracy}")
print(f"混淆矩阵：\n{conf_matrix}")

# Evaluate the model on a dense grid that extends one unit past the data on each side.
h = .02  # grid step
x_min, y_min = X.min(axis=0) - 1
x_max, y_max = X.max(axis=0) + 1
xx, yy = np.meshgrid(np.arange(x_min, x_max, h), np.arange(y_min, y_max, h))
Z = model.predict(np.column_stack((xx.ravel(), yy.ravel()))).reshape(xx.shape)

# Scatter the samples coloured by label, then overlay the decision boundary contour.
plt.scatter(X[:, 0], X[:, 1], c=labels, cmap='viridis', edgecolors='k', marker='o', s=50)
plt.contour(xx, yy, Z, cmap=plt.cm.Paired, alpha=0.8)
plt.xlabel('年龄')
plt.ylabel('犯罪前科次数')
plt.title('逻辑回归决策边界')
plt.show()


In [None]:
# 引入必要的库
import numpy as np
import matplotlib.pyplot as plt
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, confusion_matrix
from sklearn.datasets import load_digits

# Load the handwritten-digits example dataset.
digits = load_digits()

# Flatten every image into one feature row; the targets are the digit labels.
X = digits.images.reshape(digits.images.shape[0], -1)
y = digits.target

# Hold out 20% of the samples for testing.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Build the logistic regression model (raised iteration cap) and fit it.
model = LogisticRegression(max_iter=10000).fit(X_train, y_train)

# Evaluate on the held-out samples.
y_pred = model.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
conf_matrix = confusion_matrix(y_test, y_pred)

print(f"准确度：{accuracy}")
print(f"混淆矩阵：\n{conf_matrix}")

# Show the first ten test images with their true and predicted labels.
fig, axes = plt.subplots(1, 10, figsize=(12, 2))
for i, ax in enumerate(axes):
    ax.imshow(X_test[i].reshape(8, 8), cmap='gray')
    ax.set_title(f'True: {y_test[i]}\nPred: {y_pred[i]}')
    ax.axis('off')
plt.show()


## 【例5.1】监督学习的模型评估指数计算。

In [None]:

from sklearn import metrics
from pprint import pprint
# Ground-truth labels and the labels predicted for the same six samples.
y_true = [0, 1, 1, 0, 1, 0]
y_pred = [0, 1, 0, 0, 1, 1]

# Apply each scikit-learn metric to the same (y_true, y_pred) pair and
# collect the results under their display names.
rules = {
    label: scorer(y_true, y_pred)
    for label, scorer in (
        ('混淆矩阵', metrics.confusion_matrix),
        ('准确率', metrics.accuracy_score),
        ('召回率', metrics.recall_score),
        ('精确率', metrics.precision_score),
        ('F1分数', metrics.f1_score),
        ('AUC', metrics.roc_auc_score),
        ('R^2', metrics.r2_score),
    )
}

pprint(rules)


In [None]:
import matplotlib.pyplot as plt
from sklearn.metrics import roc_auc_score, roc_curve
import numpy as np

# Seeded example data: 100 random binary labels and 100 random scores.
np.random.seed(42)
y_true = np.random.randint(2, size=100)
y_scores = np.random.rand(100)

# ROC curve points and the area under the curve.
fpr, tpr, thresholds = roc_curve(y_true, y_scores)
roc_auc = roc_auc_score(y_true, y_scores)

# Draw the ROC curve with the chance diagonal as a reference line.
plt.figure(figsize=(8, 6))
plt.plot(fpr, tpr, color='darkorange', lw=2, label=f'ROC curve (area = {roc_auc:.2f})')
plt.plot([0, 1], [0, 1], color='navy', lw=2, linestyle='--')
plt.title('Receiver Operating Characteristic (ROC) Curve')
plt.xlabel('False Positive Rate')
plt.ylabel('True Positive Rate')
plt.legend(loc='lower right')
plt.show()

print("AUC:", roc_auc)


In [None]:
# Display the false-positive-rate array left in the kernel by the most recent roc_curve call.
fpr

In [None]:
import matplotlib.pyplot as plt
import numpy as np
from sklearn import metrics

# Ground-truth labels and hard (0/1) predictions for six samples.
y_true = [0, 1, 1, 0, 1, 0]
y_pred = [0, 1, 0, 0, 1, 1]

# ROC curve computed from the hard predictions, and the area under it.
fpr, tpr, thresholds = metrics.roc_curve(y_true, y_pred)
roc_auc = metrics.auc(fpr, tpr)

# Plot the ROC curve with the chance diagonal, then save and show it.
plt.figure(figsize=(8, 6))
plt.plot(fpr, tpr, color='darkorange', lw=2, label='ROC curve (area = {:.2f})'.format(roc_auc))
plt.plot([0, 1], [0, 1], color='navy', lw=2, linestyle='--')
plt.xlabel('False Positive Rate')
plt.ylabel('True Positive Rate')
plt.title('Receiver Operating Characteristic (ROC) Curve')
plt.legend(loc='lower right')
plt.savefig('auc-roc.png', dpi=300)
plt.show()

# Remaining evaluation metrics for the same label pair.
evaluation_metrics = {
    'Confusion Matrix': metrics.confusion_matrix(y_true, y_pred),
    'Accuracy': metrics.accuracy_score(y_true, y_pred),
    'Recall': metrics.recall_score(y_true, y_pred),
    'Precision': metrics.precision_score(y_true, y_pred),
    'F1 Score': metrics.f1_score(y_true, y_pred),
    'AUC': metrics.roc_auc_score(y_true, y_pred),
    'R^2': metrics.r2_score(y_true, y_pred)
}

# Print each metric. The original passed `2` as a second print argument, which
# printed a literal "2" after every value; scalar metrics are now formatted to
# two decimals and the confusion matrix is printed unchanged.
for metric_name, metric_value in evaluation_metrics.items():
    print(f'{metric_name}:')
    if isinstance(metric_value, np.ndarray):
        print(metric_value)
    else:
        print(f'{metric_value:.2f}')
    print('-' * 30)


In [None]:
import matplotlib.pyplot as plt
from sklearn.metrics import roc_curve, auc
import numpy as np

# Seeded example data: 100 random binary labels and 100 random scores.
np.random.seed(42)
y_true = np.random.randint(2, size=100)
y_scores = np.random.rand(100)

# ROC curve points; only the thresholds are printed below.
fpr, tpr, thresholds = roc_curve(y_true, y_scores)
print("Thresholds:", thresholds)

# Turn the scores into 0/1 labels: 1 where the score reaches the chosen threshold.
chosen_threshold = 0.6
binary_predictions = np.greater_equal(y_scores, chosen_threshold).astype(int)

print("Binary Predictions (using chosen threshold):", binary_predictions)


In [None]:
import matplotlib.pyplot as plt
from sklearn.metrics import roc_curve, auc
import numpy as np

# Re-create the seeded example data (random binary labels, random scores).
np.random.seed(42)
y_true = np.random.randint(2, size=100)
y_scores = np.random.rand(100)

# Compute the ROC curve and print the thresholds it was evaluated at.
fpr, tpr, thresholds = roc_curve(y_true, y_scores)
print("Thresholds:", thresholds)

# Binarise the scores at a fixed cut-off (scores at or above it become 1).
chosen_threshold = 0.6
above_cutoff = y_scores >= chosen_threshold
binary_predictions = above_cutoff.astype(int)

print("Binary Predictions (using chosen threshold):", binary_predictions)


In [None]:
# Display the true-positive-rate array left in the kernel by the most recent roc_curve call.
tpr

In [None]:
# Display the false-positive-rate array left in the kernel by the most recent roc_curve call.
fpr

In [None]:
# Display the threshold array left in the kernel by the most recent roc_curve call.
thresholds

In [None]:
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score, explained_variance_score
import numpy as np

# Seeded example data: independent random "true" and "predicted" values.
np.random.seed(42)
y_true_regression = np.random.rand(100)
y_pred_regression = np.random.rand(100)

# Evaluate the four regression metrics on the same pair of arrays.
mse, mae, r2, explained_variance = (
    metric(y_true_regression, y_pred_regression)
    for metric in (mean_squared_error, mean_absolute_error, r2_score, explained_variance_score)
)

# Report the metric values.
print("Mean Squared Error (MSE):", mse)
print("Mean Absolute Error (MAE):", mae)
print("R-squared (R2):", r2)
print("Explained Variance:", explained_variance)


In [None]:
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score, explained_variance_score
from sklearn.linear_model import LinearRegression

# Seeded example data: y = 2x + 1 plus Gaussian noise.
np.random.seed(42)
X_regression = np.random.rand(100, 1)
y_true_regression = 2 * X_regression.squeeze() + 1 + 0.1 * np.random.randn(100)
# A second noisy sample of the same line; it is not used further in this cell.
y_pred_regression = 2 * X_regression.squeeze() + 1 + 0.1 * np.random.randn(100)

# Fit a linear regression model and predict on the training inputs.
model = LinearRegression()
model.fit(X_regression, y_true_regression)
y_pred_regression_model = model.predict(X_regression)

# Regression metrics for the fitted model.
mse = mean_squared_error(y_true_regression, y_pred_regression_model)
mae = mean_absolute_error(y_true_regression, y_pred_regression_model)
r2 = r2_score(y_true_regression, y_pred_regression_model)
explained_variance = explained_variance_score(y_true_regression, y_pred_regression_model)

print("Mean Squared Error (MSE):", mse)
print("Mean Absolute Error (MAE):", mae)
print("R-squared (R2):", r2)
print("Explained Variance:", explained_variance)

# Left panel: actual versus predicted values.
plt.figure(figsize=(12, 4))

plt.subplot(1, 2, 1)
plt.scatter(X_regression, y_true_regression, label='Actual', alpha=0.8)
plt.scatter(X_regression, y_pred_regression_model, label='Predicted', alpha=0.8)
plt.title('Scatter Plot of Actual vs Predicted')
plt.xlabel('X')
plt.ylabel('y')
plt.legend()

# Right panel: residual plot. x and y are passed by keyword because seaborn
# 0.12+ makes them keyword-only; older releases accept the keywords as well.
residuals = y_true_regression - y_pred_regression_model
plt.subplot(1, 2, 2)
sns.residplot(x=X_regression.squeeze(), y=residuals, lowess=True)
plt.title('Residual Plot')
plt.xlabel('X')
plt.ylabel('Residuals')

plt.tight_layout()
plt.show()


In [None]:
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score, explained_variance_score
from sklearn.linear_model import LinearRegression

# Seeded example data: y = 2x + 1 plus Gaussian noise.
np.random.seed(42)
X_regression = np.random.rand(100, 1)
y_true_regression = 2 * X_regression.squeeze() + 1 + 0.1 * np.random.randn(100)
# A second noisy sample of the same line; it is not used further in this cell.
y_pred_regression = 2 * X_regression.squeeze() + 1 + 0.1 * np.random.randn(100)

# Fit a linear regression model and predict on the training inputs.
model = LinearRegression()
model.fit(X_regression, y_true_regression)
y_pred_regression_model = model.predict(X_regression)

# Regression metrics for the fitted model.
mse = mean_squared_error(y_true_regression, y_pred_regression_model)
mae = mean_absolute_error(y_true_regression, y_pred_regression_model)
r2 = r2_score(y_true_regression, y_pred_regression_model)
explained_variance = explained_variance_score(y_true_regression, y_pred_regression_model)

print("Mean Squared Error (MSE):", mse)
print("Mean Absolute Error (MAE):", mae)
print("R-squared (R2):", r2)
print("Explained Variance:", explained_variance)

# Left panel: actual versus predicted values.
plt.figure(figsize=(12, 4))

plt.subplot(1, 2, 1)
plt.scatter(X_regression, y_true_regression, label='Actual', alpha=0.8)
plt.scatter(X_regression, y_pred_regression_model, label='Predicted', alpha=0.8)
plt.title('Scatter Plot of Actual vs Predicted')
plt.xlabel('X')
plt.ylabel('y')
plt.legend()

# Right panel: residual plot. x and y are passed by keyword because seaborn
# 0.12+ makes them keyword-only; older releases accept the keywords as well.
residuals = y_true_regression - y_pred_regression_model
plt.subplot(1, 2, 2)
sns.residplot(x=X_regression.squeeze(), y=residuals, lowess=True)
plt.title('Residual Plot')
plt.xlabel('X')
plt.ylabel('Residuals')

plt.tight_layout()

# Save the figure before showing it.
plt.savefig('regression_evaluation_plots.png', dpi=300)
plt.show()


In [None]:
from sklearn.metrics import precision_score, recall_score, f1_score

# Ground-truth and predicted labels for an eight-sample, five-class example.
y_true = [0, 1, 2, 2, 1, 3, 2,4]
y_pred = [0, 1, 2, 2, 0, 1, 2,2]

# Classes 3 and 4 are never predicted, so their per-class precision is 0/0.
# zero_division=0 scores those classes as 0 (the value the default falls back
# to) without emitting UndefinedMetricWarning.
precision_micro = precision_score(y_true, y_pred, average='micro', zero_division=0)
precision_macro = precision_score(y_true, y_pred, average='macro', zero_division=0)
precision_weighted = precision_score(y_true, y_pred, average='weighted', zero_division=0)

print("Precision (Micro):", precision_micro)
print("Precision (Macro):", precision_macro)
print("Precision (Weighted):", precision_weighted)


## 【例5.2】线性回归案例代码。

In [None]:
from sklearn.linear_model import LinearRegression
import numpy as np
# Training data: four samples with two features each, and their targets.
x = np.array([[1, 2], [2, 4], [3, 6], [4, 8]])
y = np.array([2, 4, 6, 8])

# Create the linear regression model and fit it (fit returns the estimator).
model = LinearRegression().fit(x, y)

# Predict the targets of two new samples.
x_new = np.array([[5, 10], [6, 12]])
y_new = model.predict(x_new)
print(y_new)  # predicted values

# Report the fitted parameters.
print("模型系数 (coefficients):", model.coef_)
print("模型截距 (intercept):", model.intercept_)


In [None]:
# Import everything this cell uses so it does not depend on names left in
# the kernel by an earlier cell.
import numpy as np
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score

# Create the linear regression model.
model = LinearRegression()

# Training data: four samples with two features each, and their targets.
x = np.array([[1, 2], [2, 4], [3, 6], [4, 8]])
y = np.array([2, 4, 6, 8])
model.fit(x, y)

# New samples together with their actual target values.
x_new = np.array([[5, 10], [6, 12]])
y_true = np.array([10, 12])  # actual target values

y_pred = model.predict(x_new)

# Report the predictions.
print("预测结果:", y_pred)

# Regression metrics comparing the predictions with the actual values.
mse = mean_squared_error(y_true, y_pred)
mae = mean_absolute_error(y_true, y_pred)
r2 = r2_score(y_true, y_pred)

print("均方误差 (MSE):", mse)
print("平均绝对误差 (MAE):", mae)
print("R-squared (R2):", r2)

# Report the fitted parameters.
print("模型系数 (coefficients):", model.coef_)
print("模型截距 (intercept):", model.intercept_)


## 【例5.3】逻辑回归案例代码。

In [None]:
from sklearn.linear_model import LogisticRegression
import numpy as np
# Training data: four samples with two features each, and their binary labels.
x = np.array([[1, 2], [2, 4], [3, 6], [4, 8]])
y = np.array([0, 0, 1, 1])

# Create the logistic regression model and fit it (fit returns the estimator).
model = LogisticRegression().fit(x, y)

# Predict the classes of two new samples.
x_new = np.array([[5, 10], [6, 12]])
y_new = model.predict(x_new)
print(y_new)  # predicted classes

# Report the fitted parameters.
print("模型系数 (coefficients):", model.coef_)
print("模型截距 (intercept):", model.intercept_)


In [None]:
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, confusion_matrix

import numpy as np

# Create the logistic regression model.
model = LogisticRegression()

# Training data: four samples with two features each, and their binary labels.
x = np.array([[1, 2], [2, 4], [3, 6], [4, 8]])
y = np.array([0, 0, 1, 1])
model.fit(x, y)

# New samples together with their actual labels.
x_new = np.array([[5, 10], [6, 12]])
y_true = np.array([1, 1])  # actual labels

y_pred = model.predict(x_new)

# Report the predictions.
print("预测结果:", y_pred)

# Report the fitted parameters.
print("模型系数 (coefficients):", model.coef_)
print("模型截距 (intercept):", model.intercept_)

# Classification metrics. y_true holds only class 1, so the label set is
# pinned to keep the confusion matrix 2x2 instead of collapsing to 1x1.
accuracy = accuracy_score(y_true, y_pred)
precision = precision_score(y_true, y_pred)
recall = recall_score(y_true, y_pred)
f1 = f1_score(y_true, y_pred)
conf_matrix = confusion_matrix(y_true, y_pred, labels=[0, 1])

print("准确率 (Accuracy):", accuracy)
print("精确率 (Precision):", precision)
print("召回率 (Recall):", recall)
print("F1 分数 (F1 Score):", f1)
print("混淆矩阵 (Confusion Matrix):\n", conf_matrix)


In [None]:
from scipy.io import arff
import pandas as pd

# Read the ARFF file through an explicitly opened handle; loadarff yields the
# records and the accompanying metadata object.
with open('dataset.arff') as arff_file:
    data, meta = arff.loadarff(arff_file)

# Wrap the records in a pandas DataFrame.
df = pd.DataFrame(data)

# Further processing (for example assigning column names) could go here:
# df.columns = ...

# Show the first five rows of the DataFrame.
print(df.head(5))


##  【例5.4】朴素贝叶斯分类器的代码示例。

In [None]:
#以下是一个使用scikit-learn库实现朴素贝叶斯分类器的代码示例，用于对鸢尾花数据集进行分类：
from sklearn.datasets import load_iris
from sklearn.naive_bayes import GaussianNB
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

# Load the iris dataset and pull out the features and the targets.
iris = load_iris()
X, y = iris.data, iris.target

# Hold out 20% of the samples for testing.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Build the Gaussian naive Bayes classifier and fit it (fit returns the estimator).
clf = GaussianNB().fit(X_train, y_train)

# Predict the held-out samples and measure the accuracy.
y_pred = clf.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
print('Accuracy:', accuracy)


## 【例5.5】决策树案例代码。

In [None]:
from sklearn.datasets import load_iris
from sklearn.tree import DecisionTreeClassifier, export_text
from sklearn.model_selection import train_test_split
from sklearn.tree import export_graphviz
import graphviz

# Load the iris dataset.
iris = load_iris()
X = iris.data
y = iris.target

# Hold out 20% of the samples for testing.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Decision tree classifier; random_state is fixed so the fitted tree and the
# reported accuracy are the same on every run.
clf = DecisionTreeClassifier(random_state=42)

# Train the classifier.
clf.fit(X_train, y_train)

# Predict the held-out samples.
y_pred = clf.predict(X_test)

# Accuracy on the held-out samples.
accuracy = clf.score(X_test, y_test)
print("Accuracy:", accuracy)

# Export the fitted tree in Graphviz DOT format.
dot_data = export_graphviz(clf, out_file=None, feature_names=iris.feature_names, class_names=iris.target_names, filled=True, rounded=True, special_characters=True)

# Render the tree to a PNG once and open that same file in the viewer. The
# original called render() and then view(), which rendered a second time in
# the default output format and opened that file instead of the PNG.
graph = graphviz.Source(dot_data)
graph.render("iris_decision_tree", format="png", view=True)


## 【例5.6】随机森林案例代码。

In [None]:

from sklearn.datasets import load_iris
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split

# Load the iris dataset.
iris = load_iris()
X = iris.data
y = iris.target

# Hold out 20% of the samples for testing.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Random forest classifier; random_state is fixed so the bootstrap samples and
# feature choices, and therefore the reported accuracy, repeat on every run.
clf = RandomForestClassifier(random_state=42)

# Train the classifier.
clf.fit(X_train, y_train)

# Predict the held-out samples.
y_pred = clf.predict(X_test)

# Accuracy on the held-out samples.
accuracy = clf.score(X_test, y_test)
print("Accuracy:", accuracy)


##  【例5.7】SVC案例代码。

In [None]:

from sklearn.datasets import load_iris
from sklearn.svm import SVC
from sklearn.model_selection import train_test_split

# Load the iris dataset and pull out the features and the targets.
iris = load_iris()
X, y = iris.data, iris.target

# Hold out 20% of the samples for testing.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Build the linear-kernel SVM classifier and fit it (fit returns the estimator).
clf = SVC(kernel='linear').fit(X_train, y_train)

# Predict the held-out samples.
y_pred = clf.predict(X_test)

# Accuracy on the held-out samples.
accuracy = clf.score(X_test, y_test)
print("Accuracy:", accuracy)
