### 1. 加载数据

In [1]:
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split

In [2]:
# 加载数据
X, y = load_breast_cancer(return_X_y=True)
# 切分数据
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=0)

### 2. 数据预处理

In [3]:
"""
    标准化
        - 对特征进行标准化处理
"""
# 提取需要的参数
mu = X_train.mean(axis=0)
sigma = X_train.std(axis=0)
# 预处理特征
X_train = (X_train - mu) / sigma
X_test = (X_test - mu) / sigma

### 3. 构建模型

In [4]:
import torch

In [5]:
"""
    1，可学习参数
        - learnable parameter
"""
# 定义权重和偏置
# X: [batch_size, 30]
# 权重的初始化：由高斯分布来初始化
w = torch.randn(30, 2, dtype=torch.float32, requires_grad=True)
# 偏置的初始化：由 0.0 来初始化
b = torch.randn(1, 2, dtype=torch.float32, requires_grad=True)
w, b

(tensor([[ 0.0561, -1.8051],
         [-1.1835, -1.4934],
         [-1.6493, -0.3593],
         [ 0.5362,  1.1840],
         [-0.1008,  0.3511],
         [-0.5242, -0.8618],
         [-0.5087, -0.9030],
         [ 0.5401, -0.2104],
         [ 0.4593,  0.3326],
         [ 0.6572, -0.1415],
         [ 1.6077,  2.0591],
         [ 0.7402,  1.1089],
         [ 0.2698, -0.8037],
         [ 1.1888,  1.8467],
         [ 0.5046, -0.0070],
         [ 1.3602, -0.8984],
         [ 1.1272,  2.0562],
         [ 0.7227, -1.8105],
         [-0.4170,  0.2458],
         [ 0.0731, -0.9951],
         [ 0.2155,  1.0995],
         [-0.4985,  0.0924],
         [ 0.5455, -0.2297],
         [-0.3834, -1.0778],
         [-3.8058,  0.1316],
         [-0.2042, -0.3980],
         [-0.5818, -1.2990],
         [ 1.8019, -1.4017],
         [-1.3558,  1.7553],
         [-0.5783, -0.6208]], requires_grad=True),
 tensor([[-0.0723,  0.2876]], requires_grad=True))

In [6]:
"""
    2，处理逻辑
        - 算法思想
"""
def model(X):
    """
        定义模型的处理逻辑
    """
    return X @ w + b

### 4. 准备训练

In [7]:
# 训练步数
steps = 2000
# 学习率: 适当压缩偏导数，防止梯度爆炸
learning_rate = 1e-2

In [8]:
"""
    数据转张量
        - 全量训练
"""
X_train = torch.tensor(data=X_train, dtype=torch.float32)
X_test = torch.tensor(data=X_test, dtype=torch.float32)
y_train = torch.tensor(data=y_train, dtype=torch.long)
y_test = torch.tensor(data=y_test, dtype=torch.long)

In [9]:
X_train.shape, X_test.shape, y_train.shape, y_test.shape

(torch.Size([455, 30]),
 torch.Size([114, 30]),
 torch.Size([455]),
 torch.Size([114]))

In [10]:
def get_cross_entropy(y_pred, y_true):
    """
        衡量分类问题的误差
        - 批量的交叉熵
            - 每个样本的损失的平均值
    """
    # 第 1 步：真实标签 转 one-hot
    # eye是单位矩阵
    y_true = torch.eye(n=2)[y_true]
    # 第 2 步：原始输出 转 概率(softmax), dim=1的意思是，求每一行的合
    y_pred = torch.exp(y_pred) / torch.exp(y_pred).sum(dim=1, keepdim=True) + 1e-9
    # 第 3 步：求交叉熵
    each_cross_entropy = (y_true * torch.log(1 / y_pred)).sum(dim=1)
    # 第 4 步：求平均交叉熵
    cross_entropy = each_cross_entropy.mean()
    # 返回即可
    return cross_entropy

### 4. 过程监控

In [11]:
def get_acc(X, y):
    """
        计算准确率
    """
    with torch.no_grad():
        # 1，正向传播
        y_pred = model(X=X)
        # 2，解析结果
        y_pred = y_pred.argmax(dim=1)
        # 3, 计算准确率
        acc = (y == y_pred).to(dtype=torch.float32).mean().item()
    return acc

### 5. 训练过程

In [12]:
def train():
    # 训练前，测试一下准确率
    train_acc = get_acc(X=X_train, y=y_train)
    test_acc = get_acc(X=X_test, y=y_test)
    print(f"开始训练之前，train_acc: {train_acc}, test_acc: {test_acc}")
    for step in range(steps):
        # 1, 正向传播
        y_pred = model(X=X_train)
        
        # 2，计算损失
        loss = get_cross_entropy(y_pred=y_pred, y_true=y_train)
        
        # 3, 反向传播
        loss.backward()
        
        # 4, 优化一步
        w.data -= learning_rate * w.grad
        b.data -= learning_rate * b.grad

        # 5，清空梯度
        w.grad.zero_()
        b.grad.zero_()

        # 6, 模型评估
        train_acc = get_acc(X=X_train, y=y_train)
        test_acc = get_acc(X=X_test, y=y_test)
        print(f"训练了{step + 1}轮，train_acc: {train_acc}, test_acc: {test_acc}")

In [13]:
train()

开始训练之前，train_acc: 0.7032967209815979, test_acc: 0.6842105388641357
训练了1轮，train_acc: 0.7054945230484009, test_acc: 0.6842105388641357
训练了2轮，train_acc: 0.7076923251152039, test_acc: 0.6842105388641357
训练了3轮，train_acc: 0.7098901271820068, test_acc: 0.6929824352264404
训练了4轮，train_acc: 0.7142857313156128, test_acc: 0.6929824352264404
训练了5轮，train_acc: 0.7186813354492188, test_acc: 0.6929824352264404
训练了6轮，train_acc: 0.7186813354492188, test_acc: 0.6929824352264404
训练了7轮，train_acc: 0.7208791375160217, test_acc: 0.6929824352264404
训练了8轮，train_acc: 0.7252747416496277, test_acc: 0.7017543911933899
训练了9轮，train_acc: 0.7274725437164307, test_acc: 0.7017543911933899
训练了10轮，train_acc: 0.7318681478500366, test_acc: 0.7017543911933899
训练了11轮，train_acc: 0.7384615540504456, test_acc: 0.7017543911933899
训练了12轮，train_acc: 0.7384615540504456, test_acc: 0.7017543911933899
训练了13轮，train_acc: 0.7384615540504456, test_acc: 0.7017543911933899
训练了14轮，train_acc: 0.7428571581840515, test_acc: 0.7017543911933899
训练了1