### 1. 加载数据

In [1]:
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split

In [2]:
# Fetch the breast-cancer dataset as a (features, labels) pair.
X, y = load_breast_cancer(return_X_y=True)
# Hold out 20% of the samples for testing; the fixed random_state keeps the
# split identical from run to run.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=0,
)

### 2. 数据预处理

In [3]:
# Standardization: z-score every feature column.
# The per-column mean and standard deviation are taken from the training
# split only and then reused for the test split, so both splits go through
# exactly the same transform.
mu, sigma = X_train.mean(axis=0), X_train.std(axis=0)
X_train, X_test = ((split - mu) / sigma for split in (X_train, X_test))

### 3. 构建模型

In [4]:
import torch
from torch import nn

In [5]:
# In V1 the model was implemented by hand; in practice that is rarely done,
# and a ready-made layer is used directly instead.
# A single linear layer: 30 input features -> 2 output classes (logits).
# Seed the RNG first so the random initial weights (and therefore every
# accuracy figure printed further down) are reproducible across kernel restarts.
torch.manual_seed(0)
model = nn.Linear(in_features=30, out_features=2)

In [6]:
# Display the layer's repr (feature sizes and whether a bias is used)
model

Linear(in_features=30, out_features=2, bias=True)

In [7]:
# Weight parameter and its shape: one row per output class, one column per input feature
model.weight, model.weight.shape

(Parameter containing:
 tensor([[-0.1691,  0.1356, -0.1293,  0.1630,  0.0865,  0.0282,  0.0184,  0.0105,
          -0.0972, -0.0629,  0.0327, -0.0730, -0.1175,  0.1272,  0.0557,  0.1294,
           0.0443,  0.1193, -0.0422,  0.0369,  0.1700,  0.0327, -0.1695,  0.0311,
           0.0160,  0.0624, -0.0936, -0.1264,  0.1237,  0.0577],
         [ 0.0916,  0.0542,  0.0121, -0.1256,  0.0117,  0.0143,  0.1646, -0.0613,
          -0.0347,  0.0972,  0.0558,  0.1168,  0.1597,  0.1333,  0.1290,  0.1140,
          -0.0253, -0.1126,  0.0659,  0.1317, -0.1795, -0.0067, -0.1657,  0.0910,
           0.1325, -0.1712, -0.1791,  0.0898,  0.1225, -0.1076]],
        requires_grad=True),
 torch.Size([2, 30]))

In [8]:
# Bias parameter and its shape: one entry per output class
model.bias, model.bias.shape

(Parameter containing:
 tensor([0.0868, 0.1728], requires_grad=True),
 torch.Size([2]))

### 4. 准备训练

In [9]:
# Number of full-batch gradient steps to run.
steps = 200
# Loss function: the hand-written get_cross_entropy() from V1 is not needed,
# torch.nn already ships a cross-entropy loss that can be used directly.
loss_fn = nn.CrossEntropyLoss()
# Optimizer: bundles "subtract the gradient" and "clear the gradient" behind
# one object. SGD = stochastic gradient descent.
optimizer = torch.optim.SGD(model.parameters(), lr=1e-3)

In [10]:
# Convert the data splits to tensors (training is full-batch, so each whole
# split becomes a single tensor).
# torch.as_tensor is used instead of torch.tensor so this cell is idempotent:
# re-running it on values that are already tensors of the right dtype returns
# them unchanged instead of emitting the "copy construct from a tensor" warning.
# Features are float32 to match the layer's parameters; labels are int64
# (long), which is what nn.CrossEntropyLoss expects for class indices.
X_train = torch.as_tensor(X_train, dtype=torch.float32)
X_test = torch.as_tensor(X_test, dtype=torch.float32)
y_train = torch.as_tensor(y_train, dtype=torch.long)
y_test = torch.as_tensor(y_test, dtype=torch.long)

### 4. 过程监控

In [11]:
def get_acc(X, y, net=None):
    """
    Compute the classification accuracy of a model on a labelled batch.

    Args:
        X: Input tensor fed to the model; predictions are reduced over
           dim 1, so the model output is expected to be (samples, classes).
        y: Tensor of integer class labels, one per sample.
        net: Optional model to evaluate. When omitted, the notebook-level
             ``model`` is used, so existing ``get_acc(X=..., y=...)`` calls
             behave exactly as before.

    Returns:
        float: fraction of samples whose arg-max prediction equals the label.
    """
    if net is None:
        # Backward-compatible default: evaluate the notebook's global model.
        net = model
    # Evaluation only: no autograd graph is needed.
    with torch.no_grad():
        # 1. Forward pass
        logits = net(X)
        # 2. Pick the highest-scoring class for each sample
        predicted = logits.argmax(dim=1)
        # 3. Mean of the element-wise matches, as a Python float
        return (y == predicted).to(dtype=torch.float32).mean().item()

### 5. 训练过程

In [12]:
def train():
    """
    Run full-batch gradient descent on the training split and log accuracy.

    Uses the notebook-level ``model``, ``optimizer``, ``loss_fn``, ``steps``,
    ``X_train``/``y_train`` and ``X_test``/``y_test``. Accuracy on both splits
    is printed once before training and once after every step.

    Returns:
        None. The model's parameters are updated in place.
    """
    # Baseline accuracy before any update.
    train_acc = get_acc(X=X_train, y=y_train)
    test_acc = get_acc(X=X_test, y=y_test)
    print(f"开始训练之前，train_acc: {train_acc}, test_acc: {test_acc}")
    for step in range(steps):
        # 1. Clear gradients BEFORE the backward pass. Doing this first (rather
        #    than after step()) guarantees that gradients left behind by an
        #    interrupted run or an ad-hoc backward() in another cell cannot be
        #    accumulated into this step's update.
        optimizer.zero_grad()
        # 2. Forward pass on the whole training set
        logits = model(X_train)
        # 3. Compute the loss
        loss = loss_fn(logits, y_train)
        # 4. Backward pass
        loss.backward()
        # 5. Apply one optimizer update
        optimizer.step()

        # 6. Evaluate on both splits
        train_acc = get_acc(X=X_train, y=y_train)
        test_acc = get_acc(X=X_test, y=y_test)
        print(f"训练了{step + 1}轮，train_acc: {train_acc}, test_acc: {test_acc}")

In [13]:
# Run the training loop; accuracy is printed before training and after every step
train()

开始训练之前，train_acc: 0.6527472734451294, test_acc: 0.640350878238678
训练了1轮，train_acc: 0.6615384817123413, test_acc: 0.6491228342056274
训练了2轮，train_acc: 0.6615384817123413, test_acc: 0.6491228342056274
训练了3轮，train_acc: 0.6659340858459473, test_acc: 0.6666666865348816
训练了4轮，train_acc: 0.6681318879127502, test_acc: 0.6754385828971863
训练了5轮，train_acc: 0.6769230961799622, test_acc: 0.6842105388641357
训练了6轮，train_acc: 0.6769230961799622, test_acc: 0.7017543911933899
训练了7轮，train_acc: 0.6747252941131592, test_acc: 0.7105262875556946
训练了8轮，train_acc: 0.6769230961799622, test_acc: 0.7105262875556946
训练了9轮，train_acc: 0.6857143044471741, test_acc: 0.7105262875556946
训练了10轮，train_acc: 0.696703314781189, test_acc: 0.7105262875556946
训练了11轮，train_acc: 0.7010989189147949, test_acc: 0.7105262875556946
训练了12轮，train_acc: 0.7054945230484009, test_acc: 0.7105262875556946
训练了13轮，train_acc: 0.7120879292488098, test_acc: 0.719298243522644
训练了14轮，train_acc: 0.7142857313156128, test_acc: 0.7368420958518982
训练了15轮，

### 6. 模型保存

In [14]:
"""
    1. Save and load the model as a whole object
        - Not recommended
"""
# Save the model
torch.save(obj=model, f="model.lxh")
# Load the model
# NOTE: weights_only=False unpickles arbitrary Python objects, so only load
# files you created yourself or otherwise fully trust.
m = torch.load(f="model.lxh", weights_only=False)

In [15]:
# Option 2: store the parameters separately from the network definition.
# Only the state_dict (the tensors) goes to disk; the architecture is rebuilt
# in code when loading.

# 1. Save the weights. This line must actually run: with it commented out the
#    load below only works if a stale "model.pt" from an earlier session
#    happens to exist, and fails on a fresh Restart & Run All.
torch.save(obj=model.state_dict(), f="model.pt")

# 2. Load: build a model with the same architecture (randomly initialised),
#    then overwrite its parameters with the saved ones.
m = nn.Linear(in_features=30, out_features=2)
m.load_state_dict(state_dict=torch.load(f="model.pt", weights_only=True))

<All keys matched successfully>

### 7. 推理流程

In [16]:
# Build a model with the same layer sizes as the one that was trained
m = nn.Linear(in_features=30, out_features=2)

# Load the trained weights from disk into it
m.load_state_dict(state_dict=torch.load(f='./model.pt', weights_only=True))

def predict(X, net=None):
    """
    Inference: return the predicted class index for every input row.

    The model in this notebook is trained on standardized features, so ``X``
    must already have been standardized with the training-set statistics.

    Args:
        X: 2-D batch of shape [batch_size, num_features]. May be a tensor or
           anything ``torch.as_tensor`` accepts (nested lists, NumPy arrays).
        net: Optional model to run. When omitted, the notebook-level ``m``
             is used, so existing ``predict(X=...)`` calls keep working.

    Returns:
        torch.Tensor: 1-D long tensor with one predicted class index per row.

    Raises:
        ValueError: if ``X`` is not 2-D or has the wrong number of features.
    """
    if net is None:
        # Backward-compatible default: the model loaded in this cell.
        net = m
    # as_tensor leaves a float32 tensor untouched (so no copy-construct
    # warning) and converts everything else, including tensors of another
    # dtype such as float64, which would otherwise fail inside the layer.
    X = torch.as_tensor(X, dtype=torch.float32)
    # Shape check: [batch_size, num_features]. The expected feature count is
    # read from the layer when available, falling back to this notebook's 30.
    num_features = getattr(net, "in_features", 30)
    if X.ndim != 2 or X.size(1) != num_features:
        raise ValueError("输入数据有误！！！")
    # Inference only: skip building an autograd graph.
    with torch.no_grad():
        return net(X).argmax(dim=1)

In [17]:
# Predict class indices for the (already standardized) test split
y_pred = predict(X=X_test)

In [18]:
# Display the ground-truth test labels for comparison with the predictions
y_test

tensor([0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 0, 0, 0, 0, 1, 1,
        0, 1, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 0, 1, 0, 1, 1, 0, 1, 1,
        1, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 1, 0, 1, 0, 0, 0, 1, 1, 0,
        1, 0, 0, 1, 1, 1, 1, 1, 0, 0, 0, 1, 0, 1, 1, 1, 0, 0, 1, 0, 1, 0, 1, 1,
        0, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, 0, 0, 1, 0, 0, 1])

In [19]:
from sklearn.metrics import accuracy_score

In [20]:
# Accuracy of the reloaded model's predictions against the test labels
acc = accuracy_score(y_true=y_test, y_pred=y_pred)
acc

0.9473684210526315