线性回归的形式为 $y = w_1 x_1 + w_2 x_2 + \dots + w_n x_n + b$，其输出的取值范围是 $(-\infty, +\infty)$；而逻辑回归需要输出取值在 0 到 1 之间的概率，因此无法直接使用上述公式。

In [1]:
from sklearn.datasets import load_breast_cancer

In [2]:
# Load the breast-cancer dataset; return_X_y=True is unpacked into features X and labels y.
X, y = load_breast_cancer(return_X_y=True)

In [3]:
from sklearn.model_selection import train_test_split

In [4]:
# Hold out 20% of the samples for testing; random_state=0 fixes the seed passed to the splitter.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=0)

In [5]:
"""
    标准化
"""
# Standardize the features using the mean and standard deviation taken along
# axis 0 of the training set.
# NOTE(review): any entry of sigma equal to 0 would make the divisions below
# produce inf/nan -- confirm no feature is constant.
mu = X_train.mean(axis=0)
sigma = X_train.std(axis=0)
X_train = (X_train - mu) / sigma
# The test set is scaled with the training-set mu/sigma, not with its own statistics.
X_test = (X_test - mu) / sigma

### 2. 建模

In [6]:
import torch

In [7]:
"""
    定义参数
"""
# Define the weights and the bias.
# The input has 30 features, so there are 30 weights (per class); b is the bias
# (one per class); the result is a classification into 2 classes.
w = torch.randn(30, 2, requires_grad=True)
b = torch.randn(1, 2, requires_grad=True)
# Bare expression: shows the randomly initialised parameters as the cell output.
w, b

(tensor([[-0.3366,  0.5572],
         [ 0.9123, -2.5170],
         [-1.1020,  0.2047],
         [-1.1110,  0.2259],
         [-0.6864,  0.1164],
         [-0.3511,  0.3468],
         [-0.7381, -1.2292],
         [ 1.5486, -0.0643],
         [ 0.3490, -0.0489],
         [-0.2236,  0.2154],
         [ 0.2587, -1.3273],
         [ 2.2423, -1.0696],
         [ 0.4671,  0.7849],
         [-0.1879,  1.5467],
         [-1.4005, -0.9136],
         [ 0.6068,  0.8489],
         [-0.6944, -0.1727],
         [ 0.7837,  0.4345],
         [ 1.5288, -0.6410],
         [-0.2343,  0.0889],
         [-1.1814,  0.8461],
         [ 0.0081, -0.5977],
         [-0.2665, -1.1691],
         [-2.9868,  0.1456],
         [-0.2231, -1.1171],
         [ 1.7536,  0.2196],
         [-1.6859,  1.2410],
         [-0.9966,  0.3887],
         [-0.6559,  0.5510],
         [-0.6179, -0.0725]], requires_grad=True),
 tensor([[1.3720, 0.2714]], requires_grad=True))

In [10]:
"""
    定义模型
"""
def model(X):
    """Apply the linear map defined by the module-level parameters.

    Multiplies ``X`` by the global weight matrix ``w`` and adds the global
    bias ``b``. The result is the raw, un-normalised score for each class;
    no softmax is applied here.
    """
    scores = X @ w
    return scores + b

In [11]:
# Training hyperparameters.
# NOTE(review): presumably `steps` is the number of optimisation iterations and
# `learning_rate` the gradient-descent step size -- the training loop is not
# visible here, confirm against it.
steps = 100000
learning_rate = 1e-2

In [12]:
# Convert the standardized training features into a float32 torch tensor.
X_train = torch.tensor(data=X_train, dtype=torch.float32)

In [13]:
def one_hot(y, num_classes=2):
    """One-hot encode a sequence of integer class labels.

    Args:
        y: Integer class labels (list, NumPy array or tensor), each in
            ``[0, num_classes)``.
        num_classes: Number of classes, i.e. the width of each encoded row.
            Defaults to 2, the value that used to be hard-coded.

    Returns:
        A float32 tensor of shape ``(len(y), num_classes)`` whose row ``i``
        has a 1 in column ``y[i]`` and 0 elsewhere. Empty input gives an
        empty ``(0, num_classes)`` tensor instead of raising.
    """
    # Build the identity matrix once and pick its rows with a single indexed
    # lookup, rather than creating a new identity matrix for every label.
    labels = torch.as_tensor(y, dtype=torch.long)
    return torch.eye(num_classes)[labels]

In [42]:
# Preview the one-hot encoding of the training labels (shown as the cell output).
one_hot(y_train)

tensor([[0., 1.],
        [0., 1.],
        [0., 1.],
        [0., 1.],
        [0., 1.],
        [0., 1.],
        [0., 1.],
        [0., 1.],
        [1., 0.],
        [0., 1.],
        [1., 0.],
        [0., 1.],
        [0., 1.],
        [0., 1.],
        [0., 1.],
        [0., 1.],
        [1., 0.],
        [0., 1.],
        [0., 1.],
        [0., 1.],
        [0., 1.],
        [0., 1.],
        [0., 1.],
        [1., 0.],
        [1., 0.],
        [0., 1.],
        [0., 1.],
        [0., 1.],
        [1., 0.],
        [0., 1.],
        [0., 1.],
        [1., 0.],
        [0., 1.],
        [1., 0.],
        [0., 1.],
        [0., 1.],
        [0., 1.],
        [0., 1.],
        [0., 1.],
        [0., 1.],
        [0., 1.],
        [1., 0.],
        [0., 1.],
        [1., 0.],
        [0., 1.],
        [1., 0.],
        [1., 0.],
        [0., 1.],
        [0., 1.],
        [1., 0.],
        [0., 1.],
        [1., 0.],
        [1., 0.],
        [1., 0.],
        [0., 1.],
        [0

In [14]:
def softmax(x, dim=-1):
    """Turn raw model outputs into probabilities with the softmax function.

    The exponentials are normalised along ``dim`` only, so for a batch of
    scores shaped ``(samples, classes)`` every row sums to 1 on its own
    rather than the whole batch summing to 1.

    Args:
        x: Tensor of raw scores (logits).
        dim: Dimension holding the class scores. Defaults to the last one.

    Returns:
        A tensor of the same shape as ``x`` whose entries along ``dim`` are
        non-negative and sum to 1.
    """
    # Subtracting the maximum leaves the result mathematically unchanged but
    # keeps torch.exp from overflowing to inf (and the ratio from becoming nan).
    shifted = x - x.max(dim=dim, keepdim=True).values
    exp_x = torch.exp(shifted)
    return exp_x / exp_x.sum(dim=dim, keepdim=True)

In [15]:
def cross_entropy(y_pred, y_true):
    """
        1. The loss for a classification problem is computed with cross-entropy, not MSE.
        2. MSE measures the difference between two regression quantities in terms of
           Euclidean distance, whereas cross-entropy measures classification from the
           viewpoint of probability distributions.
        3. This function is meant to measure the error of the classification problem.
    """
    # TODO: not implemented yet -- the body is a placeholder, so the function
    # currently returns None for any arguments.
    pass