# 复现从头开始实现线性回归

## 任务分解：
- 构建数据集：构造一个函数，并制造对应大小的数据集，并且引入噪声
- 实现SGD（小批量随机梯度下降优化器）
- 训练并测试

In [1]:
import torch
import random

## 构建数据集
我们使用 $y=W*x+b,w^T=[2,-3.1,4],b=4.2$ 作为核心函数

In [2]:
def datasets_function(w,b,num_examples):
    x = torch.normal(0,1,(num_examples,len(w)))
    y = torch.matmul(x,w)+b
    y += torch.normal(0,0.01,y.shape)
    return x,y.reshape((-1,1))

w_true = torch.tensor([2,-3.1,4])
b_true = 4.2
features,labels = datasets_function(w_true , b_true , 2000)

In [3]:
print('features',features[0],"labels",labels[0])

features tensor([1.9451, 1.0990, 0.3316]) labels tensor([6.0009])


## 实现SGD

### 分批次读取数据集的内容
设置一个batch_size然后分批次读取

In [4]:
def batch_reader(features,labels,batchsize):
    index = list(range(len(features)))
    random.shuffle(index)
    for i in range(0,len(features),batchsize):
        batch_tensor = torch.tensor(index[i:min(i+batchsize,len(features))])
        yield features[batch_tensor],labels[batch_tensor]

### 实现损失函数

In [5]:
def loss(y,y_real):
    return (y_real.reshape(y.shape)-y)** 2 / 2

### 定义梯度下降和Backpropagation

In [6]:
def SGD(batchsize,lr,params):
    with torch.no_grad():
        for param in params:
            param -= lr*param.grad / batchsize
            param.grad.zero_()

## 训练

### 模型初始化

In [7]:
w = torch.normal(2,0.01,size=(3,1),requires_grad = True)
b = torch.zeros(1,requires_grad = True)

### 定义超参数

In [8]:
lr = 0.01
epoches = 50
batch_size = 40

### 训练

In [9]:
for epoch in range (epoches):
    for x,y in batch_reader(features , labels , batch_size):
        l = loss(torch.matmul(x,w)+b,y)
        l.sum().backward()
        SGD(batch_size,lr,[w,b])
    with torch.no_grad():
        train_l = loss(torch.matmul(x,w)+b,y)
        print(f'epoch{epoch+1},loss{float(train_l.mean()):f}')

epoch1,loss7.481070
epoch2,loss2.917686
epoch3,loss1.516930
epoch4,loss0.504966
epoch5,loss0.152874
epoch6,loss0.055254
epoch7,loss0.030557
epoch8,loss0.015588
epoch9,loss0.005223
epoch10,loss0.002142
epoch11,loss0.000745
epoch12,loss0.000350
epoch13,loss0.000097
epoch14,loss0.000077
epoch15,loss0.000054
epoch16,loss0.000094
epoch17,loss0.000048
epoch18,loss0.000050
epoch19,loss0.000067
epoch20,loss0.000068
epoch21,loss0.000052
epoch22,loss0.000057
epoch23,loss0.000056
epoch24,loss0.000053
epoch25,loss0.000058
epoch26,loss0.000050
epoch27,loss0.000065
epoch28,loss0.000044
epoch29,loss0.000052
epoch30,loss0.000051
epoch31,loss0.000057
epoch32,loss0.000055
epoch33,loss0.000041
epoch34,loss0.000051
epoch35,loss0.000046
epoch36,loss0.000041
epoch37,loss0.000061
epoch38,loss0.000050
epoch39,loss0.000058
epoch40,loss0.000043
epoch41,loss0.000041
epoch42,loss0.000046
epoch43,loss0.000058
epoch44,loss0.000060
epoch45,loss0.000046
epoch46,loss0.000026
epoch47,loss0.000052
epoch48,loss0.000076
e

## 结果

In [10]:
print(w,b)
print(f'w的误差:{w_true - w.reshape(w_true.shape)}')
print(f'b的误差:{b_true - b}')

tensor([[ 2.0003],
        [-3.1000],
        [ 3.9998]], requires_grad=True) tensor([4.2000], requires_grad=True)
w的误差:tensor([-3.1161e-04,  3.4571e-05,  1.5330e-04], grad_fn=<SubBackward0>)
b的误差:tensor([1.7643e-05], grad_fn=<RsubBackward1>)
