# Reorganize Linear Regression in Pythonic Style
# 使用“Python风格”重新组织线性回归的代码

In class you saw how we implemented linear regression. That approach is fine in C++, but it is a poor fit for Python because it does not use any of Python's features.
So your first task is to rewrite the linear regression code in Python. You are not allowed to use too many `for` loops, especially when doing calculations.
Write the code the "Pythonic" way. Go ahead, and good luck.

Python风格：不要用太多for循环，可以用列表解析代替大段的for循环

In [22]:
import numpy as np
import random

In [23]:
def inference(w, b, x):
    """Predict y for input ``x`` with the linear model ``y = w * x + b``.

    Args:
        w: Slope (weight) of the model.
        b: Intercept (bias) of the model.
        x: Input value.

    Returns:
        The predicted value ``w * x + b``.
    """
    return w * x + b

In [1]:
def eva_loss(w, b, x_list, gt_y_list):
    """Return the mean squared-error cost of the linear model over a dataset.

    Each sample contributes ``0.5 * (inference(w, b, x) - gt_y) ** 2``; the
    contributions are summed and divided by ``len(x_list)``.

    Args:
        w: Slope of the linear model.
        b: Intercept of the linear model.
        x_list: Input values, one per sample.
        gt_y_list: Ground-truth targets; entry ``i`` pairs with ``x_list[i]``.

    Returns:
        The average per-sample loss.

    Raises:
        IndexError: If ``gt_y_list`` has fewer entries than ``x_list``.
        ZeroDivisionError: If ``x_list`` is empty.
    """
    num_samples = len(x_list)
    # zip() would silently truncate to the shorter sequence; fail loudly
    # instead, exactly as indexing past the end of gt_y_list would.
    if len(gt_y_list) < num_samples:
        raise IndexError('gt_y_list has fewer entries than x_list')
    total_loss = sum(
        0.5 * (inference(w, b, x) - gt_y) ** 2
        for x, gt_y in zip(x_list, gt_y_list)
    )
    return total_loss / num_samples

In [25]:
def gradient(pred_y, gt_y, x):
    """Return the per-sample gradient ``(dw, db)`` of the squared-error loss.

    For the loss ``0.5 * (pred_y - gt_y) ** 2`` with ``pred_y = w * x + b``,
    the derivative with respect to ``w`` is ``(pred_y - gt_y) * x`` and the
    derivative with respect to ``b`` is ``pred_y - gt_y``.

    Args:
        pred_y: Model prediction for the sample.
        gt_y: Ground-truth target for the sample.
        x: Input value of the sample.

    Returns:
        A tuple ``(dw, db)``.
    """
    residual = pred_y - gt_y
    return residual * x, residual

In [26]:
def cal_step_gradient(batch_x_list, batch_gt_y_list, w, b, lr):
    """Perform one gradient-descent step on a mini-batch.

    The per-sample gradients from ``gradient`` are averaged over the batch
    and the parameters are moved against that average, scaled by ``lr``.

    Args:
        batch_x_list: Input values of the batch.
        batch_gt_y_list: Ground-truth targets; entry ``i`` pairs with
            ``batch_x_list[i]``.
        w: Current slope.
        b: Current intercept.
        lr: Learning rate.

    Returns:
        A tuple ``(w, b)`` with the updated parameters.

    Raises:
        IndexError: If ``batch_gt_y_list`` has fewer entries than
            ``batch_x_list``.
        ZeroDivisionError: If the batch is empty.
    """
    batch_size = len(batch_x_list)
    # zip() would silently truncate to the shorter sequence; fail loudly
    # instead, exactly as indexing past the end of the targets would.
    if len(batch_gt_y_list) < batch_size:
        raise IndexError('batch_gt_y_list has fewer entries than batch_x_list')

    # Per-sample (dw, db) pairs, in batch order.
    grads = [
        gradient(inference(w, b, x), gt_y, x)
        for x, gt_y in zip(batch_x_list, batch_gt_y_list)
    ]
    avg_dw = sum(dw for dw, _ in grads) / batch_size
    avg_db = sum(db for _, db in grads) / batch_size

    # Gradient-descent update: step against the averaged gradient.
    return w - lr * avg_dw, b - lr * avg_db

In [27]:
def train(x_list, gt_y_list, batch_size, lr, max_iter):
    """Fit ``y = w * x + b`` with mini-batch gradient descent.

    Starting from ``w = 0, b = 0``, each iteration draws a random batch,
    applies one ``cal_step_gradient`` update, and prints the current
    parameters together with the loss over the full dataset.

    Args:
        x_list: Feature values (a single feature per sample).
        gt_y_list: Ground-truth targets, aligned with ``x_list``.
        batch_size: Number of samples drawn for each update.
        lr: Learning rate.
        max_iter: Number of update iterations to run.

    Returns:
        A tuple ``(w, b)`` with the parameters after the last iteration.
    """
    w, b = 0, 0
    num_samples = len(x_list)
    for _ in range(max_iter):
        # np.random.choice draws indices in [0, num_samples) uniformly and,
        # by default, with replacement, so a sample may repeat in a batch.
        batch_idxs = np.random.choice(num_samples, batch_size)
        batch_x = [x_list[j] for j in batch_idxs]
        batch_y = [gt_y_list[j] for j in batch_idxs]
        w, b = cal_step_gradient(batch_x, batch_y, w, b, lr)
        print('w:{0}, b:{1}'.format(w, b))
        print('loss is {0}'.format(eva_loss(w, b, x_list, gt_y_list)))
    # Hand the fitted parameters back so callers can use the trained model.
    return w, b

In [28]:
def gen_sample_data(num_samples=100):
    """Generate noisy samples scattered around a random line ``y = w * x + b``.

    Args:
        num_samples: Number of ``(x, y)`` pairs to generate. Defaults to 100.

    Returns:
        A tuple ``(x_list, y_list, w, b)``: the generated inputs, the noisy
        targets, and the slope and intercept of the underlying line.
    """
    # An integer from randint plus a fraction from random.random() ([0, 1)):
    # w falls in [0, 11) and b in [0, 6). Note this is not the same range as
    # random.uniform(0, 10).
    w = random.randint(0, 10) + random.random()
    b = random.randint(0, 5) + random.random()
    x_list = [random.randint(0, 100) * random.random() for _ in range(num_samples)]
    # Noise term: a magnitude in [0, 1) times a factor drawn from {-1, 0, 1},
    # so y fluctuates around the line y = w * x + b.
    y_list = [w * x + b + random.random() * random.randint(-1, 1) for x in x_list]
    return x_list, y_list, w, b

In [29]:
def run():
    """Generate a roughly linear synthetic dataset and train the model on it.

    Training uses a learning rate of 0.001, 10000 iterations, and batches of
    50 samples.
    """
    # The slope and intercept used to generate the data are returned as well,
    # but only the samples are needed for training.
    x_list, y_list, _w, _b = gen_sample_data()
    train(x_list, y_list, batch_size=50, lr=0.001, max_iter=10000)

In [31]:
# When this file is executed as a script, the code under the guard runs;
# when it is imported as a module, the guarded code is skipped and the
# functions above remain available to call.
if __name__ == '__main__':
    run()

w:3.434584719141268, b:0.08865459458091123
loss is 1819.6443716488016
w:4.981654674382426, b:0.12420342728046066
loss is 39.74868121655895
w:5.2396483551546496, b:0.13064469852924387
loss is 0.15630907755181922
w:5.260091819432975, b:0.131091498335299
loss is 0.16338417882064377
w:5.247586216817983, b:0.1308662556347049
loss is 0.10422184929379162
w:5.250630337860367, b:0.13103562932593554
loss is 0.10271151851371901
w:5.248005513435509, b:0.13103111271812592
loss is 0.10338812058351549
w:5.253089324548167, b:0.1311748573079543
loss is 0.10897680823865291
w:5.246789439097154, b:0.13115865334442778
loss is 0.10625978305657278
w:5.250043764366475, b:0.13133625609990435
loss is 0.10218200832135992
w:5.248033088086863, b:0.1313321504331024
loss is 0.10330580284053971
w:5.252008627639159, b:0.1314671985137098
loss is 0.10539177474883685
w:5.250190947208455, b:0.13149620656600775
loss is 0.10226453412881588
w:5.247405482878935, b:0.13154401137510308
loss is 0.10454183309721404
w:5.2495986147

loss is 0.10163489052321856
w:5.2504474910678605, b:0.1591703371718431
loss is 0.10122738400864428
w:5.245752192815608, b:0.15912616661878026
loss is 0.10552781223210843
w:5.251149604530992, b:0.15934465333470504
loss is 0.10267384768208965
w:5.248264887918584, b:0.1593475443558888
loss is 0.10018225080295672
w:5.250766728372546, b:0.15953503603958658
loss is 0.1018101425186761
w:5.248735920202786, b:0.15960076158545944
loss is 0.09994322861102554
w:5.250690377594979, b:0.15975641943856342
loss is 0.10165221622187914
w:5.251906951077878, b:0.1598555342305548
loss is 0.10485222009841473
w:5.25127913440372, b:0.15988304832851788
loss is 0.10299531671803827
w:5.248106523398179, b:0.1598512473412468
loss is 0.10026688080282979
w:5.247384691928393, b:0.15990194427623064
loss is 0.10119183052845332
w:5.2511693110702415, b:0.16000331782065735
loss is 0.10271358142937351
w:5.247142295686866, b:0.1600505195406228
loss is 0.10161633090872021
w:5.250394616208831, b:0.16019160292960788
loss is 0.1

loss is 0.09926897231330704
w:5.247920090724967, b:0.1742254508715586
loss is 0.09922346311897928
w:5.248961533522711, b:0.17421651421945364
loss is 0.09904641418605951
w:5.246553607346745, b:0.17420404871275053
loss is 0.10127439024110109
w:5.24931005784707, b:0.17430211449476266
loss is 0.09925044627125267
w:5.2474403867909025, b:0.17436913248798208
loss is 0.09969495262958791
w:5.248832768677344, b:0.17443936914622674
loss is 0.09899108910985342
w:5.248238628598806, b:0.17461082833102004
loss is 0.09901500675238896
w:5.245812784961038, b:0.17458870846027297
loss is 0.10319694073227591
w:5.248050531940229, b:0.174689874350826
loss is 0.09910088746830378
w:5.24469138748512, b:0.17469724691025593
loss is 0.10730735221152818
w:5.249375060364332, b:0.17490063699972158
loss is 0.09927881734805917
w:5.248450594732295, b:0.17493647031522475
loss is 0.09893727298430453
w:5.248682322929749, b:0.1750069828436701
loss is 0.09892977546084436
w:5.249570423953945, b:0.17508953771504338
loss is 0.0