In [1]:
import pandas as pd
import numpy as np

def normalize_feature(df):
    """Return a copy of ``df`` with every column standardized (z-score).

    Each column has its own mean subtracted and is then divided by its own
    standard deviation, as computed by pandas' ``Series.mean`` and
    ``Series.std``.

    Args:
        df: DataFrame whose columns are to be standardized.

    Returns:
        The result of applying the standardization column by column; the
        input frame is not modified.
    """
    def _standardize(column):
        # Center first, then scale by the column's spread.
        centered = column - column.mean()
        return centered / column.std()

    return df.apply(_standardize)

# Read the CSV with explicit column names (passing `names` means no row of the
# file is used as a header), then standardize every column -- the target
# `price` is standardized together with the two features.
column_names = ['square', 'bedrooms', 'price']
raw_frame = pd.read_csv('datas/04/train_3d.csv', names=column_names)
df = normalize_feature(raw_frame)

# Prepend a constant column of ones as the first column, so the model's first
# weight multiplies a value that is always 1 (i.e. acts as the intercept).
ones = pd.DataFrame({'ones': np.ones(len(df))})
df = pd.concat([ones, df], axis=1)

df.head()

Unnamed: 0,ones,square,bedrooms,price
0,1.0,-0.664869,-0.411568,0.005698
1,1.0,1.818707,0.708041,0.649258
2,1.0,1.686088,-0.411568,0.595628
3,1.0,1.336459,1.827651,0.474961
4,1.0,-0.713093,1.827651,-0.250385


### 数据处理：获取x和y

In [2]:
# Feature matrix: the first three columns of df (ones, square, bedrooms).
feature_columns = df.columns[0:3]
X_data = np.array(df[feature_columns])

# Target: the last column of df, reshaped from a 1-D array into a column
# vector with one row per sample.
target_column = df.columns[-1]
y_data = np.array(df[target_column]).reshape(len(df), 1)

# Show the shape and type of both arrays as a sanity check.
print(X_data.shape, type(X_data))
print(y_data.shape, type(y_data))

(10000, 3) <class 'numpy.ndarray'>
(10000, 1) <class 'numpy.ndarray'>


### 创建线性回归模型（数据流图）

In [3]:
import tensorflow as tf

# Start from an empty default graph so this cell can be re-run in the same
# kernel. Without this, a second run fails because
# tf.get_variable('weights', ...) finds the variable created by the first run.
tf.reset_default_graph()

alpha = 0.01 # learning rate
epoch = 400 # number of passes over the full training set

# Input X, same shape as X_data: [n_samples, 3] ([10000, 3] for this dataset)
X = tf.placeholder(tf.float32, X_data.shape)
# Input y, same shape as y_data: [n_samples, 1] ([10000, 1] for this dataset)
y = tf.placeholder(tf.float32, y_data.shape)

# Weights, shape [3, 1] (one per column of X_data), created with
# tf.constant_initializer() using its default value.
W = tf.get_variable('weights',(X_data.shape[1], 1), initializer=tf.constant_initializer())

# Hypothesis h(x) = w_0 * x_0 + w_1 * x_1 + w_2 * x_2, where x_0 is always 1
# Prediction y_pred, shape [n_samples, 1]
y_pred = tf.matmul(X, W)

# Least-squares loss. (y_pred - y) is a column vector of shape [n_samples, 1].
# tf.matmul(a, b, transpose_a=True) multiplies the transpose of a by b, i.e.
# [1, n_samples] x [n_samples, 1], so loss_op is a [1, 1] tensor rather than a
# rank-0 scalar.
loss_op = 1 / (2 * len(X_data)) * tf.matmul((y_pred - y), (y_pred - y), transpose_a=True)

# Gradient descent optimizer. The placeholders are shaped for the whole
# dataset, so every step uses all samples (full-batch, not stochastic).
opt = tf.train.GradientDescentOptimizer(learning_rate=alpha)

# Single training step: one gradient-descent update of W that minimizes loss_op
train_op = opt.minimize(loss_op)

  _np_qint8 = np.dtype([("qint8", np.int8, 1)])
  _np_quint8 = np.dtype([("quint8", np.uint8, 1)])
  _np_qint16 = np.dtype([("qint16", np.int16, 1)])
  _np_quint16 = np.dtype([("quint16", np.uint16, 1)])
  _np_qint32 = np.dtype([("qint32", np.int32, 1)])
  np_resource = np.dtype([("resource", np.ubyte, 1)])


### 创建会话（运行环境）

In [4]:
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())

    # Passing sess.graph writes the graph definition to the log directory so
    # it can be inspected in TensorBoard.
    writer = tf.summary.FileWriter('./summary/linear-regression-6-1', sess.graph)
    try:
        # The same full-dataset feed is used for every run call.
        feed = {X: X_data, y: y_data}

        for e in range(1, epoch + 1):
            sess.run(train_op, feed_dict=feed)
            if e % 10 == 0:
                # Evaluate the loss and weights after this epoch's update.
                loss, w = sess.run([loss_op, W], feed_dict=feed)
                # loss has shape [1, 1] and w has shape [3, 1]. Pull out plain
                # scalars explicitly instead of relying on implicit
                # conversion of size-1 arrays during %-formatting.
                weights = w.ravel()
                log_str = "Epoch_%d \t Loss=%.4g \t Model: y = %.4gx1 + %.4gx2 + %.4g"
                # weights[0] multiplies the constant 'ones' column, so it is
                # printed last as the intercept term.
                print(log_str % (e, loss.item(), weights[1], weights[2], weights[0]))
    finally:
        # Close the event file even if training raises.
        writer.close()

Epoch_10 	 Loss=0.4596 	 Model: y = 0.05679x1 + 0.02882x2 + 2.765e-10
Epoch_20 	 Loss=0.4291 	 Model: y = 0.1069x1 + 0.0524x2 + 3.658e-10
Epoch_30 	 Loss=0.4059 	 Model: y = 0.1513x1 + 0.07152x2 + 3.859e-10
Epoch_40 	 Loss=0.3881 	 Model: y = 0.1906x1 + 0.08687x2 + 3.606e-10
Epoch_50 	 Loss=0.3746 	 Model: y = 0.2255x1 + 0.09902x2 + 1.516e-10
Epoch_60 	 Loss=0.3641 	 Model: y = 0.2566x1 + 0.1085x2 + -4.278e-11
Epoch_70 	 Loss=0.3559 	 Model: y = 0.2843x1 + 0.1157x2 + -2.494e-10
Epoch_80 	 Loss=0.3495 	 Model: y = 0.309x1 + 0.1209x2 + -3.376e-10
Epoch_90 	 Loss=0.3445 	 Model: y = 0.3312x1 + 0.1246x2 + -3.82e-10
Epoch_100 	 Loss=0.3405 	 Model: y = 0.3511x1 + 0.1269x2 + -5.777e-10
Epoch_110 	 Loss=0.3373 	 Model: y = 0.3691x1 + 0.1282x2 + -7.139e-10
Epoch_120 	 Loss=0.3347 	 Model: y = 0.3853x1 + 0.1285x2 + -9.823e-10
Epoch_130 	 Loss=0.3325 	 Model: y = 0.3999x1 + 0.1281x2 + -1.124e-09
Epoch_140 	 Loss=0.3308 	 Model: y = 0.4132x1 + 0.127x2 + -1.142e-09
Epoch_150 	 Loss=0.3293 	 Model: