## Простейшая линейная регрессия
    датасет - Бостонская недвижимость
    
    
    CRIM per capita crime rate by town
    ZN proportion of residential land zoned for lots over 25,000 sq.ft.
    INDUS proportion of non-retail business acres per town
    CHAS Charles River dummy variable (= 1 if tract bounds river; 0 otherwise)
    NOX nitric oxides concentration (parts per 10 million)
    RM average number of rooms per dwelling
    AGE proportion of owner-occupied units built prior to 1940
    DIS weighted distances to five Boston employment centres
    RAD index of accessibility to radial highways
    TAX full-value property-tax rate per $10,000 
    
    PTRATIO pupil-teacher ratio by town
    B 1000(Bk - 0.63)^2 where Bk is the proportion of blacks by town
    LSTAT % lower status of the population
    MEDV Median value of owner-occupied homes in $1000’s


In [5]:
import tensorflow as tf
import numpy as np
from matplotlib import pylab as plt
from sklearn.datasets import load_boston
from sklearn.model_selection import train_test_split

%matplotlib inline

In [38]:
dataset = load_boston()
dataset['data'].dtype

dtype('float64')

In [51]:
X, y = dataset['data'], dataset['target']
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, shuffle=True)

X_train = tf.constant(X_train, dtype=tf.float32)
X_test = tf.constant(X_test, dtype=tf.float32)
y_train = tf.constant(y_train, dtype=tf.float32)
y_test = tf.constant(y_test, dtype=tf.float32)

print(X_train)

tf.Tensor(
[[9.72418e+00 0.00000e+00 1.81000e+01 ... 2.02000e+01 3.85960e+02
  1.95200e+01]
 [2.79570e-01 0.00000e+00 9.69000e+00 ... 1.92000e+01 3.96900e+02
  1.35900e+01]
 [1.36781e+01 0.00000e+00 1.81000e+01 ... 2.02000e+01 6.89500e+01
  3.40200e+01]
 ...
 [1.25790e-01 4.50000e+01 3.44000e+00 ... 1.52000e+01 3.82840e+02
  4.56000e+00]
 [1.80028e+00 0.00000e+00 1.95800e+01 ... 1.47000e+01 2.27610e+02
  1.21400e+01]
 [3.75780e-01 0.00000e+00 1.05900e+01 ... 1.86000e+01 3.95240e+02
  2.39800e+01]], shape=(404, 13), dtype=float32)


In [118]:
# инициализация
initializer = tf.keras.initializers.he_normal()

W0 = tf.Variable(initializer(shape=(1, 13)), shape=(1,13), dtype=tf.float32)
b0 = tf.Variable([[0.]], shape=(1,1), dtype=tf.float32)

W, b = W0, b0

h = tf.keras.losses.Huber()
initial_mse = tf.keras.losses.mse(W @ tf.transpose(X_test) + b, y_test) 
initial_mae = tf.keras.losses.mae(W @ tf.transpose(X_test) + b, y_test) 
initial_huber = h(W @ tf.transpose(X_test) + b, y_test) 
print("Initial losses: MSE:{}, MAE:{}, Huber:{}". format(initial_mse, initial_mae, initial_huber))


Initial losses: MSE:[295817.3], MAE:[523.6076], Huber:523.1076049804688


In [119]:
lr = 0.00001

def print_losses(X_train, y_train, X_test, y_test, i):
    mse = tf.keras.losses.mse((W @ tf.transpose(X_train) + b), y_train) 
    mae = tf.keras.losses.mae((W @ tf.transpose(X_train) + b), y_train) 
    huber = h((W @ tf.transpose(X_train) + b), y_train) 
    print('----- Iter: {} ------'.format(i))
    print("Train losses: MSE:{}, MAE:{}, Huber:{}". format(mse, mae, huber))
    
    mse = tf.keras.losses.mse((W @ tf.transpose(X_test) + b), y_test) 
    mae = tf.keras.losses.mae((W @ tf.transpose(X_test) + b), y_test) 
    huber = h((W @ tf.transpose(X_test) + b), y_test) 
    print("Test losses: MSE:{}, MAE:{}, Huber:{}". format(mse, mae, huber))    

for i in range(10001):
    with tf.GradientTape() as tape:
        tape.watch(W)
        tape.watch(b)
        loss = tf.keras.losses.mae(W @ tf.transpose(X_train) + b, y_train)
        
    dW, db = tape.gradient(loss, [W,b])    

    W = W - lr * dW
    b = b - lr * db
    if i % 1000 == 0:
        print_losses(X_train, y_train, X_test, y_test, i)

    

----- Iter: 0 ------
Train losses: MSE:[278689.6], MAE:[508.5758], Huber:508.0758056640625
Test losses: MSE:[292486.38], MAE:[520.5928], Huber:520.0927734375
----- Iter: 1000 ------
Train losses: MSE:[509.13538], MAE:[15.698313], Huber:15.209028244018555
Test losses: MSE:[379.65515], MAE:[14.344528], Huber:13.845393180847168
----- Iter: 2000 ------
Train losses: MSE:[380.38373], MAE:[13.902361], Huber:13.410933494567871
Test losses: MSE:[299.4817], MAE:[12.837703], Huber:12.34174919128418
----- Iter: 3000 ------
Train losses: MSE:[296.43695], MAE:[12.376311], Huber:11.886895179748535
Test losses: MSE:[245.59767], MAE:[11.414646], Huber:10.918208122253418
----- Iter: 4000 ------
Train losses: MSE:[237.2587], MAE:[11.119163], Huber:10.632485389709473
Test losses: MSE:[210.37675], MAE:[10.401517], Huber:9.909993171691895
----- Iter: 5000 ------
Train losses: MSE:[198.66232], MAE:[10.142804], Huber:9.656201362609863
Test losses: MSE:[188.2243], MAE:[9.849783], Huber:9.359939575195312
-----

In [116]:
# а если оптимайзером?
# на тех же итерациях реально сильно круче

In [125]:
W, b = W0, b0
opt = tf.keras.optimizers.Adam()

for i in range(10000):
    opt.minimize(lambda: tf.keras.losses.mae(W @ tf.transpose(X_train) + b, y_train),
                var_list = [W, b])
    if i % 1000 == 0:
        print_losses(X_train, y_train, X_test, y_test, i)    

----- Iter: 0 ------
Train losses: MSE:[663.63983], MAE:[18.349384], Huber:17.852901458740234
Test losses: MSE:[486.8301], MAE:[14.947463], Huber:14.456467628479004
----- Iter: 1000 ------
Train losses: MSE:[119.61671], MAE:[7.317955], Huber:6.84630012512207
Test losses: MSE:[104.43905], MAE:[6.4479156], Huber:5.968722343444824
----- Iter: 2000 ------
Train losses: MSE:[53.977535], MAE:[4.985423], Huber:4.52485466003418
Test losses: MSE:[48.589146], MAE:[4.5617285], Huber:4.101846694946289
----- Iter: 3000 ------
Train losses: MSE:[32.174797], MAE:[3.6498275], Huber:3.196946620941162
Test losses: MSE:[26.827364], MAE:[3.4010084], Huber:2.9418368339538574
----- Iter: 4000 ------
Train losses: MSE:[27.695234], MAE:[3.2548783], Huber:2.815298557281494
Test losses: MSE:[22.58129], MAE:[3.1088185], Huber:2.655048370361328
----- Iter: 5000 ------
Train losses: MSE:[27.141138], MAE:[3.1768181], Huber:2.740269422531128
Test losses: MSE:[22.49558], MAE:[3.0827534], Huber:2.625678777694702
-----