In [1]:
import numpy as np
import tensorflow as tf
from sklearn.datasets import fetch_california_housing
from sklearn.model_selection import StratifiedShuffleSplit as SSS
from sklearn.preprocessing import StandardScaler
%matplotlib inline
import matplotlib.pyplot as plt
from datetime import datetime as dt

In [2]:
# Load the California housing dataset through scikit-learn's fetcher.
# The returned object exposes the feature matrix, the target and metadata.
housing = fetch_california_housing()

In [3]:
# List the fields available on the dataset object.
housing.keys()

dict_keys(['data', 'target', 'feature_names', 'DESCR'])

In [4]:
# Show the names of the feature columns.
housing.feature_names

['MedInc',
 'HouseAge',
 'AveRooms',
 'AveBedrms',
 'Population',
 'AveOccup',
 'Latitude',
 'Longitude']

In [5]:
# Keep a direct handle on the feature matrix and display its (rows, columns) shape.
housing_data = housing.data
housing_data.shape

(20640, 8)

In [6]:
# Preparation for a stratified train/test split.
# Each row gets an income stratum, ceil(column 0 / 1.5), appended after the
# features; the target is appended last.
# Resulting column layout: 0-7 features, 8 stratum, 9 target.
housing_strat = np.column_stack(
    (housing_data, np.ceil(housing_data[:, 0] / 1.5), housing.target)
)
unique, counts = np.unique(housing_strat[:, 8], return_counts=True)
# One printed row per stratum: [stratum value, number of rows in it].
print(np.column_stack((unique, counts)))

[[1.000e+00 8.220e+02]
 [2.000e+00 6.581e+03]
 [3.000e+00 7.236e+03]
 [4.000e+00 3.639e+03]
 [5.000e+00 1.423e+03]
 [6.000e+00 5.320e+02]
 [7.000e+00 1.890e+02]
 [8.000e+00 1.050e+02]
 [9.000e+00 5.000e+01]
 [1.000e+01 1.400e+01]
 [1.100e+01 4.900e+01]]


In [7]:
# One stratified 80/20 split, stratifying on the stratum column (index 8).
split = SSS(n_splits=1, test_size=0.2, random_state=42)
# n_splits is 1, so the splitter yields exactly one (train, test) index pair.
train_index, test_index = next(split.split(housing_strat, housing_strat[:, 8]))
strat_train_set = housing_strat[train_index, :]
strat_test_set = housing_strat[test_index, :]

In [8]:
# Column 9 is the regression target.
housing_train_target = strat_train_set[:, 9]
housing_test_target = strat_test_set[:, 9]
# Columns 0-8 become the model inputs.
# NOTE(review): column 8 is the stratum computed from column 0 for the split,
# so it is passed to the model as a ninth feature - confirm this is intended.
housing_train_data = strat_train_set[:, :9]
housing_test_data = strat_test_set[:, :9]

In [9]:
# Standardise the inputs. The scaler is fitted on the training rows only and
# the test rows are transformed with those same fitted statistics.
scaler = StandardScaler()
housing_train_data = scaler.fit_transform(housing_train_data)
housing_test_data = scaler.transform(housing_test_data)

In [10]:
# Training hyper-parameters.
n_epoch, learning_rate = 1000, 0.01

# m / m_test are row counts; n / n_test are column counts taken *before* the
# constant column below is added.
(m, n), (m_test, n_test) = housing_train_data.shape, housing_test_data.shape

# Prepend a column of ones to both matrices.
# NOTE: the names are rebound in place, so re-running this cell prepends a
# further column and increases n.
housing_train_data = np.hstack((np.ones((m, 1)), housing_train_data))
housing_test_data = np.hstack((np.ones((m_test, 1)), housing_test_data))

In [11]:
# Disabled earlier variant of the graph: the whole training set is baked in as
# constants instead of being fed through placeholders.
# Everything below is a string literal, so none of it runs; the cell merely
# evaluates to (and echoes) the text.
'''tf.reset_default_graph()
X = tf.constant(housing_train_data, dtype = tf.float32, name = "X")
y = tf.constant(housing_train_target.reshape(-1,1), dtype = tf.float32, name = "y")
theta = tf.Variable(tf.random_uniform([n + 1, 1], -1.0, 1.0), name = "theta")
y_pred = tf.matmul(X, theta, name = "prediction")
error = y_pred - y
mse = tf.reduce_mean(tf.square(error), name = "mse")
#gradients = 2/m * tf.matmul(tf.transpose(X), error)
#gradients = tf.gradients(mse, [theta])[0]
#training_op = tf.assign(theta, theta - learning_rate*gradients)
optimizer = tf.train.GradientDescentOptimizer(learning_rate = learning_rate)
training_op = optimizer.minimize(mse)
init = tf.global_variables_initializer()'''

'tf.reset_default_graph()\nX = tf.constant(housing_train_data, dtype = tf.float32, name = "X")\ny = tf.constant(housing_train_target.reshape(-1,1), dtype = tf.float32, name = "y")\ntheta = tf.Variable(tf.random_uniform([n + 1, 1], -1.0, 1.0), name = "theta")\ny_pred = tf.matmul(X, theta, name = "prediction")\nerror = y_pred - y\nmse = tf.reduce_mean(tf.square(error), name = "mse")\n#gradients = 2/m * tf.matmul(tf.transpose(X), error)\n#gradients = tf.gradients(mse, [theta])[0]\n#training_op = tf.assign(theta, theta - learning_rate*gradients)\noptimizer = tf.train.GradientDescentOptimizer(learning_rate = learning_rate)\ntraining_op = optimizer.minimize(mse)\ninit = tf.global_variables_initializer()'

In [12]:
# Disabled earlier training loop (one optimizer step per epoch, no feed_dict).
# It is a string literal, so it is not executed; the cell only echoes the text.
'''with tf.Session() as sess:
    sess.run(init)
    for epoch in range(n_epoch):
        if epoch % 100 == 0:
            print("Epoch ", epoch, " MSE = ", mse.eval())
        sess.run(training_op)
    best_theta = theta.eval()'''

'with tf.Session() as sess:\n    sess.run(init)\n    for epoch in range(n_epoch):\n        if epoch % 100 == 0:\n            print("Epoch ", epoch, " MSE = ", mse.eval())\n        sess.run(training_op)\n    best_theta = theta.eval()'

In [13]:
def fetch_batch(epoch, batch_index, batch_size, n_batches, X, y):
    """Draw a reproducible random mini-batch from ``X`` and ``y``.

    Rows are sampled with replacement. The sample is fully determined by the
    position in the training schedule, so the same ``(epoch, batch_index)``
    always yields the same batch.

    Parameters
    ----------
    epoch : int
        Index of the current epoch.
    batch_index : int
        Index of the batch within the epoch.
    batch_size : int
        Number of rows to draw.
    n_batches : int
        Number of batches per epoch; combined with ``epoch`` and
        ``batch_index`` to form the random seed.
    X : 2-D array
        Feature rows to sample from.
    y : 2-D array
        Targets, indexed with the same row indices as ``X``.

    Returns
    -------
    (X_batch, y_batch)
        The selected rows of ``X`` and ``y``.
    """
    # Sample from the rows of the array actually passed in, not from a
    # notebook-level row count, so the helper works for any dataset.
    n_rows = X.shape[0]
    # NOTE: this reseeds NumPy's global random state on every call.
    np.random.seed(epoch * n_batches + batch_index)
    indices = np.random.randint(n_rows, size=batch_size)
    return X[indices, :], y[indices, :]

In [17]:
# Per-run log directory, named after the current UTC timestamp
# (YYYYmmddHHMMSS), so each execution of this cell logs to its own folder.
now = dt.utcnow().strftime("%Y%m%d%H%M%S")
root_logdir = "./tf_logs"
logdir = "{}/run-{}/".format(root_logdir, now)

# Start from a fresh default graph, then build a linear model on it.
tf.reset_default_graph()
# Inputs are fed at run time; the leading None leaves the row count open.
X = tf.placeholder(tf.float32, shape = (None, n+1), name = "X")
y = tf.placeholder(tf.float32, shape = (None, 1), name = "y")
# One weight per input column, initialised uniformly between -1.0 and 1.0.
theta = tf.Variable(tf.random_uniform([n + 1, 1], -1.0, 1.0), name = "theta")
y_pred = tf.matmul(X, theta, name = "prediction")
# Ops created inside this scope get the "loss/" name prefix.
with tf.name_scope("loss") as scope:
    error = y_pred - y
    mse = tf.reduce_mean(tf.square(error), name = "mse")
# Gradient descent on the mean squared error.
optimizer = tf.train.GradientDescentOptimizer(learning_rate = learning_rate)
training_op = optimizer.minimize(mse)
init = tf.global_variables_initializer()
saver = tf.train.Saver()

# Scalar summary of the MSE plus a writer that stores it (and the graph) under logdir.
mse_summary = tf.summary.scalar('MSE', mse)
file_writer = tf.summary.FileWriter(logdir, tf.get_default_graph())

In [18]:
# Mini-batch gradient descent over the training set.
batch_size = 100
n_batch = int(np.ceil(m / batch_size))
# Targets as a column vector, built once instead of on every batch.
train_target_col = housing_train_target.reshape(-1, 1)
try:
    with tf.Session() as sess:
        sess.run(init)
        for e in range(n_epoch):
            if e % 100 == 0:
                # Periodic checkpoint plus the MSE over the full training set.
                # Raw string: "\m" is not a valid escape sequence; the path
                # value is exactly the same as before.
                save_path = saver.save(sess, r".\my_model.ckpt")
                print("Epoch ", e, " MSE = ", sess.run(mse, feed_dict = {X : housing_train_data, y : train_target_col}))
            for batch_index in range(n_batch):
                X_batch, y_batch = fetch_batch(e, batch_index, batch_size, n_batch, housing_train_data, train_target_col)
                sess.run(training_op, feed_dict = {X : X_batch, y : y_batch})
                if batch_index % 10 == 0 :
                    # Log the batch MSE (evaluated after the update step) every
                    # tenth batch, indexed by the global batch counter.
                    summary_str = mse_summary.eval(feed_dict = {X : X_batch, y : y_batch})
                    step = e * n_batch + batch_index
                    file_writer.add_summary(summary_str, step)
        best_theta = theta.eval()
        save_path = saver.save(sess, r".\my_model_final.ckpt")
finally:
    # Close the event file even if training is interrupted or raises.
    file_writer.close()

Epoch  0  MSE =  5.133388
Epoch  100  MSE =  0.53154534
Epoch  200  MSE =  0.52904093
Epoch  300  MSE =  0.53023237
Epoch  400  MSE =  0.5274053
Epoch  500  MSE =  0.5306007
Epoch  600  MSE =  0.5288235
Epoch  700  MSE =  0.5294174
Epoch  800  MSE =  0.5289626
Epoch  900  MSE =  0.5271695


In [16]:
# Restore the final checkpoint written by the training cell and report the MSE
# on the held-out test rows. The training cell must have been run first.
with tf.Session() as sess:
    # Raw string: "\m" is not a valid escape sequence; the path value is unchanged.
    saver.restore(sess, r".\my_model_final.ckpt")
    print("MSE test = ", sess.run(mse, feed_dict = {X : housing_test_data, y : housing_test_target.reshape(-1, 1)}))

INFO:tensorflow:Restoring parameters from .\my_model_final.ckpt
MSE test =  0.5166749


In [19]:
# Print the graph name of the op that produces `error`; it carries the prefix
# of the name scope the op was created in.
print(error.op.name)

loss/sub
