In [1]:
import tensorflow as tf

  from ._conv import register_converters as _register_converters


### tf.constant() and tf.Variable()
1. Constant values are stored in the graph definition
2. Sessions allocate memory to store variable values

### tf.placeholder and feed_dict
1. Feed values into placeholder with a dictionary(feed_dict)
2. Easy to use but poor performance

## Model
Inference : Y_predicted = w * X + b  
MSE : mean over samples of (Y - Y_predicted)^2  
Data : birth_life_2010.txt  

In [2]:
import os
os.environ['TF_CPP_MIN_LOG_LEVEL']='2'
import time

import numpy as np
import matplotlib.pyplot as plt
import tensorflow as tf

import utils

DATA_FILE = 'birth_life_2010.txt'
LEARNING_RATE = 0.001
N_EPOCHS = 100

# Read in data from the .txt file
data, n_samples = utils.read_birth_life_data(DATA_FILE)

# Placeholders for one sample at a time; the training loop below feeds
# each (x, y) row of `data` through feed_dict.
X = tf.placeholder(tf.float32, name='X')
Y = tf.placeholder(tf.float32, name='Y')

# Trainable parameters of the linear model, both initialised to 0.0.
# NOTE: tf.get_variable refuses to recreate an existing variable, so running
# this cell twice in the same kernel raises ValueError. Restart the kernel
# (or reset the default graph) before re-running.
w = tf.get_variable('weights', initializer=tf.constant(0.0))
b = tf.get_variable('bias', initializer=tf.constant(0.0))

# Inference and per-sample squared-error loss.
Y_predicted = w * X + b
loss = tf.square(Y_predicted - Y)

optimizer = tf.train.GradientDescentOptimizer(learning_rate=LEARNING_RATE).minimize(loss)

start = time.time()

# The writer dumps the graph definition to ./graphs. It is closed in `finally`
# so the event file is flushed and released even if training raises.
writer = tf.summary.FileWriter('./graphs', tf.get_default_graph())
try:
    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        for i in range(N_EPOCHS):
            total_loss = 0
            for x, y in data:
                # One gradient-descent step per sample.
                _, loss_ = sess.run([optimizer, loss], feed_dict={X: x, Y: y})
                total_loss += loss_
            print('Epoch {0}: {1}'.format(i, total_loss/n_samples))

        # Pull the trained parameters out while the session is still open.
        w_out, b_out = sess.run([w, b])
finally:
    writer.close()

print('Took: %f seconds' %(time.time() - start))

# Plot the raw samples against the fitted line.
plt.plot(data[:,0], data[:,1], 'bo', label='Real data')
plt.plot(data[:,0], data[:,0] * w_out + b_out, 'r', label='Predicted data')
plt.legend()
plt.show()

Epoch 0: 1661.863764550287
Epoch 1: 956.3224439573916
Epoch 2: 844.6737683409139
Epoch 3: 750.7312372197838
Epoch 4: 667.659830722252
Epoch 5: 594.1417484349327
Epoch 6: 529.0787271179651
Epoch 7: 471.5003584364135
Epoch 8: 420.5458252520938
Epoch 9: 375.45531067297253
Epoch 10: 335.55436177954664
Epoch 11: 300.24627770512666
Epoch 12: 269.00374521501146
Epoch 13: 241.3595776562824
Epoch 14: 216.9003910217238
Epoch 15: 195.25972397061292
Epoch 16: 176.1137731664483
Epoch 17: 159.17551683403158
Epoch 18: 144.19069889799545
Epoch 19: 130.93503690609023
Epoch 20: 119.20935661137888
Epoch 21: 108.83793506244884
Epoch 22: 99.66458668207358
Epoch 23: 91.55171666162971
Epoch 24: 84.37658985632197
Epoch 25: 78.03213362396008
Epoch 26: 72.42178616552172
Epoch 27: 67.46132107331957
Epoch 28: 63.07563027821873
Epoch 29: 59.19871881428714
Epoch 30: 55.77163058824279
Epoch 31: 52.742706123048954
Epoch 32: 50.06563247971506
Epoch 33: 47.70006537150391
Epoch 34: 45.61017402416389
Epoch 35: 43.7637948

<Figure size 640x480 with 1 Axes>

### Huber Loss
If the difference between the predicted value and the real value is small, square it.  
If it is large, take its absolute value.
<pre>
L(y,f(x)) = {(1/2)*(y-f(x))^2       for |y-f(x)| less than or equal to d,
            {d|y-f(x)| - 0.5d^2     otherwise
</pre>

tf.cond(pred, fn1, fn2, name=None) -> for conditional statements in tensorflow

In [3]:
def hubber_loss(lables,predictions, delta=0.4):
    """Huber loss between a label and a prediction.

    Quadratic for residuals smaller than ``delta`` and linear beyond it.

    Args:
        lables: true value (parameter name kept as-is for existing callers).
        predictions: predicted value.
        delta: residual threshold at which the loss switches from the
            quadratic branch to the linear branch.

    Returns:
        The result of ``tf.cond`` selecting ``0.5 * residual**2`` when
        ``residual < delta`` and ``delta * residual - 0.5 * delta**2`` otherwise.

    Note:
        ``tf.cond`` takes a scalar predicate, so this is meant for scalar
        inputs (one sample at a time).
    """
    # The original referenced an undefined name `prediction` here.
    residual = tf.abs(lables - predictions)

    # Both branches are passed as callables so tf.cond can build each subgraph.
    def f1(): return 0.5 * tf.square(residual)
    def f2(): return delta * residual - 0.5 * tf.square(delta)

    return tf.cond(residual < delta, f1, f2)

### TensorFlow Control Flow 
Since TF builds graphs before computations, we have to specify all possible subgraphs beforehand.  
PyTorch's dynamic graphs and TF eager execution help overcome this

# tf.data
## Placeholder

Pro : put data processing outside TF, making it easy to do in Python.  
Cons : users often end up processing their data in a single thread and creating data bottleneck that slows execution down.

Instead use : tf.data.Dataset or tf.data.Iterator  


## Optimizers (Training Part)

In [5]:
# Attach a gradient-descent training op to the existing `loss` tensor.
optimizer = tf.train.GradientDescentOptimizer(learning_rate=0.01).minimize(loss)

# Open a session and initialise every variable in the graph.
sess = tf.Session()
init_op = tf.global_variables_initializer()
sess.run(init_op)

# NOTE: X, Y and loss come from the linear-regression cell; x and y are not
# defined in this cell either and are whatever that cell's training loop
# left behind, so this cell only runs after that one.
feed = {X: x, Y: y}
_, l = sess.run([optimizer, loss], feed_dict=feed)

The session looks at all trainable variables that the loss depends on and updates them automatically.

## Logistic Regression

Here we will be working on the MNIST Database.   
MNIST Dataset - Collection of images of handwritten digits. Each image is a 28x28 array.


X : Image of handwritten digit  
Y : The digit value

In [6]:
from tensorflow.examples.tutorials.mnist import input_data
# Load MNIST through the TF tutorial helper; on this run it downloaded the
# .gz archives into data/mnist (see the cell output).
# NOTE(review): the output shows deprecation notices for this helper that
# recommend tf.data / official/mnist/dataset.py instead.
MNIST = input_data.read_data_sets('data/mnist', one_hot = True)

Instructions for updating:
Please use alternatives such as official/mnist/dataset.py from tensorflow/models.
Instructions for updating:
Please write your own downloading logic.
Instructions for updating:
Please use urllib or similar directly.
Successfully downloaded train-images-idx3-ubyte.gz 9912422 bytes.
Instructions for updating:
Please use tf.data to implement this functionality.
Extracting data/mnist/train-images-idx3-ubyte.gz
Successfully downloaded train-labels-idx1-ubyte.gz 28881 bytes.
Instructions for updating:
Please use tf.data to implement this functionality.
Extracting data/mnist/train-labels-idx1-ubyte.gz
Instructions for updating:
Please use tf.one_hot on tensors.
Successfully downloaded t10k-images-idx3-ubyte.gz 1648877 bytes.
Extracting data/mnist/t10k-images-idx3-ubyte.gz
Successfully downloaded t10k-labels-idx1-ubyte.gz 4542 bytes.
Extracting data/mnist/t10k-labels-idx1-ubyte.gz
Instructions for updating:
Please use alternatives such as official/mnist/dataset.py fr

In [13]:
# Fetch MNIST with the project helpers and load it as train/val/test.
# NOTE(review): the recorded run of this cell ends in FileNotFoundError for
# 'data/mnist/train-labels-idx1-ubyte'. The output reports that the .gz
# archives "already exist", so presumably the download helper skipped
# extracting them; confirm against utils.download_mnist.
mnist_folder = 'data/mnist'
utils.download_mnist(mnist_folder)
# flatten = True is passed through to the helper; presumably it returns each
# image as a 1-D vector. TODO confirm in utils.read_mnist.
train, val, test = utils.read_mnist(mnist_folder, flatten = True)

data/mnist/train-images-idx3-ubyte.gz already exists
data/mnist/train-labels-idx1-ubyte.gz already exists
data/mnist/t10k-images-idx3-ubyte.gz already exists
data/mnist/t10k-labels-idx1-ubyte.gz already exists


FileNotFoundError: [Errno 2] No such file or directory: 'data/mnist/train-labels-idx1-ubyte'

In [None]:
train_data = tf.data.Dataset