# TensorFlow

## Imports

In [2]:
import tensorflow as tf
import numpy as np
from sklearn.datasets import load_iris, load_digits
from sklearn.preprocessing import LabelBinarizer
from sklearn.metrics import f1_score
from sklearn.model_selection import train_test_split
from tqdm import tqdm

## Loading data

In [17]:
iris = load_iris()
target = LabelBinarizer().fit_transform(iris['target'])
iris = iris['data']
X_train, X_test, y_train, y_test = train_test_split(iris, target, 
                                                    test_size=0.3)

At first let's try to use simple linear model

We will predict Sepal length by sepal width, petal length, petal width

So our model looks like: $\widehat{y} = Wx + b$

$x$ and $y$ is train examples. We represent them by placeholders:

In [3]:
y = tf.placeholder(tf.float32, [None, 1])
x = tf.placeholder(tf.float32, [None, 4])    

$W$ and $b$ is model parameters. We store them in single variable:

In [12]:
w = tf.Variable(tf.random_normal(mean=0., stddev=1., shape=[4, 1]))

And define our model (build graph) as:

In [13]:
y_hat = tf.matmul(x, w)

We will fit out model by a least squares approach: $\sum_{i = 1}^{n}(y - \widehat{y})^2\rightarrow \min$

Lifehack: there exists a formula for solve least squares problem:
$$(X^TX)^{-1}X^Ty$$

In [14]:
XTX = tf.matmul(tf.transpose(x), x)
XTX_inv = tf.matrix_inverse(XTX)
XT = tf.matmul(XTX_inv, tf.transpose(x))
w_opt = tf.matmul(XT, y)

Let's see our optimal parameters

In [15]:
X_train_modified = np.concatenate([X_train[...,1:], np.ones((len(X_train), 1))], 1)
sess = tf.InteractiveSession()
print(sess.run(w_opt, feed_dict={x: X_train_modified, y: X_train[...,:1]}))
sess.close()

[[ 0.67185158]
 [ 0.70308006]
 [-0.52207577]
 [ 1.77599645]]


In the way to optimization, define loss function:

In [41]:
loss = tf.reduce_mean(tf.square(y - y_hat))

We need a guy who will do the optimization

In [82]:
sgd = tf.train.GradientDescentOptimizer(learning_rate=0.0001)\
              .minimize(loss)

In [90]:
sess = tf.InteractiveSession()
sess.run(tf.global_variables_initializer())
for i in tqdm(range(1200)):
    sess.run(sgd, feed_dict={x: X_train_modified, y: X_train[...,:1]})

100%|██████████| 1200/1200 [00:04<00:00, 242.37it/s]


In [97]:
X_test_modified = np.concatenate([X_test[...,1:], np.ones((len(X_test), 1))], 1)
w_grad, loss_grad = sess.run([w, loss], 
                             feed_dict={x: X_test_modified, 
                                        y: X_test[...,:1]})
w_ex, loss_ex = sess.run([w, 
                tf.reduce_mean(tf.square(y - tf.matmul(x, w_opt)))], 
               feed_dict={x: X_test_modified, y: X_test[...,:1]})
print('weights for gradient solution:\n', w_grad)
print('weights for explicit solution:\n', w_ex)
print('test loss for gradient solution:\n', loss_grad)
print('test loss for explicit solution:\n', loss_ex)

weights for gradient solution:
 [[ 0.86514693]
 [ 0.6580798 ]
 [ 1.30885231]
 [-1.34762335]]
weights for explicit solution:
 [[ 0.86514693]
 [ 0.6580798 ]
 [ 1.30885231]
 [-1.34762335]]
test loss for gradient solution:
 2.52269
test loss for explicit solution:
 0.0728428


In [103]:
sess.close()

# Try to predict

As earlier linear model: $z = Wx + b$

but with softmax activation: $\widehat{y} = \text{softmax}(z)$

Again: placeholders

In [110]:
y = tf.placeholder(tf.float32, [None, 3])
x = tf.placeholder(tf.float32, [None, 4])

Weights and biases:

In [111]:
w = tf.Variable(tf.random_normal(mean=0., stddev=1., shape=[4, 3]))
b = tf.Variable(tf.random_normal(mean=0., stddev=1., shape=[3]))

activations:

In [112]:
y_hat = tf.nn.softmax(tf.matmul(x, w) + b)
y_classes = tf.argmax(y_hat, 1)

and loss:

In [113]:
loss = tf.reduce_sum(-tf.reduce_sum(y * tf.log(y_hat), 1))

Define optimizer:

In [114]:
sgd = tf.train.GradientDescentOptimizer(learning_rate=0.0001)\
        .minimize(loss)

Create tf session:

In [115]:
s = tf.InteractiveSession()
s.run(tf.global_variables_initializer())

F1 score on random weights:

In [116]:
f1_score(s.run(y_classes, feed_dict={x: X_test}), np.argmax(y_test, 1), 
         average='macro')

  'recall', 'true', average, warn_for)


0.20952380952380953

Learning:

In [117]:
for i in tqdm(range(1200)):
    s.run(sgd, feed_dict={x: X_train, y: y_train})

100%|██████████| 1200/1200 [00:03<00:00, 316.10it/s]


F1 after learning:

In [118]:
f1_score(s.run(y_classes, feed_dict={x: X_test}), np.argmax(y_test, 1), average='macro')

1.0

In [119]:
s.close()

### Task #1

Using tecniques above, you can create Multi Layer Perceptron (MLP) model and 
solve digits classification problem (you can use `load_digits` function)

# TF Execution

## Sessions

Interactive sessions is always default

In [3]:
session = tf.InteractiveSession()
with session.as_default():
    print(tf.get_default_session())
print(tf.get_default_session())

<tensorflow.python.client.session.InteractiveSession object at 0x7fd510410828>
<tensorflow.python.client.session.InteractiveSession object at 0x7fd510410828>


Unlike classic sessions:

In [5]:
session = tf.Session()
with session.as_default():
    print(tf.get_default_session())
print(tf.get_default_session())

<tensorflow.python.client.session.Session object at 0x7fd510423160>
<tensorflow.python.client.session.InteractiveSession object at 0x7fd510410828>


So these expressions are equivalent:

In [6]:
sess = tf.Session()
with sess.as_default():
    c = tf.constant(1)
    print(sess.run(c))

1


In [7]:
sess = tf.InteractiveSession()
c = tf.constant(1)
print(sess.run(c))
sess.close()

1


## Graphs

In [4]:
graph = tf.Graph()
print(graph)
with graph.as_default():
    print(tf.get_default_graph())
    a = tf.constant([1., 2.])
    f = tf.exp(a)
print(tf.get_default_graph())

<tensorflow.python.framework.ops.Graph object at 0x7fd510423748>
<tensorflow.python.framework.ops.Graph object at 0x7fd510423748>
<tensorflow.python.framework.ops.Graph object at 0x7fd510410ac8>


Unlike sessions, graphs is not resourses

In [8]:
sess = tf.InteractiveSession(graph=graph)
print(sess.run(f))
sess.close()

[ 2.71828175  7.38905621]


## Devices

In [10]:
x = tf.constant([1., 2., 2., 1.], shape=[2, 2])
with tf.device("/gpu:0"):
    y = tf.sin(x)
with tf.device("/cpu:0"):
    z = tf.reduce_sum(y)
with tf.Session(config=tf.ConfigProto(log_device_placement=True)) as sess:
    print(sess.run(z))

3.50154


Our micro map-reduce! 
(map on gpu and reduce on cpu)

# Visualization

In [11]:
g = tf.Graph()
with g.as_default():
    with tf.name_scope('placeholders'):
        y = tf.placeholder(tf.float32, [None, 3])
        x = tf.placeholder(tf.float32, [None, 4])
    with tf.name_scope('weights'):
        w = tf.Variable(tf.random_normal(mean=0., stddev=1., 
                                         shape=[4, 3]),
                       name='w')
        b = tf.Variable(tf.random_normal(mean=0., stddev=1., 
                                         shape=[3]),
                       name='b')
    with tf.name_scope('model'):
        y_hat = tf.nn.softmax(tf.matmul(x, w) + b)
        y_classes = tf.argmax(y_hat, 1)


In [12]:
with g.as_default():
    with tf.name_scope('loss'):
        loss = tf.reduce_sum(-tf.reduce_sum(y * tf.log(y_hat), 1))

In [13]:
with g.as_default():
    sgd = tf.train.GradientDescentOptimizer(learning_rate=0.0001)\
            .minimize(loss)

Create scalar summary for loss visualization

In [14]:
with g.as_default():
    train_sum = tf.summary.scalar('loss', loss)

Then create summary writer to save learning process

In [15]:
writer = tf.summary.FileWriter(logdir='./', graph=g)

In [18]:
sess = tf.InteractiveSession(graph=g)
sess.run(tf.global_variables_initializer())
for i in tqdm(range(1200)):
    sess.run(sgd, feed_dict={x: X_train, y: y_train})
    summ = sess.run(train_sum, feed_dict={x: X_test, y: y_test})
    writer.add_summary(summ, i)
writer.flush()

100%|██████████| 1200/1200 [00:06<00:00, 172.15it/s]


# Saving Models

In [19]:
saver = tf.train.Saver()

At first save the graph

In [20]:
with open('graph.proto', 'wb') as f:
    f.write(g.as_graph_def().SerializeToString())

Then save model weights

In [22]:
saver.save(sess, save_path='./model.ckpt')

'./model.ckpt'

In [25]:
sess = tf.Session(graph=g)
saver.restore(sess, './model.ckpt')