# GEC Data Science Program
# Level 2, Lab 2

## TensorFlow

https://www.tensorflow.org/

● Tensor: multidimensional array
    - a scalar is a tensor
    - a vector (or list, or array) is also a tensor
    - a matrix (or lists of lists, or 2d array) is a tensor
    - so is a cube, and so on

● Computation is defined as a Directed Acyclic Graph (DAG)

● The graph is a description (blueprint) of the computations to be executed

● Edges represent data (i.e., tensors) flowing between nodes in the graph

● Nodes represent Operations (Ops)

In [None]:
import tensorflow as tf

In [None]:
%matplotlib inline
from matplotlib import pyplot as plt
import numpy as np
import pandas as pd

In [None]:
tf.reset_default_graph()

#### Example: c=a+b 

In [None]:
# Build the graph for c = a + b. Nothing is computed here: these calls only
# add two constant nodes and one add node (named 'a', 'b', 'c') to the graph.
a = tf.constant([3],name='a')
b = tf.constant([5],name="b")
c = tf.add(a,b, name="c")

In [None]:
# Evaluate node `c` inside a session; the session is closed on leaving the
# `with` block. print() is called with a single argument so the cell behaves
# the same on Python 2 and Python 3, like the other print calls in this lab.
with tf.Session() as sess:
    print(sess.run(c))

#### Graph Visualization by Tensorboard

In [None]:
sess = tf.InteractiveSession()

In [None]:
# Serialise the current default graph and write it as an event file into the
# current directory ('.') so that TensorBoard can display it.
graph = tf.get_default_graph().as_graph_def()
summary_writer = tf.summary.FileWriter('.',graph)
# Force the pending event data out to disk.
summary_writer.flush()

In [None]:
!pwd

Run this in a terminal:
```sh
tensorboard  --logdir=[use above path in your system]
```

Open this page: http://localhost:6006/#graphs

In [None]:
tf.reset_default_graph()

#### Q: Create a uniform random tensor (in [0,1]) of dimension [100] and add 2 to every element.

In [None]:
# e: 100 uniform random values.
e = tf.random_uniform([100])
# f: a tensor of the same length in which every element is 2.0.
f = tf.fill([100],2.0)
# g: element-wise sum e + f.
g = tf.add(e,f)

In [None]:
with tf.Session() as sess:
    print(sess.run(g))

#### Matrix Multiplications

#### Q: Create a uniform random matrix of size 10x10 and multiply it by a matrix of size 10x2 filled with ones.

In [None]:
tf.reset_default_graph()

In [None]:
# W: 10x10 matrix of uniform random values.
W = tf.random_uniform([10, 10])
# x: 10x2 matrix of ones.
x = tf.ones([10,2])
# y: matrix product W x, a 10x2 matrix.
y=tf.matmul(W,x)

In [None]:
with tf.Session() as sess:
    print(sess.run(y))

In [None]:
sess = tf.InteractiveSession()

In [None]:
y.eval()

In [None]:
# Raises an error: `c` was created in a graph that has since been discarded by
# tf.reset_default_graph(), so it is not part of the graph this session runs.
c.eval()

In [None]:
sess.close()

#### Main TF Math Ops
|Operation Type | Sample Ops                                      |
|:--------------|:------------------------------------------------|
|element-wise   | add, sub, mul, sqrt, pow, exp, log, cos, erf,...|
|array          | concat, slice, rank, shape, reshape             |
|matrix         | matmul, transpose, MatrixInverse, solve, svd,...|
|logical        | and, or, xor                                    |
|reduction      | reduce_sum, reduce_min, reduce_max              |
|slicing/joining|tile, pack, unpack, split,...                    |
|               |                                                 |

### Q: Let $y = f(x) = 2x^2 - 5x + 2$. Write a TF program that generates a 2-D tensor of shape=[40, 2] as follows: the 1st column contains the values -20, ..., 19. The values in the 2nd column are $y_i = f(x_i)$, where $x_i$ is the corresponding value in the first column.

#### Let's do it with numpy

In [None]:
# Integers -20..19 arranged as a single column (one row per value).
x = np.arange(-20, 20).reshape(-1, 1)

In [None]:
x.shape

In [None]:
# Column 0 holds x, column 1 holds f(x) = 2x^2 - 5x + 2. The polynomial is
# written in Horner form; for integer x the result is exactly the same.
y = np.hstack((x, (2 * x - 5) * x + 2))

In [None]:
plt.plot(y[:,0],y[:,1]);

#### Tensorflow

In [None]:
sess=tf.InteractiveSession()

In [None]:
# xx: the integers -20..19 as a column tensor of shape [40, 1].
xx=tf.reshape(tf.range(-20,20),[40,1])
# yy: f(xx) = 2*xx^2 - 5*xx + 2, computed element-wise (same shape as xx).
yy=2*tf.square(xx)-5*xx+2

In [None]:
xx,yy

In [None]:
r = tf.concat([xx,yy],axis=1)

In [None]:
r_ev=r.eval()

In [None]:
plt.plot(r_ev[:,0],r_ev[:,1])

### Q: How to find the minimum of this function using tensorflow?

### Optimization

#### Variables

In [None]:
xv=tf.Variable(-20.0)

In [None]:
# Raises an error: a variable holds no value until its initializer has run.
# Run sess.run(tf.global_variables_initializer()) first.
xv.eval()

In [None]:
sess.run(tf.global_variables_initializer())

In [None]:
xv.eval()

In [None]:
yv=2*tf.square(xv)-5*xv+2

In [None]:
yv.eval()

In [None]:
gd = tf.train.GradientDescentOptimizer(learning_rate=0.1)

In [None]:
z = gd.minimize(yv)

In [None]:
z

Let's look at the graph.
[Update the graph](#Graph-Visualization-by-Tensorboard)

In [None]:
# Repeat gradient-descent steps until x moves by less than `tol` between two
# consecutive steps. `max_steps` bounds the loop so that a badly chosen
# learning rate (oscillation / divergence) cannot hang the notebook.
tol = 1e-5
max_steps = 10000
converged = False
old_x = xv.eval()
for _ in range(max_steps):
    sess.run(z)
    # Fetch x and y in a single run() call instead of evaluating xv repeatedly.
    new_x, new_y = sess.run([xv, yv])
    # Single-argument print(): identical output on Python 2 and Python 3.
    print("{} {}".format(new_x, new_y))
    converged = abs(old_x - new_x) < tol
    old_x = new_x
    if converged:
        break

## Softmax

https://en.wikipedia.org/wiki/Softmax_function

### Numpy Softmax

#### Q: Write a Softmax function using numpy.

In [None]:
def softmax(x,t=1):
    """Return the temperature-scaled softmax of ``x``.

    Parameters
    ----------
    x : array-like
        Either a 2-D collection of row vectors (softmax is applied to each
        row independently) or a single 1-D vector.
    t : number, optional
        Temperature. Scores are divided by ``t`` before exponentiation, so a
        large ``t`` flattens the distribution and a small ``t`` sharpens it
        towards a one-hot vector at the maximum.

    Returns
    -------
    numpy.ndarray
        Array with the same shape as ``x`` whose rows (or the vector itself,
        for 1-D input) are non-negative and sum to 1.
    """
    x = np.asarray(x)
    # Work in floating point so that integer scores are never truncated by an
    # integer division when dividing by the temperature.
    if not np.issubdtype(x.dtype, np.floating):
        x = x.astype(float)
    # Rows are along axis 1 for 2-D input; a lone vector is its own "row".
    axis = 1 if x.ndim > 1 else 0
    # Subtract the row maximum first: softmax is shift-invariant, and this
    # keeps exp() from overflowing on large scores.
    shifted = x - np.max(x, axis=axis, keepdims=True)
    exps = np.exp(shifted / t)
    return exps / np.sum(exps, axis=axis, keepdims=True)

#### Q: What is softmax of [1,2,3,4,5]?

In [None]:
x=np.arange(1,6)

In [None]:
softmax([x])

In [None]:
softmax([x], t=10)

In [None]:
softmax([x], t=0.001)

### Q: Create a Softmax digit classifier using numpy.

In [None]:
d=pd.read_csv("./train.csv")

In [None]:
X=d.values[:,1:]

In [None]:
L=pd.get_dummies(d.label).values

In [None]:
y=L

In [None]:
X.shape, y.shape

In [None]:
W=np.random.randn(X.shape[1],y.shape[1])

In [None]:
W.shape

In [None]:
# One random bias per output class (one entry per column of y). It is
# broadcast across all rows in X.W + b, so every sample shares the same bias
# vector instead of each sample getting its own private offsets.
b=np.random.randn(y.shape[1])

In [None]:
b.shape

In [None]:
y_hat=np.matmul(X,W)+b

In [None]:
rmse = np.mean((y-y_hat)**2)**0.5

In [None]:
rmse

In [None]:
e=softmax(y_hat,t=0.0001)-y

In [None]:
err_cnt=np.sum(np.sum(np.abs(e), axis=1))/2

In [None]:
1-err_cnt/len(y)

### Q: Can we calculate W?
$ XW = y $

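$ W = X^{+}y $, where $X^{+}$ is the Moore–Penrose pseudo-inverse of $X$ ($X$ is not square, so an ordinary inverse $X^{-1}$ does not exist)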

In [None]:
W=np.matmul(np.linalg.pinv(X),y)

In [None]:
W.shape

In [None]:
y_hat = np.matmul(X,W)

In [None]:
rmse = np.mean((y-y_hat)**2)**0.5

In [None]:
rmse

In [None]:
e=softmax(y_hat,t=0.0001)-y

In [None]:
err_cnt=np.sum(np.sum(np.abs(e), axis=1))/2

In [None]:
1-err_cnt/len(y)

### Q: How can we optimize W?

### Q: Create a Softmax digit classifier with TensorFlow

In [None]:
tf.reset_default_graph()

In [None]:
sess=tf.InteractiveSession()

In [None]:
# Random float32 weight matrix with one row per input feature (columns of X)
# and one column per class (columns of L).
# NOTE(review): W_init is not referenced by the cells visible below (W is
# created from tf.zeros) - confirm whether it was meant to initialise W.
W_init=np.array(np.random.randn(X.shape[1],L.shape[1])
                ,dtype=np.float32)

In [None]:
W_init.dtype, W_init.shape

In [None]:
x = tf.placeholder("float", shape=[None, 784])
y_ = tf.placeholder("float", shape=[None, 10])

In [None]:
W = tf.Variable(tf.zeros([784,10]))
b = tf.Variable(tf.zeros([10]))

In [None]:
y = tf.nn.softmax(tf.matmul(x,W) + b)

In [None]:
# Cross-entropy between the one-hot labels y_ and the predictions y, summed
# over the batch. The softmax output is clipped away from 0 before the log so
# that a saturated prediction gives a large finite loss instead of
# log(0) = -inf, which would turn the loss and its gradients into NaN.
cross_entropy = -tf.reduce_sum(y_ * tf.log(tf.clip_by_value(y, 1e-10, 1.0)))

In [None]:
train_step = tf.train.GradientDescentOptimizer(1e-6).minimize(cross_entropy)

A possibly better option:
### Adam Optimizer:  A METHOD FOR STOCHASTIC OPTIMIZATION

https://arxiv.org/pdf/1412.6980v8.pdf

Pros:
- It adapts the learning rate for each parameter
- It uses momentum (moving averages of the gradients)

Cons:
- computationally more expensive (slower)


In [None]:
# train_step = tf.train.AdamOptimizer(1e-2, beta1=0.5).minimize(cross_entropy)

In [None]:
sess.run(tf.global_variables_initializer())

In [None]:
# Mini-batch training: 1000 steps of 10 consecutive rows each. Row indices
# wrap around modulo n, so the loop also works when it runs past the end of X.
batch_size = 10
n = len(X)
for i in range(1000):
    start = i * batch_size
    batch_idx = np.arange(start, start + batch_size) % n
    feed = {x: X[batch_idx, :], y_: L[batch_idx, :]}
    _, bce = sess.run([train_step, cross_entropy], feed_dict=feed)
    # Report the batch loss every 100 steps.
    if i % 100 == 0:
        print("Iter {}, Loss {}".format(i, bce))

In [None]:
correct_prediction = tf.equal(tf.argmax(y,1), tf.argmax(y_,1))

In [None]:
accuracy = tf.reduce_mean(tf.cast(correct_prediction, "float"))

In [None]:
# Accuracy over the full training set. print() is called with a single
# argument so the cell runs unchanged on Python 2 and Python 3.
print(sess.run(accuracy, feed_dict={x: X, y_: L}))

In [None]:
sess.close()

## Introduction to Keras

https://keras.io/

Note: reset kernel here. 

In [None]:
import keras as K
from keras.models import Sequential  
from keras.layers.convolutional import Convolution2D, MaxPooling2D
from keras.layers import Dropout
from keras.layers.core import Dense, Activation, Flatten, Reshape
from keras.optimizers import Adam, RMSprop
from keras.utils import np_utils

In [None]:
import pandas as pd
import numpy as np

In [None]:
from IPython.display import SVG
from keras.utils.vis_utils import model_to_dot

In [None]:
d=pd.read_csv("../train.csv")

In [None]:
X=d.values[:,1:]

In [None]:
y=pd.get_dummies(d.label).values

In [None]:
X.shape, y.shape

In [None]:
# Fully connected classifier: 784 inputs -> 250 ReLU units -> 10 softmax outputs.
model = Sequential()
model.add(Dense(250, input_dim=784))
model.add(Activation('relu'))
# Output layer with one unit per class; no input_dim is given here.
model.add(Dense(10))
model.add(Activation('softmax'))

In [None]:
#another way:
# model = Sequential()
# model.add(Dense(512, activation='relu', input_shape=(784,)))
# model.add(Dropout(0.2))
# model.add(Dense(512, activation='relu'))
# model.add(Dropout(0.2))
# model.add(Dense(10, activation='softmax'))

In [None]:
SVG(model_to_dot(model, show_shapes=True, show_layer_names=True).create(prog='dot', format='svg'))

In [None]:
model.compile(loss='categorical_crossentropy', optimizer=Adam(1e-2), metrics=['accuracy']) 

In [None]:
# Train for 10 passes over the data, reshuffling the samples each epoch.
# `epochs` is the Keras 2 name of this argument; the older `nb_epoch`
# spelling is deprecated.
model.fit(X, y, epochs=10, shuffle=True, batch_size=32)

In [None]:
# Accuracy on the same data the model was fitted on, i.e. training accuracy.
# The variable is named accordingly so it is not mistaken for a held-out score.
classes = model.predict_classes(X, verbose=0)
train_accuracy = np.mean(np.equal(np.argmax(y, 1), classes))
print("Train Accuracy {}".format(train_accuracy))

### Q: Accuracy is too low. Can we improve?
Hint: try a different optimizer (e.g. RMSprop)

In [None]:
# Small convolutional classifier for 28x28 single-channel images:
# two 3x3 convolutions (5 then 7 filters) -> 2x2 max-pooling -> dropout ->
# 128-unit dense layer -> dropout -> 10-way softmax.
# The kernel size is passed as an explicit (3, 3) tuple: the legacy positional
# form `Convolution2D(n, 3, 3)` is deprecated and can be read as
# kernel_size=3, strides=3 by the current layer signature.
model = Sequential()
model.add(Convolution2D(5, (3, 3), activation='relu', input_shape=(28,28,1)))
model.add(Convolution2D(7, (3, 3), activation='relu'))
model.add(MaxPooling2D(pool_size=(2,2)))
model.add(Dropout(0.25))
model.add(Flatten())
model.add(Dense(128, activation='relu'))
model.add(Dropout(0.5))
model.add(Dense(10, activation='softmax'))

In [None]:
SVG(model_to_dot(model, show_shapes=True, show_layer_names=True).create(prog='dot', format='svg'))

In [None]:
model.compile(loss='categorical_crossentropy',
              optimizer='adam',
              metrics=['accuracy'])

In [None]:
# Reshape each flat 784-pixel row into a 28x28x1 image, as the first
# convolution layer expects, and train for 10 epochs. `epochs` replaces the
# deprecated `nb_epoch` argument name.
model.fit(X.reshape(len(X),28,28,1), y,
          batch_size=32, epochs=10, verbose=1)

### Q: What we measured was training accuracy. What's the test accuracy?

### Q: How would batch_size and the number of epochs affect accuracy?