<a href="https://colab.research.google.com/github/maggieliuzzi/deep_learning/blob/master/TensorFlow1.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

**TensorFlow 1 Basics**

In [1]:
from __future__ import print_function, division
from builtins import range
import matplotlib.pyplot as plt
import numpy as np
import os
import pandas as pd
%tensorflow_version 1.x
import tensorflow as tf  # TensorFlow 1
print(tf.__version__)

TensorFlow 1.x selected.
1.15.2


In [0]:
## Placeholders
# A placeholder is a symbolic graph input: it carries no data of its own and
# must be given a value through feed_dict when the graph is run.

A = tf.placeholder(tf.float32, shape=(5, 5), name='A')  # shape and name are optional
v = tf.placeholder(tf.float32)  # no shape or name given here

In [0]:
# Symbolic matrix product of the two placeholders. Nothing is computed here;
# the value is produced only when w is run in a session with A and v fed in.
w = tf.matmul(A, v)

In [4]:
with tf.Session() as session:
    # Placeholders get their concrete values at run time through the
    # "feed_dict" argument, keyed by the placeholder objects themselves.
    # v has to be fed as a (5, 1) column, not a flat (5,) vector,
    # because tf.matmul multiplies two matrices.
    feed = {
        A: np.random.randn(5, 5),
        v: np.random.randn(5, 1),
    }
    output = session.run(w, feed_dict=feed)

    print(output, type(output))

[[ 1.4074817 ]
 [ 1.6627809 ]
 [ 1.2102717 ]
 [-0.36671883]
 [ 4.8174543 ]] <class 'numpy.ndarray'>


In [0]:
# TensorFlow variables are like Theano shared variables.
# But Theano variables are like TensorFlow placeholders.

# A tf variable can be initialised with a numpy array or a tf array,
# or anything that can be turned into a tf tensor.
x = tf.Variable(tf.random_normal((2, 2)))  # 2x2 matrix with normally distributed initial values
# NumPy alternative: x = tf.Variable(np.random.randn(2, 2))
t = tf.Variable(0) # a scalar

In [6]:
# Variables have no value until their initialiser op has been run,
# so build that op first...
init = tf.global_variables_initializer()

with tf.Session() as session:
    # ...and then "run" it; running the init op returns None.
    out = session.run(init)

    # Fetch the initial values. session.run(var) gives the same result as
    # var.eval() inside this session (eval() in tf is like get_value() in Theano).
    print(session.run(x))
    print(session.run(t))

[[ 1.258659    3.6224153 ]
 [-0.05675203  0.7406058 ]]
0


In [0]:
# Finding the minimum of a simple cost function.
# cost(u) = u^2 + u + 1 has derivative 2u + 1, so its minimum is at u = -0.5,
# where cost = 0.75.
u = tf.Variable(20.0)  # starting point for the search
cost = u*u + u + 1.0

In [0]:
# Unlike Theano, in TensorFlow you don't write the updates yourself;
# you choose an optimiser that implements the algorithm you want.
# minimize(cost) returns an op; each time it is run it applies one update step.
train_op = tf.train.GradientDescentOptimizer(0.3).minimize(cost)  # learning rate: 0.3

In [9]:
init = tf.global_variables_initializer()
with tf.Session() as session:
    # Give every variable its initial value before training.
    session.run(init)

    # The optimiser automates the weight update but not the loop around it,
    # so train_op is run repeatedly until the cost has converged.
    for i in range(12):
        session.run(train_op)
        # Read back the post-update cost and parameter to track progress.
        cost_val, u_val = session.run([cost, u])
        print("i = %d, cost = %.3f, u = %.3f" % (i, cost_val, u_val))

i = 0, cost = 67.990, u = 7.700
i = 1, cost = 11.508, u = 2.780
i = 2, cost = 2.471, u = 0.812
i = 3, cost = 1.025, u = 0.025
i = 4, cost = 0.794, u = -0.290
i = 5, cost = 0.757, u = -0.416
i = 6, cost = 0.751, u = -0.466
i = 7, cost = 0.750, u = -0.487
i = 8, cost = 0.750, u = -0.495
i = 9, cost = 0.750, u = -0.498
i = 10, cost = 0.750, u = -0.499
i = 11, cost = 0.750, u = -0.500


**Neural Network**

In [0]:
def get_normalized_data(path='../large_files/train.csv', n_test=1000):
    """Load the digit-recognizer CSV, shuffle it, split it and standardise it.

    The first CSV column is taken as the label and the remaining columns as
    features. Rows are shuffled with the global NumPy RNG, the last ``n_test``
    rows are held out as the test set, and both sets are standardised with the
    per-feature mean and standard deviation of the *training* set only.

    Args:
        path: Location of ``train.csv``. Defaults to the original hard-coded
            location so existing calls keep working.
        n_test: Number of shuffled rows to hold out for testing.

    Returns:
        Tuple ``(Xtrain, Xtest, Ytrain, Ytest)`` of float32 arrays.

    Raises:
        FileNotFoundError: if ``path`` does not exist. Raised instead of
            calling ``exit()``, which would shut down the notebook kernel.
        ValueError: if ``n_test`` would leave no training rows.
    """
    print("Reading in and transforming data...")

    if not os.path.exists(path):
        raise FileNotFoundError(
            "Could not find %s. "
            "You have not downloaded the data and/or not placed the files in the correct location. "
            "Please get the data from: https://www.kaggle.com/c/digit-recognizer "
            "and place train.csv in the folder large_files adjacent to the class folder." % path
        )

    df = pd.read_csv(path)
    data = df.values.astype(np.float32)
    if not 0 <= n_test < len(data):
        raise ValueError(
            "n_test=%d must be non-negative and smaller than the number of rows (%d)"
            % (n_test, len(data))
        )

    np.random.shuffle(data)  # in-place row shuffle before splitting
    X = data[:, 1:]
    Y = data[:, 0]

    # An explicit split index (rather than negative slicing) also behaves
    # correctly for n_test == 0.
    split = len(data) - n_test
    Xtrain, Ytrain = X[:split], Y[:split]
    Xtest, Ytest = X[split:], Y[split:]

    # normalize the data using training-set statistics only
    mu = Xtrain.mean(axis=0)
    std = Xtrain.std(axis=0)
    std[std == 0] = 1  # constant features: avoid division by zero
    Xtrain = (Xtrain - mu) / std
    Xtest = (Xtest - mu) / std

    return Xtrain, Xtest, Ytrain, Ytest


In [0]:
def y2indicator(y, n_classes=10):
    """One-hot encode a 1-D sequence of integer class labels.

    Args:
        y: Labels with values in ``[0, n_classes)``; any array-like whose
            elements can be cast to int32 (e.g. float32 labels read from CSV).
        n_classes: Number of columns in the indicator matrix. Defaults to 10,
            the value that was previously hard-coded.

    Returns:
        Float64 array of shape ``(len(y), n_classes)`` with a single 1 per row.
    """
    y = np.asarray(y).astype(np.int32)
    N = y.shape[0]
    ind = np.zeros((N, n_classes))
    # Vectorised equivalent of setting ind[i, y[i]] = 1 for every row i.
    ind[np.arange(N), y] = 1
    return ind

In [0]:
def error_rate(p, t):
    """Return the fraction of predictions that differ from the targets.

    Args:
        p: Predicted labels (array-like).
        t: True labels (array-like, same shape as ``p``).

    Returns:
        Mean of the element-wise mismatch indicator, a float in [0, 1].
    """
    # Coerce to arrays so the comparison is element-wise; comparing two plain
    # Python lists with != yields a single bool and a meaningless 0.0 / 1.0.
    return np.mean(np.asarray(p) != np.asarray(t))

In [0]:
# step 1: get the data and define all the usual variables
# (the data comes back shuffled, split into train/test and normalised)
Xtrain, Xtest, Ytrain, Ytest = get_normalized_data()

In [0]:
# Training-loop settings
max_iter = 15  # number of passes over the training batches
print_period = 50  # evaluate on the test set every print_period batches

lr = 0.00004  # learning rate handed to the optimiser
reg = 0.01  # NOTE(review): not referenced by any cell shown here - presumably reserved for regularisation; confirm

# One-hot encode the labels so they match the (N, K) target placeholder
Ytrain_ind = y2indicator(Ytrain)
Ytest_ind = y2indicator(Ytest)

N, D = Xtrain.shape  # N training samples, D input features
batch_sz = 500
n_batches = N // batch_sz  # integer division: a trailing partial batch is never visited

M1 = 300  # size of 1st hidden layer
M2 = 100  # size of 2nd hidden layer
K = 10  # size of the output layer (number of classes)
# Initial weights are standard-normal draws scaled by 1/sqrt(number of inputs
# to the layer); biases start at zero.
W1_init = np.random.randn(D, M1) / np.sqrt(D)
b1_init = np.zeros(M1)
W2_init = np.random.randn(M1, M2) / np.sqrt(M1)
b2_init = np.zeros(M2)
W3_init = np.random.randn(M2, K) / np.sqrt(M2)
b3_init = np.zeros(K)

In [0]:
# define variables and expressions
# Inputs and one-hot targets; the leading None dimension leaves the batch
# size unspecified so batches of any size can be fed.
X = tf.placeholder(tf.float32, shape=(None, D), name='X')
T = tf.placeholder(tf.float32, shape=(None, K), name='T')
# Trainable parameters, created from the NumPy initial values cast to float32
# (the dtype of the placeholders above).
W1 = tf.Variable(W1_init.astype(np.float32))
b1 = tf.Variable(b1_init.astype(np.float32))
W2 = tf.Variable(W2_init.astype(np.float32))
b2 = tf.Variable(b2_init.astype(np.float32))
W3 = tf.Variable(W3_init.astype(np.float32))
b3 = tf.Variable(b3_init.astype(np.float32))

In [0]:
# define the model
# Two ReLU hidden layers followed by a linear output layer.
Z1 = tf.nn.relu( tf.matmul(X, W1) + b1 )
Z2 = tf.nn.relu( tf.matmul(Z1, W2) + b2 )
Yish = tf.matmul(Z2, W3) + b3 # raw logits: the softmax is already included in the cost function

In [0]:
# softmax_cross_entropy_with_logits_v2 takes in the "logits" (the pre-softmax scores).
# If you wanted to know the actual output of the neural net,
# you could pass "Yish" into tf.nn.softmax(logits).
# reduce_sum adds the per-sample losses up, so the cost is a total over the
# batch rather than a mean.
cost = tf.reduce_sum(tf.nn.softmax_cross_entropy_with_logits_v2(logits=Yish, labels=T))


In [0]:
# We choose the optimizer but don't implement the algorithm ourselves.
# Let's go with RMSprop, since we just learned about it.
# It includes momentum! (configured here with decay=0.99 and momentum=0.9)
train_op = tf.train.RMSPropOptimizer(lr, decay=0.99, momentum=0.9).minimize(cost)

In [0]:
# we'll use this to calculate the error rate
# argmax along axis 1 picks the index of the largest logit in each row,
# i.e. the predicted class for each sample
predict_op = tf.argmax(Yish, 1)

In [0]:
# Mini-batch training loop. Every print_period batches the model is evaluated
# on the held-out test set and the test cost is recorded for plotting.
costs = []
init = tf.global_variables_initializer()
with tf.Session() as session:
    session.run(init)

    for i in range(max_iter):
        for j in range(n_batches):
            # Consecutive, non-overlapping slices of the training set.
            start = j * batch_sz
            stop = start + batch_sz
            Xbatch = Xtrain[start:stop]
            Ybatch = Ytrain_ind[start:stop]

            session.run(train_op, feed_dict={X: Xbatch, T: Ybatch})
            if j % print_period == 0:
                # Fetch cost and predictions in a single run so the test set
                # goes through the network once instead of twice.
                test_cost, prediction = session.run(
                    [cost, predict_op], feed_dict={X: Xtest, T: Ytest_ind}
                )
                err = error_rate(prediction, Ytest)
                print("Cost / err at iteration i=%d, j=%d: %.3f / %.3f" % (i, j, test_cost, err))
                costs.append(test_cost)

In [0]:
# Plot the test cost recorded during training: one point per evaluation,
# i.e. one every print_period batches.
plt.plot(costs)
plt.xlabel("evaluation step")
plt.ylabel("test cost")
plt.title("Test cost during training")
plt.show()
# increase max_iter and notice how the test cost starts to increase.
# are we overfitting by adding that extra layer?
# how would you add regularization to this model?