In [0]:
import tensorflow as tf # import tensorflow
from tensorflow.contrib.tensorboard.plugins import projector # used to visualize data
import datetime 
import numpy as np  # used for math operations
from scipy.misc import bytescale
import matplotlib.pyplot as plt # used to plot
import os # used to perform operations within the operating system

In [0]:
!pip3 install -q tflearn # install tflearn
import tflearn

In [0]:
# Colab helper used to expose Google Drive inside this runtime's filesystem
from google.colab import drive 
drive.mount('/content/drive') # mount Google Drive at /content/drive

In [0]:
# Navigate to the class folder you made in your Google Drive
# (for the Max Planck class, use .../My Drive/Data_Science_and_Deep_Learning instead).
# The path is absolute, built on the /content/drive mount point passed to drive.mount,
# so this cell can be re-run safely: a relative path only resolves while the
# working directory is still the one the notebook started in.
os.chdir('/content/drive/My Drive/DeepLearningFall2018')

In [0]:
# list the contents of the current working directory (shown as the cell output)
os.listdir()

In [0]:
# load the MNIST dataset through tflearn
import tflearn.datasets.mnist as mnist # tflearn's MNIST dataset helper
# one_hot=True: each label comes back as a vector with a 1 in the position of its class
X, Y, testX, testY = mnist.load_data(one_hot=True) # training images/labels, then testing images/labels

In [0]:
# print the shapes of the training and testing sets
print(X.shape, Y.shape, testX.shape, testY.shape)

In [0]:
print(Y[:10, ...]) # print the first 10 labels - value of 1 in the appropriate spot for that image

In [0]:
# invert the pixel values: change the background to white and the digits to black
# NOTE: X and testX are rebuilt from their own current values, so running this
# cell a second time inverts the images back -- run it once per kernel session
X, testX = 1. - X, 1. - testX

In [0]:
def _bytescale(data, low=0, high=255):
  '''Linearly rescale data so its minimum maps to low and its
     maximum maps to high, returning a uint8 array.

     NumPy-only stand-in for scipy.misc.bytescale (deprecated in
     SciPy 1.0 and removed in 1.2) that applies the same scaling rule:
     uint8 input is returned untouched and constant input maps to low.'''

  data = np.asarray(data)
  if data.dtype == np.uint8:
    return data

  cmin, cmax = data.min(), data.max()
  cscale = cmax - cmin
  if cscale == 0:
    cscale = 1  # constant input: avoid dividing by zero
  scale = float(high - low) / cscale
  bytedata = (data - cmin) * scale + low
  # adding 0.5 before the integer cast rounds to the nearest value
  return (bytedata.clip(low, high) + 0.5).astype(np.uint8)


def montage(x, return_grid=False):
  '''Tile a 2D array of flattened images into one square mosaic.

     Args:
       x: array of shape n x d, where n is the number of images
          and d is the number of pixels per image.
       return_grid: if True, return the mosaic instead of plotting it.

     Returns:
       The mosaic as a 2D float array when return_grid is True,
       otherwise None (the mosaic is shown with matplotlib).

     Only the first floor(sqrt(n))**2 images are used so that the
     mosaic is square. Each image is rescaled to 0-255 on its own and
     laid out as an m x m tile with m = ceil(sqrt(d)); when d is not a
     perfect square the tail of the tile is zero-padded.'''

  x = np.asarray(x)
  num = int(np.sqrt(x.shape[0]))         # tiles per side of the mosaic
  m = int(np.ceil(np.sqrt(x.shape[1])))  # side length of a single tile
  n = m
  grid = np.zeros([num*m, num*n])

  for i in range(num):
    for j in range(num):
      pixels = _bytescale(x[i*num+j, ...]).ravel()
      # the tile size follows the data instead of a fixed 28 x 28
      tile = np.zeros(m*n)
      tile[:pixels.size] = pixels
      grid[i*m:i*m+m, j*n:j*n+n] = tile.reshape([m, n])

  if return_grid:
    return grid

  fig = plt.figure(figsize=(15, 15))
  a1 = fig.add_subplot(111)
  a1.imshow(grid)
  a1.grid(False)
  plt.show()

In [0]:
# preview training images as a single mosaic
montage(X[:500, :])

In [0]:
# plot a histogram of the values of the first 1000 images
plt.hist(X[:1000, :].flatten(), bins=10)
plt.show()

In [0]:
mean = np.mean(X, 0) # take the mean of each pixel in the training set
# NOTE: `-=` changes X in place, so re-running this cell shifts the data again
X -= mean  # center pixel values of training set around 0

In [0]:
# center pixel values of test set around 0, using the training-set mean
# NOTE: also in place -- running this cell twice subtracts the mean twice
testX -= mean

In [0]:
# plot a histogram of the values in the normalized training set
plt.hist(X[:1000, :].flatten(), bins=10)
plt.show()

In [0]:
# preview the centered training images
montage(X[:100, :])

In [0]:
def install_tensorboard_dep():
  '''This function installs the necessary software to get tensorboard
     working on colaboratory.'''
  
  if 'ngrok-stable-linux-amd64.zip' not in os.listdir():
    !wget https://bin.equinox.io/c/4VmDzA7iaHb/ngrok-stable-linux-amd64.zip
    !unzip ngrok-stable-linux-amd64.zip
    os.system('n')
    
  return

In [0]:
def start_tensorboard():
  # This function starts tensorboard so you can visualize training.
  
  LOG_DIR = '/tmp/tflearn_logs'
  get_ipython().system_raw('tensorboard --logdir {} --host 0.0.0.0 --port 6006 &'.format(LOG_DIR))
  get_ipython().system_raw('./ngrok http 6006 &')
  ! curl -s http://localhost:4040/api/tunnels | python3 -c \
  "import sys, json; print(json.load(sys.stdin)['tunnels'][0]['public_url'])"
  
  return

In [0]:
def viz_mnist_embedding(tensor, images, labels):
  '''Takes in a TF variable tensor, along with images and their 
     corresponding labels and sets up the projector to 
     visualize the data space.

     Args:
       tensor: TF variable to embed; it is initialized and saved to a
               checkpoint inside the tensorboard log directory.
       images: 2D array of flattened images, passed to montage to
               build the sprite image.
       labels: iterable of integer labels, one per image, written to
               the metadata file.

     Everything is written under /tmp/tflearn_logs. Returns None.'''
  
  tb_dir = '/tmp/tflearn_logs'
  metadata_path = os.path.join(tb_dir, 'metadata.tsv')
  sprite_path = os.path.join(tb_dir, 'mnistdigits.png')
  
  # the session is closed on leaving the block, releasing its resources
  with tf.Session() as sess:
    sess.run(tensor.initializer)
    
    summary_writer = tf.summary.FileWriter(tb_dir)
    try:
      # tell the projector which tensor to show and where its
      # metadata and sprite image live
      config = projector.ProjectorConfig()
      embedding = config.embeddings.add()
      embedding.tensor_name = tensor.name
      embedding.metadata_path = metadata_path
      embedding.sprite.image_path = sprite_path
      embedding.sprite.single_image_dim.extend([28,28])
      projector.visualize_embeddings(summary_writer, config)
    finally:
      summary_writer.close()  # flush and release the event file
    
    # checkpoint holding only the embedded variable
    saver = tf.train.Saver([tensor])
    saver.save(sess, os.path.join(tb_dir, 'mnist_fc.ckpt'), 1)
  
  # NOTE(review): montage keeps only floor(sqrt(n))**2 images, so the
  # sprite can hold fewer thumbnails than there are metadata rows --
  # confirm the projector copes with the missing tail
  image_grid = montage(images, True)
  plt.imsave(sprite_path, image_grid, cmap='gray')
  
  # one row per image: its index and its label
  with open(metadata_path,'w') as f:
    f.write("Index\tLabel\n")
    for index,label in enumerate(labels):
      f.write("%d\t%d\n" % (index,label))

In [0]:
tf.reset_default_graph()  # reset the graph if running more than once

In [0]:
# make the input layer of the network
# shape=[None, 784]: the first dimension is left unspecified, 784 values per image
input_layer = tflearn.input_data(shape=[None, 784])

In [0]:
# make a variable for the visualization: holds the first 2000 training images
emb = tf.Variable(X[:2000, :], name='input_images')

In [0]:
# hidden layer 1: 500 tanh units reading the input layer;
# the L2 regularizer keeps the layer from relying too heavily on any given weight
hidden1 = tflearn.fully_connected(input_layer, 500, activation='tanh',
                                  regularizer='L2', name='fc1')
# dropout on layer 1 --- keep 70% of the values; helps generalization by not
# allowing nodes to rely too heavily on certain nodes in the previous layer
hidden1 = tflearn.dropout(hidden1, 0.7)

# hidden layer 2: same configuration, stacked on layer 1
hidden2 = tflearn.fully_connected(hidden1, 500, activation='tanh',
                                  regularizer='L2', name='fc2')
hidden2 = tflearn.dropout(hidden2, 0.7)  # keep 70% of the values

# hidden layer 3: 500 tanh units, no regularizer argument
hidden3 = tflearn.fully_connected(hidden2, 500, activation='tanh', name='fc3')
hidden3 = tflearn.dropout(hidden3, 0.7)  # keep 70% of the values

# output layer: one node per class (10); softmax turns activations into class probabilities
output = tflearn.fully_connected(hidden3, 10, activation='softmax',
                                 name='output_layer')

In [0]:
# describe the optimizer --- learning rate between 1e-5 and .1
sgd = tflearn.SGD(learning_rate=0.1)

In [0]:
# describe how to update weights: attach the optimizer and the loss to the output layer
network = tflearn.regression(output, optimizer=sgd, loss='categorical_crossentropy')

In [0]:
# NOTE(review): tensorboard_name is not referenced again in the visible cells
tensorboard_name = 'mnist_fc_tflearn'  # name for tensorboard run
# set up the projector for the first 2000 training images;
# np.argmax(..., 1) turns each one-hot label row into its integer class index
viz_mnist_embedding(emb, X[:2000, :], np.argmax(Y[:2000, :], 1))

In [0]:
install_tensorboard_dep() # install tensorboard dependencies

In [0]:
start_tensorboard()  # start tensorboard to visualize training/model/etc.

In [0]:
# build the model based on architecture/parameters described above
model = tflearn.DNN(network, tensorboard_verbose=3) 

In [0]:
# train the model
model.fit(X,  # training data
          Y,  # training labels
          n_epoch=10,  # number of times to go through the dataset
          validation_set=(testX, testY),  # validation dataset/labels
          batch_size=150,  # number of training images to look at at one time
          snapshot_step=200,  # how often (in training steps) to test on the validation set
          show_metric=True,  # report the metric alongside the loss while training
          run_id='mnist_fc_3layers')  # name this run is shown under in tensorboard

In [0]:
model.save('mnist_fc_3layers_model')  # save the trained model

In [0]:
# open the saved model checkpoint for reading
from tensorflow.python import pywrap_tensorflow
reader = pywrap_tensorflow.NewCheckpointReader('mnist_fc_3layers_model')
model_vars = reader.get_variable_to_shape_map() # map of variable name -> shape (not the weight values)

In [0]:
print(model_vars) # print out the model's variable names and shapes

In [0]:
# get weights of first hidden layer
hidden1_w = reader.get_tensor('fc1/W')

In [0]:
# print shape of first hidden layer's weights
print(hidden1_w.shape)

In [0]:
# NOTE: reassigns hidden1_w from itself, so re-running this cell undoes the transpose
hidden1_w = hidden1_w.T  # transpose hidden weights to go into montage function

In [0]:
# print the shape again to confirm the transpose
print(hidden1_w.shape)

In [0]:
montage(hidden1_w)  # view weights of first hidden layer