In [1]:
from __future__ import absolute_import
from __future__ import print_function
from __future__ import division
import os
import time
import random
import datetime

import scipy
import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline

from tensorflow.examples.tutorials.mnist import input_data
slim = tf.contrib.slim

# Training

In [2]:
mnist = input_data.read_data_sets('MNIST_data')
x_size, y_size = 28, 28
n_classes = 10

n_epochs = 1000
original_ckpt_path = './models/MNIST_NLA_vanilla.ckpt'
#ckpt_path = './models/MNIST_NLA_vanilla.ckpt'
variables_file = './variables/scheme1_fr.npz'
activations_file = './variables/scheme1_dr.npz'

def timestamp():
    d = datetime.datetime.now()
    return d.strftime("%Y/%m/%d/%X")

timestamp()

Extracting MNIST_data/train-images-idx3-ubyte.gz
Extracting MNIST_data/train-labels-idx1-ubyte.gz
Extracting MNIST_data/t10k-images-idx3-ubyte.gz
Extracting MNIST_data/t10k-labels-idx1-ubyte.gz


'2016/12/16/04:28:56'

In [3]:
def random_batch_iterator(x, y, batch_size):
    n = x.shape[0]
    assert n == y.shape[0]
    
    while True:
        index = np.random.randint(n, size=batch_size)
        x_batch, y_batch = x[index], y[index]
        yield x_batch.copy(), y_batch.copy()
        
def batch_iterator(x, y, batch_size):
    n = x.shape[0]
    assert n == y.shape[0]
    
    for i in range(0, n, batch_size):
        x_batch, y_batch = x[i:i+batch_size], y[i:i+batch_size]        
        yield x_batch.copy(), y_batch.copy()

In [4]:
def build_cnn(inputs, is_training, n_conv, conv_base, conv_mul,
              conv_size, pool_size):
    l = inputs
    for i in range(n_conv):
        n_filters = conv_base * conv_mul ** i
        l = slim.conv2d(l, n_filters, [conv_size, conv_size],
                        scope='conv{}'.format(i+1), is_training=is_training)
        l = slim.max_pool2d(l, [pool_size, pool_size], scope='maxpool{}'.format(i+1), is_training=is_training)
    l = slim.flatten(l)
    
    l = slim.dropout(l, 0.5, scope='dropout', is_training=is_training)
    l = slim.fully_connected(l, 10, activation_fn=None, scope='logits', is_training=is_training)
    return l

def build_cnn_method1(inputs, is_training, n_conv, conv_base, conv_mul,
              conv_size, pool_size, init_vals):
    l = inputs
    
    for i in range(n_conv):
        n_filters = conv_base * conv_mul ** i
        M = n_filters // 4
        
        batch_size, w, h, in_channels = l.get_shape()
        
        with tf.variable_scope('conv{}'.format(i+1)):
            b = init_vals[0][i]
            init_val = tf.constant(b, dtype=tf.float32)
            #f = tf.get_variable('basis', initializer=init_val,
            #                   trainable=False)
            l = tf.nn.depthwise_conv2d(l, init_val, [1, 1, 1, 1], padding='SAME', name='dpconv')
            a = init_vals[1][i]
            init_a = tf.constant(a, dtype=tf.float32)
            #f1 = tf.get_variable('a1', initializer=init_a,
             #                  trainable=False)
            biases = tf.get_variable(shape=[n_filters], name='biases')
            l = tf.nn.relu(tf.nn.conv2d(l, init_a, strides=[1, 1, 1, 1], padding='SAME') + biases)
            
        l = slim.max_pool2d(l, [pool_size, pool_size], scope='maxpool{}'.format(i+1))
    l = slim.flatten(l)
    
    l = slim.dropout(l, 0.5, scope='dropout', is_training=is_training)
    l = slim.fully_connected(l, 10, activation_fn=None, scope='logits', trainable=False)
    return l

def build_loss(logits, y_true):
    logloss = tf.reduce_mean(tf.nn.sparse_softmax_cross_entropy_with_logits(logits, y_true),
                             name='logloss')
    return logloss

In [5]:
batch_size = 512
test_list = list(batch_iterator(
        mnist.test.images, mnist.test.labels, batch_size=batch_size))

n_conv = 2
conv_base = 32
conv_mul = 2
conv_size = 5
pool_size = 2

def build_model(graph, build_cnn, init_vals):
    with graph.as_default():#, graph.device('/cpu:0'):
        with tf.variable_scope('model') as vs:
            is_training = tf.placeholder(tf.bool)
            x_ph = tf.placeholder(tf.float32, shape=[batch_size, x_size * y_size])
            x_image = tf.reshape(x_ph, [-1, x_size, y_size, 1])
            y_ph = tf.placeholder(tf.int64, shape=[batch_size])
            
            logits = build_cnn(x_image, is_training=is_training, n_conv=n_conv,
                               conv_base=conv_base, conv_mul=conv_mul,
                               conv_size=conv_size, pool_size=pool_size, init_vals=init_vals)
            
            prediction = tf.nn.softmax(logits, name='predictions')

            loss = build_loss(logits, y_ph)

            #optimizer = tf.train.AdamOptimizer().minimize(loss, name='optimizer')

            correct_prediction = tf.equal(tf.argmax(prediction, 1), y_ph)
            accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32), name='accuracy')

        # Code to use of tensorboard
        with tf.name_scope('summaries'):
            tf.scalar_summary('log_loss', loss)
            tf.scalar_summary('acc', accuracy)
            merged_summary = tf.merge_all_summaries()
            
    return {
        'is_training': is_training,
        'x_ph': x_ph,
        'y_ph': y_ph,
        'prediction': prediction,
        'loss': loss,
        'accuracy': accuracy,
        'merged_summary': merged_summary
    }

In [6]:
def train_net(layers, session, n_epochs, tb_path='/tmp/tf/', ckpt=None):
    tb_path = tb_path + timestamp()
    
    l = layers
    x_ph, y_ph, is_training = l['x_ph'], l['y_ph'], l['is_training']
    val_operations = [l['merged_summary'], l['accuracy'], l['loss']]
    train_operations = [l['optimizer']] + val_operations
    
    train_iterator = random_batch_iterator(
        mnist.train.images, mnist.train.labels, batch_size=batch_size)
    val_iterator = random_batch_iterator(
        mnist.validation.images, mnist.validation.labels, batch_size=batch_size)
        
    train_writer = tf.train.SummaryWriter(tb_path+'/train', session.graph)
    val_writer = tf.train.SummaryWriter(tb_path+'/val', session.graph)
    
    best_acc = 0.0
    
    variables_to_restore = slim.get_variables_to_restore(exclude=["model/conv1", "model/conv2", 
                                                                  "model/model/conv1", "model/model/conv2"])
    #print(variables_to_restore)
    print([v.name for v in variables_to_restore])
    restore = tf.train.Saver(variables_to_restore)
    restore.restore(session, ckpt)
    #to_drop = False
    
    tf.global_variables_initializer().run()
    for epoch in range(n_epochs):
        x_batch, y_batch = next(train_iterator)
        feed_dict = {x_ph: x_batch, y_ph: y_batch, is_training: True}
        _, summary, acc, _ = session.run(train_operations, feed_dict)
        train_writer.add_summary(summary, epoch)

        x_batch, y_batch = next(val_iterator)
        feed_dict = {x_ph: x_batch, y_ph: y_batch, is_training: False}
        summary, acc, _ = session.run(val_operations, feed_dict)
        val_writer.add_summary(summary, epoch)
        print(acc)
                
    return best_acc

def evaluate_net(layers, session, ckpt):
    l = layers
    x_ph, y_ph, is_training = l['x_ph'], l['y_ph'], l['is_training']
    
    test_iterator = iter(test_list)
    
    n, test_acc = 0, 0.0
    variables_to_restore = slim.get_variables_to_restore(exclude=["model/conv1/weights", "model/conv2/weights", 
                                                                  "model/model/conv1/weights", "model/model/conv2/weights"])
    #print(variables_to_restore)
    print([v.name for v in variables_to_restore])
    restore = tf.train.Saver(variables_to_restore)
    restore.restore(session, ckpt)
    start = time.time()
    for x_batch, y_batch in test_iterator:
        if len(x_batch) != batch_size:
            break
        feed_dict = {x_ph: x_batch, y_ph: y_batch, is_training: False}
        test_acc += l['accuracy'].eval(feed_dict=feed_dict)
        n += 1
    end = time.time()
    test_acc = test_acc / n
    return test_acc, end-start

In [47]:
data = np.load('variables/fd_4_6.npz')

In [48]:
init_vals = [[np.rollaxis(data['basis1'], 0, 3)[:, :, None, :],
              np.rollaxis(data['basis2'], 0, 3)[:, :, None, :].repeat(32, 2)],
             [data['a1'][None, :, :, :].swapaxes(2, 3),
              data['a2'][None, :, :, :].swapaxes(2, 3).reshape((1, 1, -1, 64))]]

In [49]:
graph = tf.Graph()

layers = build_model(graph, build_cnn_method1, init_vals)
with tf.Session(graph=graph) as session:
    test_acc, els = evaluate_net(layers, session, ckpt=original_ckpt_path)

test_acc, els

Instructions for updating:
Please switch to tf.summary.scalar. Note that tf.summary.scalar uses the node name instead of the tag. This means that TensorFlow will automatically de-duplicate summary names based on the scope they are created in. Also, passing a tensor or list of tags to a scalar summary op is no longer supported.
Instructions for updating:
Please switch to tf.summary.scalar. Note that tf.summary.scalar uses the node name instead of the tag. This means that TensorFlow will automatically de-duplicate summary names based on the scope they are created in. Also, passing a tensor or list of tags to a scalar summary op is no longer supported.
Instructions for updating:
Please switch to tf.summary.merge_all.
Instructions for updating:
Please switch to tf.summary.merge.
['model/conv1/biases:0', 'model/conv2/biases:0', 'model/logits/weights:0', 'model/logits/biases:0']


(0.95836759868421051, 3.0817477703094482)

16 // 4

## Saving weights

In [95]:
with tf.Session(graph=graph) as session:
    restorer = tf.train.Saver()
    restorer.restore(session, ckpt_path)
    conv_vars = {}
    for i in range(n_conv):
        for name in ['weights', 'bias']:
            full_name = 'conv{}/{}'.format(i+1, name)
            conv_vars[full_name] = slim.get_variables(scope='model/'+full_name)[0].eval()
            
np.savez(variables_file, **conv_vars)

NameError: name 'ckpt_path' is not defined

In [None]:
np.load(variables_file).keys()

In [None]:
n_measurements = 100

with tf.Session(graph=graph) as session:
    restorer = tf.train.Saver()
    restorer.restore(session, ckpt_path)
    mesurements = []
    for i in range(n_measurements):
        test_acc, els = evaluate_net(layers, session)
        mesurements.append(els)

In [None]:
plt.hist(mesurements, bins=40, range=(0.135, 0.15))