# Initializing and Restoring EMI-RNN Graphs

The *EMI-RNN* implementation supports four forms of initialization/restoring:
1. An entirely new graph can be constructed with randomly initialized weights.
2. A saved graph can be loaded into the current `EMI_Driver`.
3. An entirely new graph can be constructed with weights initialized from a saved graph. This behavior is essentially restoration of a saved graph.
4. (*Experimental*) Initializing/Restoring using numpy matrices.

All four methods will be illustrated in this notebook. This notebook uses the HAR dataset and builds on the [EMI LSTM example.ipynb](00_emi_lstm_example.ipynb).

In [1]:
from __future__ import print_function
import os
import sys
import tensorflow as tf
import numpy as np
# To include edgeml in python path
sys.path.insert(0, '../../')
os.environ['CUDA_VISIBLE_DEVICES'] ='0'

# MI-RNN and EMI-RNN imports
from edgeml.graph.rnn import EMI_DataPipeline
from edgeml.graph.rnn import EMI_BasicLSTM
from edgeml.trainer.emirnnTrainer import EMI_Trainer, EMI_Driver
import edgeml.utils

Let us set up some network parameters for the computation graph.

In [2]:
# Network parameters for our LSTM + FC Layer
# NUM_HIDDEN and NUM_OUTPUT also fix the shape of the secondary
# classifier's weights (W1 is [NUM_HIDDEN, NUM_OUTPUT], B1 is [NUM_OUTPUT]).
NUM_HIDDEN = 32
NUM_TIMESTEPS = 48
NUM_FEATS = 9
FORGET_BIAS = 1.0
NUM_OUTPUT = 6
USE_DROPOUT = True
# Passed as `keep_prob` to the training run below.
KEEP_PROB = 0.75

# For dataset API
PREFETCH_NUM = 5
BATCH_SIZE = 32

# Number of epochs in *one iteration*
NUM_EPOCHS = 3
# Number of iterations in *one round*. After each iteration,
# the model is dumped to disk. At the end of the current
# round, the best model among all the dumped models in the
# current round is picked.
NUM_ITER = 1
# A round consists of multiple training iterations and a belief
# update step using the best model from all of these iterations
NUM_ROUNDS = 2

# Path prefix inside a staging directory where models are stored.
# Checkpoints show up as '<MODEL_PREFIX>-<global_step>' in the logs.
MODEL_PREFIX = '/tmp/models/model-lstm'

# Loading Data

In [3]:
# Loading the data: pre-processed HAR train / test / validation splits.
x_train, y_train = np.load('./HAR/48_16/x_train.npy'), np.load('./HAR/48_16/y_train.npy')
x_test, y_test = np.load('./HAR/48_16/x_test.npy'), np.load('./HAR/48_16/y_test.npy')
x_val, y_val = np.load('./HAR/48_16/x_val.npy'), np.load('./HAR/48_16/y_val.npy')

# BAG_TEST, BAG_TRAIN, BAG_VAL represent bag_level labels. These are used for the label update
# step of EMI/MI RNN
# The bag label is decoded (argmax) from the label of the first sub-instance.
# NOTE(review): assumes all sub-instances of a bag start with the same
# one-hot label -- confirm against the data preparation script.
BAG_TEST = np.argmax(y_test[:, 0, :], axis=1)
BAG_TRAIN = np.argmax(y_train[:, 0, :], axis=1)
BAG_VAL = np.argmax(y_val[:, 0, :], axis=1)
# Number of sub-instances per bag is the second axis of x_train.
NUM_SUBINSTANCE = x_train.shape[1]
print("x_train shape is:", x_train.shape)
print("y_train shape is:", y_train.shape)
# These two lines print the *validation* split, so label them accordingly
# (they were previously labelled "x_test"/"y_test").
print("x_val shape is:", x_val.shape)
print("y_val shape is:", y_val.shape)

x_train shape is: (6220, 6, 48, 9)
y_train shape is: (6220, 6, 6)
x_test shape is: (1132, 6, 48, 9)
y_test shape is: (1132, 6, 6)


In [4]:
# Define the linear secondary classifier
def createExtendedGraph(self, baseOutput, *args, **kwargs):
    """Attach the linear secondary classifier to the base RNN output.

    Creates two float32 variables, 'W1' of shape [NUM_HIDDEN, NUM_OUTPUT]
    and 'B1' of shape [NUM_OUTPUT], both initialised from a standard
    normal distribution. The result of `baseOutput . W1 + B1` is given the
    op name 'y_cap_tata' so that it can be looked up by name when a saved
    graph is restored. The tensor is stored in `self.output` and
    `self.graphCreated` is set to True.

    Extra positional/keyword arguments are accepted and ignored.
    """
    # Draw the initial values first (weights, then bias) and only then wrap
    # them in variables; the variable creation order is W1 followed by B1.
    weightInit = np.random.normal(size=[NUM_HIDDEN, NUM_OUTPUT]).astype('float32')
    biasInit = np.random.normal(size=[NUM_OUTPUT]).astype('float32')
    weights = tf.Variable(weightInit, name='W1')
    offsets = tf.Variable(biasInit, name='B1')

    # axes=1 contracts the last axis of baseOutput with the first axis of W1.
    projected = tf.tensordot(baseOutput, weights, axes=1)
    self.output = tf.add(projected, offsets, name='y_cap_tata')
    self.graphCreated = True
    
def addExtendedAssignOps(self, graph, W_val=None, B_val=None):
    """Queue assign ops that overwrite the secondary classifier's parameters.

    Looks up the tensors named 'W1:0' and 'B1:0' in `graph`, builds one
    `tf.assign` op for each of them and appends both ops to
    `self.assignOps`. The ops are only created here; the caller still has
    to run them in a session.

    Args:
        graph: graph that contains the 'W1:0' and 'B1:0' tensors.
        W_val: value to assign to 'W1:0'. Required despite the default.
        B_val: value to assign to 'B1:0'. Required despite the default.

    Raises:
        ValueError: if `W_val` or `B_val` is None.
    """
    # The None defaults can never produce a valid assign op; fail early with
    # a readable message instead of an obscure error from inside tf.assign.
    missing = [argName for argName, argVal in (('W_val', W_val), ('B_val', B_val))
               if argVal is None]
    if missing:
        raise ValueError('addExtendedAssignOps: missing value(s) for %s'
                         % ', '.join(missing))

    W1 = graph.get_tensor_by_name('W1:0')
    B1 = graph.get_tensor_by_name('B1:0')
    W1_op = tf.assign(W1, W_val)
    B1_op = tf.assign(B1, B_val)
    # NOTE(review): assumes self.assignOps already exists (in this notebook
    # addBaseAssignOps is called first) -- confirm against EMI_BasicLSTM.
    self.assignOps.extend([W1_op, B1_op])

def restoreExtendedGraph(self, graph, *args, **kwargs):
    """Re-bind the secondary classifier output from an existing graph.

    Fetches the tensor named 'y_cap_tata:0' from `graph`, stores it in
    `self.output` and sets `self.graphCreated` to True. No new ops are
    created. Extra positional/keyword arguments are accepted and ignored.
    """
    self.output = graph.get_tensor_by_name('y_cap_tata:0')
    self.graphCreated = True
    
def feedDictFunc(self, keep_prob, **kwargs):
    """Build the extra feed dict entries needed when dropout is enabled.

    Returns a dict that maps the `keep_prob` attribute of `self._emiGraph`
    to the supplied `keep_prob` value. Any other keyword arguments are
    ignored.
    """
    return {self._emiGraph.keep_prob: keep_prob}
    
# Install the functions defined above on the library classes so that every
# EMI_BasicLSTM instance uses the linear secondary classifier.
setattr(EMI_BasicLSTM, '_createExtendedGraph', createExtendedGraph)
setattr(EMI_BasicLSTM, '_restoreExtendedGraph', restoreExtendedGraph)
setattr(EMI_BasicLSTM, 'addExtendedAssignOps', addExtendedAssignOps)

# The driver only needs to feed keep_prob when dropout is switched on.
if USE_DROPOUT is True:
    setattr(EMI_Driver, 'feedDictFunc', feedDictFunc)

## 1. Initializing a New Computation Graph

In the most common use cases, a new EMI-RNN graph would be created and trained

In [18]:
# Start from a clean default graph before building anything new.
tf.reset_default_graph()

# Python-side wrappers; no graph ops are created until they are called.
inputPipeline = EMI_DataPipeline(NUM_SUBINSTANCE, NUM_TIMESTEPS, NUM_FEATS, NUM_OUTPUT)
emiLSTM = EMI_BasicLSTM(NUM_SUBINSTANCE, NUM_HIDDEN, NUM_TIMESTEPS, NUM_FEATS,
                        forgetBias=FORGET_BIAS, useDropout=USE_DROPOUT)
emiTrainer = EMI_Trainer(NUM_TIMESTEPS, NUM_OUTPUT, lossType='xentropy')

# Construct the forward graph, the training ops and the driver inside a
# dedicated graph object.
g1 = tf.Graph()
with g1.as_default():
    x_batch, y_batch = inputPipeline()
    y_cap = emiLSTM(x_batch)
    emiTrainer(y_cap, y_batch)
    emiDriver = EMI_Driver(inputPipeline, emiLSTM, emiTrainer)


Let's initialize a new session with this graph and train a model. The saved model will be used later for restoring.

In [6]:
# Create a session for the graph built above and run the training rounds.
# Checkpoints are written under MODEL_PREFIX; the log output reports the
# global step of every saved/restored model, which is needed later to
# restore a specific checkpoint.
emiDriver.initializeSession(g1)
y_updated, modelStats = emiDriver.run(numClasses=NUM_OUTPUT, x_train=x_train,
                                      y_train=y_train, bag_train=BAG_TRAIN,
                                      x_val=x_val, y_val=y_val, bag_val=BAG_VAL,
                                      numIter=NUM_ITER, keep_prob=KEEP_PROB,
                                      numRounds=NUM_ROUNDS, batchSize=BATCH_SIZE,
                                      numEpochs=NUM_EPOCHS, modelPrefix=MODEL_PREFIX,
                                      fracEMI=0.5, updatePolicy='top-k', k=1)

Update policy: top-k
Training with MI-RNN loss for 1 rounds
Round: 0
Epoch   2 Batch   180 (  570) Loss 0.00457 Acc 0.88542 | Val acc 0.94594 | Model saved to /tmp/models/model-lstm, global_step 1000
INFO:tensorflow:Restoring parameters from /tmp/models/model-lstm-1000
Round: 1
Switching to EMI-Loss function
Epoch   2 Batch   180 (  570) Loss 0.30313 Acc 0.93229 | Val acc 0.97439 | Model saved to /tmp/models/model-lstm, global_step 1001
INFO:tensorflow:Restoring parameters from /tmp/models/model-lstm-1001


As the output above indicates, the last restored model is `/tmp/models/model-lstm-1001`. That is, with `MODEL_PREFIX = '/tmp/models/model-lstm'` and `GLOBAL_STEP=1001`.

In [7]:
def earlyPolicy_minProb(instanceOut, minProb, **kwargs):
    """Early-prediction policy based on a minimum probability threshold.

    `instanceOut` must be a 2-D array; row `i` holds the class scores at
    step `i`. The policy stops at the first step whose highest score is at
    least `minProb`.

    Returns:
        A `(predictedClass, step)` pair. If no step reaches `minProb`, the
        prediction of the last step and the last step index are returned.

    Extra keyword arguments are accepted and ignored.
    """
    assert instanceOut.ndim == 2
    predicted = np.argmax(instanceOut, axis=1)
    confidence = np.max(instanceOut, axis=1)
    confident = np.where(confidence >= minProb)[0]

    if len(confident) > 0:
        # Earliest step that is confident enough.
        firstStep = confident[0]
        return predicted[firstStep], firstStep

    # Threshold never reached: fall back to the final step.
    lastStep = len(instanceOut) - 1
    assert lastStep == (len(predicted) - 1)
    return predicted[-1], lastStep


# Evaluate the freshly trained model on the test split.
# k is passed to getBagPredictions as minSubsequenceLen.
k = 2
# keep_prob=1.0 is fed to the graph's keep_prob input (see feedDictFunc),
# i.e. presumably no dropout is applied at inference time.
predictions, predictionStep = emiDriver.getInstancePredictions(x_test, y_test, earlyPolicy_minProb,
                                                               minProb=0.99, keep_prob=1.0)
bagPredictions = emiDriver.getBagPredictions(predictions, minSubsequenceLen=k, numClass=NUM_OUTPUT)
# Accuracy = fraction of bags whose predicted label equals BAG_TEST.
print('Accuracy at k = %d: %f' % (k,  np.mean((bagPredictions == BAG_TEST).astype(int))))

Accuracy at k = 2: 0.919919


## 2. Loading a Saved Graph into EMI-Driver

We will reset the computation graph, load a saved graph into the current `EMI_Driver` and verify its outputs.

In [8]:
# Drop whatever is in the default graph, then let the existing driver load
# the checkpoint '<MODEL_PREFIX>-1001' into a new session.
tf.reset_default_graph()

# 1001 is the global step of the last model restored during training
# (see the training log above).
emiDriver.loadSavedGraphToNewSession(MODEL_PREFIX, 1001)
# Same evaluation as before; the accuracy should match the trained model.
k = 2
predictions, predictionStep = emiDriver.getInstancePredictions(x_test, y_test, earlyPolicy_minProb,
                                                               minProb=0.99, keep_prob=1.0)
bagPredictions = emiDriver.getBagPredictions(predictions, minSubsequenceLen=k, numClass=NUM_OUTPUT)
print('Accuracy at k = %d: %f' % (k,  np.mean((bagPredictions == BAG_TEST).astype(int))))


INFO:tensorflow:Restoring parameters from /tmp/models/model-lstm-1001
Accuracy at k = 2: 0.919919


## 3. Initializing using a Saved Graph

Here we will construct a new computation graph, but will use a previously trained model to initialize it.

In [9]:
# Making sure the old graph and sessions are closed
sess = emiDriver.getCurrentSession()
sess.close()
tf.reset_default_graph()

Use the `GraphManager` to load the saved checkpoint into a new session.

In [10]:
# New session on the (freshly reset) default graph.
sess = tf.Session()
graphManager = edgeml.utils.GraphManager()
# Restore checkpoint '<MODEL_PREFIX>-1001' into `sess`; the returned graph is
# handed to the EMI wrappers in the next cell.
graph = graphManager.loadCheckpoint(sess, MODEL_PREFIX, globalStep=1001)

INFO:tensorflow:Restoring parameters from /tmp/models/model-lstm-1001


Construct the forward graph as before, but provide the loaded `graph` as an argument to `__init__`.

In [11]:
# Re-create the Python-side wrappers, this time handing each of them the
# graph that was loaded from the checkpoint.
# NOTE(review): with `graph=` set, the wrappers presumably look up existing
# tensors instead of creating new ones -- confirm against edgeml.
inputPipeline = EMI_DataPipeline(NUM_SUBINSTANCE, NUM_TIMESTEPS, NUM_FEATS, NUM_OUTPUT, graph=graph)
emiLSTM = EMI_BasicLSTM(NUM_SUBINSTANCE, NUM_HIDDEN, NUM_TIMESTEPS, NUM_FEATS,
                        forgetBias=FORGET_BIAS, useDropout=USE_DROPOUT, graph=graph)
emiTrainer = EMI_Trainer(NUM_TIMESTEPS, NUM_OUTPUT, lossType='xentropy', graph=graph)

# Same call sequence as for a brand new graph, executed inside the loaded
# graph; the driver is created in the same context.
g1 = graph
with g1.as_default():
    x_batch, y_batch = inputPipeline()
    y_cap = emiLSTM(x_batch)
    emiTrainer(y_cap, y_batch)
    emiDriver = EMI_Driver(inputPipeline, emiLSTM, emiTrainer)

Let `EMI_Driver` know that we already have a session in place.

In [12]:
# Reuse the session that already holds the restored checkpoint instead of
# initializing a new one.
emiDriver.setSession(sess)

In [13]:
# Evaluate the graph initialized from the checkpoint; the accuracy should
# match the one obtained right after training.
k = 2
predictions, predictionStep = emiDriver.getInstancePredictions(x_test, y_test, earlyPolicy_minProb,
                                                               minProb=0.99, keep_prob=1.0)
bagPredictions = emiDriver.getBagPredictions(predictions, minSubsequenceLen=k, numClass=NUM_OUTPUT)
# Accuracy = fraction of bags whose predicted label equals BAG_TEST.
print('Accuracy at k = %d: %f' % (k,  np.mean((bagPredictions == BAG_TEST).astype(int))))

Accuracy at k = 2: 0.919919


## 4. Restoring from Numpy Matrices

We first extract the model matrices from the graph and dump them into `.npy` files. Then we load them back again and initialize a new graph with these matrices.

In [14]:
# Fetch the secondary classifier variables (created under the names 'W1'
# and 'B1' in createExtendedGraph) from the default graph.
graph = tf.get_default_graph()
W1 = graph.get_tensor_by_name('W1:0')
B1 = graph.get_tensor_by_name('B1:0')
# Base RNN variables first, followed by W1 and B1.
allVars = emiLSTM.varList + [W1, B1]
sess = emiDriver.getCurrentSession()
# From here on allVars holds the evaluated numpy values, not tensors.
allVars = sess.run(allVars)

# NOTE(review): the index mapping below assumes emiLSTM.varList is exactly
# [kernel, bias], in that order -- confirm against EMI_BasicLSTM.
base = '/tmp/models/'
np.save(base + 'kernel.npy', allVars[0])
np.save(base + 'bias.npy', allVars[1])
np.save(base + 'W1.npy', allVars[2])
np.save(base + 'B1.npy', allVars[3])

Reset the current session and graph

In [15]:
# Close the driver's current session and clear the default graph so that the
# next graph is built from scratch.
sess = emiDriver.getCurrentSession()
sess.close()
tf.reset_default_graph()

Load the numpy matrices that will be used to initialize the graph.

In [16]:
# Read back the four matrices that were dumped to /tmp/models/ above.
base = '/tmp/models/'
kernel = np.load(base + 'kernel.npy')
bias = np.load(base + 'bias.npy')
# W and B are the values for the secondary classifier's 'W1' and 'B1'.
W = np.load(base + 'W1.npy')
B = np.load(base + 'B1.npy')

Proceed with graph construction as normally done, except that we add the requisite assignment operations.

In [17]:
# Plain wrappers (no `graph=` argument): a brand new graph will be built and
# its weights overwritten with the numpy matrices afterwards.
inputPipeline = EMI_DataPipeline(NUM_SUBINSTANCE, NUM_TIMESTEPS, NUM_FEATS,
                                 NUM_OUTPUT)
emiLSTM = EMI_BasicLSTM(NUM_SUBINSTANCE, NUM_HIDDEN, NUM_TIMESTEPS, NUM_FEATS,
                        forgetBias=FORGET_BIAS, useDropout=USE_DROPOUT)
emiTrainer = EMI_Trainer(NUM_TIMESTEPS, NUM_OUTPUT, lossType='xentropy')

# NOTE(review): the model is built in the explicit `graph` below, so this
# reset of the default graph looks redundant -- confirm before removing.
tf.reset_default_graph()
graph = tf.Graph()

with graph.as_default():
    x_batch, y_batch = inputPipeline()
    y_cap = emiLSTM(x_batch)
    emiTrainer(y_cap, y_batch)
    # Add the assignment operations
    # Base RNN values first, then the secondary classifier's W1/B1 values.
    emiLSTM.addBaseAssignOps(graph, [kernel, bias])
    emiLSTM.addExtendedAssignOps(graph, W, B)
    # Setup the driver. You can run the initializations manually as well
    emiDriver = EMI_Driver(inputPipeline, emiLSTM, emiTrainer)

emiDriver.initializeSession(graph)
# Run the assignment operations
# This happens after initializeSession so that the loaded values are the
# ones left in the variables.
sess = emiDriver.getCurrentSession()
sess.run(emiLSTM.assignOps)

# Same evaluation as in the previous sections; the accuracy should match.
k = 2
predictions, predictionStep = emiDriver.getInstancePredictions(x_test, y_test,
                                                               earlyPolicy_minProb,
                                                               minProb=0.99,
                                                               keep_prob=1.0)
bagPredictions = emiDriver.getBagPredictions(predictions, minSubsequenceLen=k,
                                             numClass=NUM_OUTPUT)
print('PART IV: Accuracy at k = %d: %f' % (k,  np.mean((bagPredictions ==
                                                        BAG_TEST).astype(int))))

PART IV: Accuracy at k = 2: 0.919919
