# Advanced ML Part II // Lecture 07 Scratch // includes RNN

In [1]:
import numpy as np
import matplotlib.pyplot as plt
import scipy.io as spio

In [2]:
#import tensorflow as tf
import tensorflow.compat.v1 as tf
tf.disable_v2_behavior()

Instructions for updating:
non-resource variables are not supported in the long term


In [3]:
def softmax(x):
    """Return the softmax of ``x``, normalised over all of its elements.

    The maximum of ``x`` is subtracted before exponentiating so that large
    inputs do not overflow; the shift cancels out in the ratio.
    """
    shifted = x - np.max(x)
    weights = np.exp(shifted)
    total = weights.sum()
    return weights / total

### Some basic loading and inspection of Shakespeare

In [4]:
# load and inspect data: print the first 20 lines of the corpus.
# The context manager closes the file on every path, including when the file
# holds fewer than 20 lines (the manual close only ran on reaching line 21).
with open('shakespeare_input.txt') as shakes:
    for line_no, line in enumerate(shakes):
        if line_no >= 20:
            break
        print(line, end="")

In [None]:
# load and format data
# read inside a context manager so the file handle is released straight away
with open('shakespeare_input.txt') as corpus:
    all_text = corpus.read()
#all_text = 'the quick brown fox jumped over the lazy dog '*10000
# sorted() fixes the vocabulary order; iterating a bare set of strings can give
# a different character -> index mapping on every interpreter run
charset = sorted(set(all_text))
n_in = len(charset)
n_train = len(all_text)
print('The {} inputs are: \n {}'.format(n_in,charset))
# useful lookups: character -> index and index -> character
ch2ind = {ch: ind for ind, ch in enumerate(charset)}
ind2ch = {ind: ch for ind, ch in enumerate(charset)}
# useful helper functions
def ch2onehot(x):
    """One-hot encode the characters of ``x``, one row per character.

    Each character is mapped to its index through ``ch2ind``; the result is an
    integer array with ``n_in`` columns.
    """
    indices = np.array([ch2ind[ch] for ch in x])
    column = indices.reshape([-1, 1])
    # comparing a column against arange broadcasts to one row per character
    matches = np.arange(n_in) == column
    return matches.astype(int)
def ind2onehot(x):
    """One-hot encode the index (or indices) ``x``, one row per index.

    The result is an integer array with ``n_in`` columns.
    """
    column = np.array(x).reshape([-1, 1])
    # comparing a column against arange broadcasts to one row per index
    matches = np.arange(n_in) == column
    return matches.astype(int)

### Construct simple input-output Network (cf. bigram model: one previous character in, no hidden state)

In [None]:
class Network0:
    """Feed-forward next-character model: input -> ReLU layer -> softmax.

    The network keeps no state between calls; every prediction depends only
    on the input row it is given.
    """

    def __init__(self, session, n_in , n_out, n_hidden=60, learning_rate=1e-4):
        """Build the network in the current default tf graph.

        Args:
            session: tf session used by ``sample``, ``compute_logits`` and ``train``.
            n_in: width of each input row.
            n_out: number of output classes.
            n_hidden: width of the hidden ReLU layer.
            learning_rate: step size handed to the Adam optimizer.
        """
        self.session = session
        self.n_in = n_in
        self.n_out = n_out
        self.n_hidden = n_hidden
        # data placeholders
        self.x = tf.placeholder(tf.float32, [None, n_in], name='x')
        self.y = tf.placeholder(tf.float32, [None, n_out], name='y')
        self.x_in = tf.reshape(self.x, [-1,self.n_in])
        # 2 layer network
        self.W_fc1 = tf.get_variable('W_fc1', shape=[self.n_in,self.n_hidden])
        self.b_fc1 = tf.get_variable('b_fc1', shape=[self.n_hidden])
        self.h_fc1 = tf.nn.relu(tf.add(tf.matmul(self.x_in, self.W_fc1), self.b_fc1, name='layer1'))
        self.W_fc2 = tf.get_variable('W_fc2', shape=[self.n_hidden,self.n_out])
        self.b_fc2 = tf.get_variable('b_fc2', shape=[self.n_out])
        self.logits = tf.add(tf.matmul(self.h_fc1, self.W_fc2), self.b_fc2, name='layer2')
        self.ypred = tf.nn.softmax(self.logits)
        # loss, train_step, etc.
        self.loss = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(labels=self.y,logits=self.logits, name='cross_ent_terms'), name='cross_ent')
        self.train_step = tf.train.AdamOptimizer(learning_rate).minimize(self.loss)

    def sample(self, x):
        """Evaluate the network on ``x`` and draw one sample per input row.

        Returns:
            A list with one one-hot count vector per row of ``x``.
        """
        pred_vals = self.session.run(self.ypred, feed_dict={self.x:np.reshape(x,[-1,self.n_in])})
        samples = []
        for pv in pred_vals:
            # float32 probabilities can sum to slightly more than 1 once numpy
            # casts them to float64, which makes multinomial raise ValueError;
            # cast first and renormalise.
            pv = np.asarray(pv, dtype=np.float64)
            samples.append(np.random.multinomial(1, pvals=pv / pv.sum()))
        return samples

    def compute_logits(self, x):
        """Evaluate the network on ``x`` and return the logit values."""
        return self.session.run(self.logits, feed_dict={self.x:np.reshape(x,[-1,self.n_in])})

    def train(self, x_batch, y_batch):
        """Take one optimizer step on the given inputs and target rows."""
        _ = self.session.run(self.train_step, feed_dict={self.x: x_batch, self.y: y_batch})

### Run simple example

In [None]:
batch_size = 128


def _greedy_text(net, n_chars=90):
    """Start from a random character and append the argmax prediction ``n_chars`` times."""
    x_prev = np.random.randint(n_in)
    chars = [ind2ch[x_prev]]
    for _ in range(n_chars):
        # greedy decoding: always take the highest-scoring next character
        x_prev = np.argmax(net.compute_logits(ind2onehot(x_prev)))
        #x_prev = np.argmax(net.sample(ind2onehot(x_prev)))
        chars.append(ind2ch[x_prev])
    return ''.join(chars)


with tf.Graph().as_default():
    with tf.Session() as sess:
        # create a Network
        g = Network0(sess, n_in, n_in)
        # usual tf initialization
        sess.run(tf.global_variables_initializer())
        ####
        # write some text before any training
        ####
        text0 = _greedy_text(g)

        ####
        # Train on randomly drawn (character, next character) pairs
        ####
        for i in range(5001):
            # positions are drawn from [0, n_train-2] so that b+1 is always a valid index
            batch = np.random.randint(0, n_train - 1, size=batch_size)
            x_batch = ch2onehot([all_text[b] for b in batch])
            y_batch = ch2onehot([all_text[b+1] for b in batch])

            # now run
            g.train(x_batch,y_batch)

        ####
        # write some text after some training
        ####
        text1 = _greedy_text(g)
        

### Output

In [None]:
# show the text generated before and after training, one sample after the other
for heading, generated in (('----Pre-training Sample----', text0),
                           ('----Post-training Sample----', text1)):
    print('{}\n {}'.format(heading, generated))
# why is this the best we can do?

### Now a Network with a hidden state (but no tf RNN abstractions yet)

In [None]:
class Network1:
    """Hand-written recurrent network for next-character prediction.

    The recurrence is unrolled ``n_context`` steps in the graph for training,
    and a separate single-step op is built for generating text one character
    at a time. No tf RNN abstractions are used.
    """

    def __init__(self, session, n_in , n_out, n_context, n_hidden=64, rnn_type='1layer'):
        """Build the training graph and the single-step graph.

        Args:
            session: tf session used by ``train`` and ``sample_step``.
            n_in: width of each one-hot input vector.
            n_out: number of output classes.
            n_context: number of time steps unrolled per training example.
            n_hidden: size of the hidden state.
            rnn_type: ``'1layer'`` or ``'2layer'``; selects the state update in ``rnn_layer``.

        Raises:
            ValueError: if ``rnn_type`` is not one of the supported values.
        """
        if rnn_type not in ('1layer', '2layer'):
            # fail here rather than later inside matmul with a None operand
            raise ValueError("rnn_type must be '1layer' or '2layer', got {!r}".format(rnn_type))
        self.session = session
        self.n_in = n_in
        self.n_out = n_out
        self.n_context = n_context
        self.n_hidden = n_hidden
        self.rnn_type = rnn_type

        # data placeholders
        self.x = tf.placeholder(tf.float32, [None, self.n_context, self.n_in], name='x')
        self.y = tf.placeholder(tf.float32, [None, self.n_context, self.n_out], name='y')
        self.x_step = tf.placeholder(tf.float32, [1, self.n_in], name='x_step')
        #self.h_ = tf.placeholder(tf.float32,[None, self.n_hidden], name='h_')
        # initial hidden state; train() and sample_step() override it through feed_dict
        self.h_ = tf.zeros([1,self.n_hidden])  # notice this 1 is inducing some broadcasting

        # define RNN (the graph variable is named 'Why' although the attribute is Wyh)
        self.Wyh = tf.get_variable('Why', shape=[self.n_hidden,self.n_out])
        self.by = tf.get_variable('by', shape=[self.n_out])
        if self.rnn_type=='1layer':
            self.Wxh = tf.get_variable('Wxh', shape=[self.n_in,self.n_hidden])
            self.bh = tf.get_variable('bh', shape=[self.n_hidden])
            self.Whh = tf.get_variable('Whh', shape=[self.n_hidden,self.n_hidden])
        elif self.rnn_type=='2layer':
            self.n2 = 256
            self.Wx1 = tf.get_variable('Wx1', shape=[self.n_in,self.n2])
            self.b1 = tf.get_variable('b1', shape=[self.n2])
            self.Wh1 = tf.get_variable('Wh1', shape=[self.n_hidden,self.n2])
            self.W1h = tf.get_variable('W1h', shape=[self.n2,self.n_hidden])
            self.bh = tf.get_variable('bh', shape=[self.n_hidden])

        # split (and squeeze) to get BPTT inputs, that is, a list of length n_context with usual [batch_size,n_in]
        # squeezing only axis 1 keeps the batch axis even when the batch size is 1
        self.xs = [tf.squeeze(xx,[1]) for xx in tf.split(self.x, self.n_context, axis=1)]
        self.ys = [tf.squeeze(yy,[1]) for yy in tf.split(self.y, self.n_context, axis=1)]

        # propagate h through context length
        self.h = []
        h = self.h_
        for x in self.xs:
            # here the first time h_ is broadcast to the np.shape(x,0) (as in, batch_size)
            h = self.rnn_layer(x,h)
            self.h.append(h)

        # make outputs from h
        with tf.name_scope('model'):
            self.logits = []
            self.ypred = []
            for h in self.h:
                logits = self.rnn_logit(h)
                self.logits.append(logits)
                self.ypred.append(tf.nn.softmax(logits))

            # conform sizes with expectation: swap the first two axes of the stacked per-step outputs
            self.ypred = tf.transpose(self.ypred,[1,0,2])

        # loss, train_step, etc.
        with tf.name_scope('loss'):
            self.losses = []
            self.accuracies = []
            for (l,y) in zip(self.logits,self.ys):
                self.losses.append(tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(labels=y,logits=l)))
                self.accuracies.append(tf.reduce_mean(tf.cast(tf.equal(tf.argmax(l,1), tf.argmax(y,1)), tf.float32)))
            self.loss = tf.reduce_mean(self.losses)
            self.accuracy = tf.reduce_mean(self.accuracies)

        with tf.name_scope('opt'):
            self.train_step = tf.train.AdamOptimizer(1e-4).minimize(self.loss)

        # ops to propagate the network forward a step (for example, sampling after learned parameter)
        with tf.name_scope('step'):
            self.h_next = self.rnn_layer(self.x_step, self.h_)
            self.logit_next = self.rnn_logit(self.h_next)

        with tf.name_scope('summaries'):
            # create summary for loss and accuracy
            tf.summary.scalar('loss', self.loss)
            tf.summary.scalar('accuracy', self.accuracy)
            # create summary for logits
            tf.summary.histogram('logits', self.logits)
            self.summary_op = tf.summary.merge_all()

    def rnn_layer(self,x,h):
        """Return the next hidden state given input ``x`` and previous state ``h``.

        Used both when unrolling the training graph and for the single-step op.
        """
        with tf.name_scope('rnn_layer'):
            if self.rnn_type=='1layer':
                return tf.nn.tanh(tf.matmul(x, self.Wxh) + tf.matmul(h, self.Whh) + self.bh)
            elif self.rnn_type=='2layer':
                fc1 = tf.nn.relu(tf.matmul(x, self.Wx1) + tf.matmul(h, self.Wh1) + self.b1)
                return tf.nn.tanh(tf.matmul(fc1, self.W1h) + self.bh)
            raise ValueError("unsupported rnn_type {!r}".format(self.rnn_type))

    def rnn_logit(self,h):
        """Map a hidden state to output logits (used for training and stepping)."""
        with tf.name_scope('rnn_logit'):
            return tf.matmul(h, self.Wyh) + self.by

    def train(self, x_batch, y_batch, h_):
        """Take one training step on a block of text.

        Args:
            x_batch: input characters; one-hot encoded with ``ch2onehot``.
            y_batch: target characters; one-hot encoded with ``ch2onehot``.
            h_: hidden state fed in as the state before the first step.

        Returns:
            ``(h_last, loss, summary)`` where ``h_last`` is the hidden state
            after the final unrolled step, which can seed the next block.
        """
        # targets are reshaped with n_out so that they agree with the y placeholder
        _, h_out, loss, summary = self.session.run((self.train_step, self.h, self.loss, self.summary_op), feed_dict=
                                          {self.x: np.reshape(ch2onehot(x_batch),[-1, self.n_context, self.n_in]),
                                           self.y: np.reshape(ch2onehot(y_batch),[-1, self.n_context, self.n_out]),
                                           self.h_: h_})
        return (h_out[-1], loss, summary)

    def sample_step(self, x_step, h_ , sample=False, temp=1.0):
        """Advance one step from character ``x_step`` and predict the next one.

        Args:
            x_step: the current character.
            h_: hidden state before this step.
            sample: if True draw the next character from the softmax,
                otherwise take the argmax of the logits.
            temp: factor the logits are multiplied by before the softmax when
                sampling; larger values make the distribution sharper.

        Returns:
            ``(h_next, y_out)``: the new hidden state and the predicted character.
        """
        h_next, logit_next = self.session.run((self.h_next, self.logit_next), feed_dict=
                                          {self.x_step: np.reshape(ch2onehot(x_step),[1, self.n_in]),
                                           self.h_: h_})
        if sample:
            # work in float64: float32 probabilities can sum to just over 1
            # after numpy's cast, which makes multinomial raise ValueError
            scaled = temp * np.asarray(logit_next[0], dtype=np.float64)
            y_out = ind2ch[np.argmax(np.random.multinomial(1, pvals=softmax(scaled)))]
        else:
            y_out = ind2ch[np.argmax(logit_next)]

        return (h_next, y_out)

    def sample_text(self, seed_char , m , sample=False):
        """Generate ``m`` characters after ``seed_char``, starting from a zero state.

        Args:
            seed_char: text to start from; its last character is the first input.
            m: number of characters to generate.
            sample: passed on to ``sample_step``.

        Returns:
            ``seed_char`` followed by the ``m`` generated characters.
        """
        # size the state from this instance, not from a global network object
        h = np.zeros([1,self.n_hidden])
        text_out = seed_char
        for _ in range(m):
            # roll forward and predict text
            h, y = self.sample_step(text_out[-1], h, sample=sample)
            text_out += y
        return text_out

### Run it...

In [None]:
n_context = 50
model_type = '1layer'
n_hidden = 64
dir_name = 'logs/scratch07/{}_{}_{}'.format(model_type, n_hidden, n_context)
#all_text = 'The quick brown fox jumped over the lazy dog. '*1000
# loss recorded at every diagnostic step (every 1000 contexts); the plotting
# cell at the end of the notebook reads this list
losses = []

with tf.Graph().as_default():
    with tf.Session() as sess:
        # create a Network
        rnn = Network1(sess, n_in, n_in, n_context, n_hidden, model_type)
        # make summarywriter for tb
        summary_writer = tf.summary.FileWriter(dir_name, sess.graph)
        try:
            # usual tf initialization
            sess.run(tf.global_variables_initializer())

            # training
            # walk through data from start to finish.  Walk through in blocks of BPTT
            epoch = 0
            batch = 0
            # integer division keeps the global step an int without a float round trip
            batches_per_epoch = len(all_text) // n_context
            h_prev = np.zeros([1,rnn.n_hidden])
            while epoch < 7:
                if (batch+1)*n_context+1  > (len(all_text)-1):
                    # wrap to beginning and reset
                    batch = 0
                    epoch += 1
                    h_prev = np.zeros([1,rnn.n_hidden])
                    # re-check the epoch limit so no extra step runs after the last epoch
                    continue
                # assign data: targets are the inputs shifted by one character
                x_batch = all_text[batch*n_context:(batch+1)*n_context]
                y_batch = all_text[batch*n_context+1:(batch+1)*n_context+1]

                # training step; the returned state seeds the next block
                h_prev, loss, summary = rnn.train( x_batch, y_batch, h_prev )

                # iterate
                batch += 1
                # print diagnostic
                if batch%1000==0:
                    k = epoch*batches_per_epoch + batch
                    losses.append(loss)
                    summary_writer.add_summary(summary, k)
                    print('______[epoch:{},batch:{},all batches:{}] has loss {}______'.format(epoch,batch,k,loss))
                    # take the last hidden and target to seed a writing
                    h = h_prev
                    text_out = y_batch[-1]
                    for j in range(200):
                        # roll forward and fantasize text of length 200
                        h, y = rnn.sample_step(text_out[-1],h, sample=True, temp=min(batch/5000,5))
                        text_out += y
                    print(text_out)
                    print('')
        finally:
            # flush pending events and release the log file even if training fails
            summary_writer.close()
                print('')
            
        

### Now an LSTM network

In [None]:
class Network2:
    """LSTM next-character model built on ``tf.nn.dynamic_rnn``.

    One LSTM cell is shared by the training graph (``n_context`` steps per
    example) and by a single-step graph used to generate text.
    """

    def __init__(self, session, n_in , n_out, n_context, n_hidden=64, rnn_type='1layer'):
        """Build the training graph and the single-step graph.

        Args:
            session: tf session used by ``train`` and ``sample_step``.
            n_in: width of each one-hot input vector.
            n_out: number of output classes.
            n_context: number of time steps per training example.
            n_hidden: number of LSTM units.
            rnn_type: stored on the instance; not otherwise used by this class.
        """
        self.session = session
        self.n_in = n_in
        self.n_out = n_out
        self.n_context = n_context
        self.n_hidden = n_hidden
        self.rnn_type = rnn_type

        # data placeholders
        self.x = tf.placeholder(tf.float32, [None, self.n_context, self.n_in], name='x')
        self.y = tf.placeholder(tf.float32, [None, self.n_context, self.n_out], name='y')
        self.batch_size = tf.shape(self.x)[0]  # 0 for time_major=False in dynamic_rnn; else 1 for True

        self.x_step = tf.placeholder(tf.float32, [None, 1, self.n_in], name='x_step')
        # initial hidden state; None here is batch size... context is not needed as it is t=0
        self.c_ = tf.placeholder(tf.float32, [None,self.n_hidden], name='c_')
        self.h_ = tf.placeholder(tf.float32, [None,self.n_hidden], name='h_')
        # An LSTMStateTuple that can be fed as initial_state to dynamic_rnn
        self.state_ = tf.nn.rnn_cell.LSTMStateTuple(self.c_, self.h_)  # 2 x None x n_hidden

        # define RNN (the graph variable is named 'Why' although the attribute is Wyh)
        self.Wyh = tf.get_variable('Why', shape=[self.n_hidden,self.n_out])
        self.by = tf.get_variable('by', shape=[self.n_out])
        # tf.contrib is not available through tensorflow.compat.v1 on TF2; the
        # same cell lives under tf.nn.rnn_cell, which this class already uses
        # for LSTMStateTuple
        self.cell = tf.nn.rnn_cell.LSTMCell(self.n_hidden)
        # If cells are LSTMCells state will be a tuple containing a LSTMStateTuple for each cell.
        h_outs, self.state_out = tf.nn.dynamic_rnn(self.cell, self.x, initial_state=self.state_)
        # time_major=True implies time, batch, depth; see https://www.tensorflow.org/api_docs/python/tf/nn/dynamic_rnn
        # time_major=False implies batch, time, depth

        # now h_outs is batch,time, hidden size; flatten to one row per (example, step)
        self.h = tf.reshape(h_outs,[-1,self.n_hidden])
        with tf.name_scope('model'):
            self.logits = self.rnn_logit(self.h)
            self.ypred = tf.nn.softmax(self.logits)

        # loss, train_step, etc.
        with tf.name_scope('loss'):
            y = tf.reshape(self.y,[-1,self.n_out]) # conform this to the unfolded matrix shape
            self.loss = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(labels=y,logits=self.logits))
            self.accuracy = tf.reduce_mean(tf.cast(tf.equal(tf.argmax(self.logits,1), tf.argmax(y,1)),tf.float32))

        with tf.name_scope('opt'):
            self.train_step = tf.train.AdamOptimizer(1e-4).minimize(self.loss)

        # ops to propagate the network forward a step (for example, sampling after learned parameter)
        with tf.name_scope('step'):
            self.h_step, self.state_step = tf.nn.dynamic_rnn(self.cell, self.x_step, initial_state=self.state_)
            # index 0 selects the first batch element; sample_step always feeds a batch of one
            self.logit_step = self.rnn_logit(self.h_step[0])

        with tf.name_scope('summaries'):
            # create summary for loss and accuracy
            tf.summary.scalar('loss', self.loss)
            tf.summary.scalar('accuracy', self.accuracy)
            # create summary for logits
            tf.summary.histogram('logits', self.logits)
            self.summary_op = tf.summary.merge_all()

    def rnn_logit(self,h):
        """Map hidden outputs to logits (used for training and stepping)."""
        with tf.name_scope('rnn_logit'):
            return tf.matmul(h, self.Wyh) + self.by

    def train(self, x_batch, y_batch, c_, h_):
        """Take one training step on a block of text.

        Args:
            x_batch: input characters; one-hot encoded with ``ch2onehot``.
            y_batch: target characters; one-hot encoded with ``ch2onehot``.
            c_: value fed to the ``c_`` placeholder (first part of the initial LSTM state).
            h_: value fed to the ``h_`` placeholder (second part of the initial LSTM state).

        Returns:
            ``(state_out, loss, summary)``. The caller splits ``state_out``
            into ``state_out[0]`` (c) and ``state_out[1]`` (h) for the next call.
        """
        # targets are reshaped with n_out so that they agree with the y placeholder
        _, state_out, loss, summary = self.session.run((self.train_step, self.state_out, self.loss, self.summary_op), feed_dict=
                                          {self.x: np.reshape(ch2onehot(x_batch),[-1, self.n_context, self.n_in]),
                                           self.y: np.reshape(ch2onehot(y_batch),[-1, self.n_context, self.n_out]),
                                           self.c_: c_,
                                           self.h_: h_})
        return (state_out, loss, summary)

    def sample_step(self, x_step, c_, h_ , sample=False, temp=1.0):
        """Advance one step from character ``x_step`` and predict the next one.

        Args:
            x_step: the current character.
            c_: value fed to the ``c_`` placeholder.
            h_: value fed to the ``h_`` placeholder.
            sample: if True draw the next character from the softmax,
                otherwise take the argmax of the logits.
            temp: factor the logits are multiplied by before the softmax when
                sampling; larger values make the distribution sharper.

        Returns:
            ``(state_step, y_out)``. The caller splits ``state_step`` into
            ``state_step[0]`` (c) and ``state_step[1]`` (h) for the next call.
        """
        state_step, logit_step = self.session.run((self.state_step, self.logit_step), feed_dict=
                                          {self.x_step: np.reshape(ch2onehot(x_step),[1, 1, self.n_in]),
                                           self.c_: c_,
                                           self.h_: h_})
        if sample:
            # work in float64: float32 probabilities can sum to just over 1
            # after numpy's cast, which makes multinomial raise ValueError
            scaled = temp * np.asarray(logit_step[0], dtype=np.float64)
            y_out = ind2ch[np.argmax(np.random.multinomial(1, pvals=softmax(scaled)))]
        else:
            y_out = ind2ch[np.argmax(logit_step)]

        return (state_step, y_out)
    

### Run LSTM...

In [None]:
n_context = 50
model_type = 'lstm'
n_hidden = 256
dir_name = 'logs/scratch07/{}_{}_{}'.format(model_type, n_hidden, n_context)
#all_text = 'The quick brown fox jumped over the lazy dog. '*1000

with tf.Graph().as_default():
    with tf.Session() as sess:
        # create a Network
        rnn = Network2(sess, n_in, n_in, n_context, n_hidden, model_type)
        # make summarywriter for tb
        summary_writer = tf.summary.FileWriter(dir_name, sess.graph)
        try:
            # usual tf initialization
            sess.run(tf.global_variables_initializer())

            # training
            # walk through data from start to finish.  Walk through in blocks of BPTT
            epoch = 0
            batch = 0
            # integer division keeps the global step an int without a float round trip
            batches_per_epoch = len(all_text) // n_context

            # state_prev[0] is fed as c and state_prev[1] as h; both start at zero
            state_prev = np.zeros([2,1,rnn.n_hidden])
            while epoch < 15:
                if (batch+1)*n_context+1+epoch  > (len(all_text)-1):
                    # wrap to beginning and reset
                    batch = 0
                    epoch += 1
                    # reset the LSTM state itself (assigning to an unused h_prev
                    # would leave the old state in place across the wrap)
                    state_prev = np.zeros([2,1,rnn.n_hidden])
                    # re-check the epoch limit so no extra step runs after the last epoch
                    continue
                # assign data, shifting by 1 each epoch
                x_batch = all_text[batch*n_context+epoch:(batch+1)*n_context+epoch]
                y_batch = all_text[batch*n_context+1+epoch:(batch+1)*n_context+1+epoch]

                # training step; the returned state seeds the next block
                state_prev, loss, summary = rnn.train( x_batch, y_batch, state_prev[0], state_prev[1] )

                # iterate
                batch += 1
                # print diagnostic
                if batch%1000==0:
                    k = epoch*batches_per_epoch + batch
                    summary_writer.add_summary(summary, k)
                    print('______[epoch:{},batch:{},all batches:{}] has loss {}______'.format(epoch,batch,k,loss))
                    # take the last hidden and target to seed a writing
                    h = state_prev
                    text_out = y_batch[-1]
                    for j in range(200):
                        # roll forward and predict text
                        h, y = rnn.sample_step(text_out[-1], h[0] , h[1] , sample=True, temp=min(batch/5000,5))
                        text_out += y
                    print(text_out)
                    print('')
        finally:
            # flush pending events and release the log file even if training fails
            summary_writer.close()
            
        

### Slide extras; no didactic purpose

In [None]:
# Test code to play with LSTMStateTuple object...
# Runs an LSTM over random input from an explicitly fed (c, h) initial state
# and prints the shape of the final state.
x_in = np.random.rand(5,30,4)
h0 = np.zeros([5,22])
# time_major=True implies time, batch, input size; see https://www.tensorflow.org/api_docs/python/tf/nn/dynamic_rnn
# time_major=False implies batch, time, input size
with tf.Graph().as_default():
    with tf.Session() as sess:
        # create a Network
        x = tf.placeholder(tf.float32, [None,None,4], name='x')
        c_state = tf.placeholder(tf.float32, [None,22], name='c')
        h_state = tf.placeholder(tf.float32, [None,22], name='h')
        initial_state = tf.nn.rnn_cell.LSTMStateTuple(c_state, h_state)

        # here's the RNN
        # tf.contrib is not available through tensorflow.compat.v1 on TF2; the
        # same cell lives under tf.nn.rnn_cell (already used above for LSTMStateTuple)
        cell = tf.nn.rnn_cell.LSTMCell(22)

        batch_size    = tf.shape(x)[0]  # 0 for time_major=False in dynamic_rnn; else 1 for True
        # alternative way to get an initial state; built here but not passed to dynamic_rnn
        h_ = cell.zero_state(batch_size, tf.float32)

        #lstmx,lstmh = cell(x, h)
        rnn_outputs, rnn_state = tf.nn.dynamic_rnn(cell, x, dtype=tf.float32, initial_state=initial_state)

        # usual tf initialization
        sess.run(tf.global_variables_initializer())
        out = sess.run(rnn_state, feed_dict={x:x_in, c_state:h0, h_state:h0})
print(np.shape(out))
#out

In [None]:
# note, passing in batches of size 1 can trigger an issue because an unrestricted squeeze will dump dimension 0
# this is then a problem because matmul gets a [,4] instead of a matrix [1,4] (that it wants).
# you can sort of fix this for 1,2,4 (1 here is the batch size) by changing to tf.matmul([xx],W)
# but then of course that's a problem for 11,2,4 (or whatever batch size) because then a nested matrix
# the right solve is to squeeze carefully (only axis 1)... as it is now
x_in = np.random.rand(2,2,4)
with tf.Graph().as_default():
    with tf.Session() as sess:
        # create a Network
        x = tf.placeholder(tf.float32, [None, 2, 4], name='x')
        W = tf.get_variable('W', shape=[4,3])
        def f(x):
            # applied to each time slice of the batched input
            return tf.matmul(x, W)
        # give the step placeholder its own name instead of reusing 'x'
        x_step = tf.placeholder(tf.float32, [2, 4], name='x_step')
        def g(x):
            # applied to the stand-alone step input
            return tf.matmul(x, W)

        # squeeze only axis 1 so the batch axis is kept
        xs = [tf.squeeze(xx,[1]) for xx in tf.split(x, 2, axis=1)]
        xW = [f(xx) for xx in xs]

        xW_step = g(x_step)

        # usual tf initialization
        sess.run(tf.global_variables_initializer())
        out = sess.run(xW, feed_dict={x:x_in})
        out_step = sess.run(xW_step, feed_dict={x_step:x_in[0,:,:]})

print(out)
print(out_step)

In [None]:
# NOTE(review): these three functions take ``self`` and read self.session,
# self.x, self.logits, self.ypred, self.n_context and self.n_in, so they look
# like methods meant to be pasted into a network class — confirm which one.
# They are kept at one consistent indentation level so that the cell parses.
def compute_logits(self, text_in):
    """Evaluate the network on ``text_in`` and return the logit values."""
    return self.session.run(self.logits, feed_dict={self.x:np.reshape(ch2onehot(text_in),[-1, self.n_context, self.n_in])})


def sample_text(self, text_in):
    """Evaluate the network on ``text_in`` and sample one character per prediction."""
    pred_vals = self.session.run(self.ypred, feed_dict={self.x:np.reshape(ch2onehot(text_in),[-1, self.n_context, self.n_in])})
    # cast to float64 and renormalise: float32 probabilities can sum to just
    # over 1 after numpy's cast, which makes multinomial raise ValueError
    probs = np.asarray(pred_vals, dtype=np.float64).reshape([-1,n_in])
    return [ind2ch[np.argmax(np.random.multinomial(1,pvals=pv / pv.sum()))] for pv in probs]


def predict_text(self, text_in):
    """Evaluate the network on ``text_in`` and return the most likely character per prediction."""
    pred_vals = self.session.run(self.ypred, feed_dict={self.x:np.reshape(ch2onehot(text_in),[-1, self.n_context, self.n_in])})
    return [ind2ch[np.argmax(pv)] for pv in pred_vals.reshape([-1,n_in])]
        
    

In [None]:
# draw the recorded losses as a single line on the current axes
ax = plt.gca()
ax.plot(losses, linewidth=2)
ax.set_xlabel('every 1000 contexts')
ax.set_ylabel('loss')
ax.set_title('Simple RNN with hidden state')
plt.show()
        