## Task 0


In [1]:
import tensorflow.compat.v1 as tf
import numpy as np
import urllib
tf.compat.v1.disable_eager_execution()

# the number of training iterations; each iteration draws one fresh random
# batch and performs a single optimizer step
numTrainingIters = 10000

# the number of hidden neurons that hold the state of the RNN
# (later cells rebind this name before building their own graphs)
hiddenUnits = 1000

# the number of classes that we are learning over; one class per source
# text file
numClasses = 3

# the number of data points in a batch; the placeholders built later use
# this as a fixed batch dimension
batchSize = 100

# this function takes a dictionary (called data) which contains
# (dataPointID, (classNumber, matrix)) entries.  Each matrix
# is a sequence of vectors; each vector has a one-hot-encoding of
# an ascii character, and the sequence of vectors corresponds to
# one line of text.  classNumber indicates which file the line of
# text came from.
#
# The argument maxSeqLen is the maximum length of a line of text
# seen so far.  fileName is the name of a file whose contents
# we want to add to data.  classNum is an indicator of the class
# we are going to associate with text from that file.  linesToUse
# tells us how many lines to sample from the file.
#
# The return val is the new maxSeqLen, as well as the new data
# dictionary with the additional lines of text added
def addToData (maxSeqLen, data, fileName, classNum, linesToUse):
    """Sample lines of text from a URL and add them, one-hot encoded, to data.

    Args:
        maxSeqLen: length of the longest line of text seen so far.
        data: dictionary of dataPointID -> (classNumber, matrix) entries;
            it is updated in place and also returned.
        fileName: URL of the text file to read (opened with urlopen).
        classNum: class label to attach to every line taken from this file.
        linesToUse: how many line indices to sample.  Sampling is done with
            replacement, and lines that are empty or whitespace-only are
            skipped, so fewer than linesToUse entries may be added.

    Returns:
        The pair (maxSeqLen, data): the updated maximum line length and the
        same dictionary with the new entries added under the keys
        len(data), len(data) + 1, ...

    Each stored matrix has one row per character of the line (including the
    trailing newline) and 256 columns.  Characters whose code point is 256 or
    more are skipped, which leaves all-zero rows at the end of the matrix.
    """
    #
    # urllib.request is a submodule; a bare "import urllib" does not
    # guarantee that it has been loaded, so import it explicitly
    import urllib.request
    #
    # open the file and read it in; the context manager makes sure the
    # connection is closed even if reading fails
    with urllib.request.urlopen(fileName) as response:
        content = response.readlines ()
    #
    # sample linesToUse numbers; these will tell us what lines
    # from the text file we will use
    myInts = np.random.randint (0, len(content), linesToUse)
    #
    # i is the key of the next line of text to add to the dictionary
    i = len(data)
    #
    # loop thru and add the lines of text to the dictionary
    for whichLine in myInts.flat:
        #
        # get the line and ignore it if it has nothing in it
        line = content[whichLine].decode("utf-8")
        if not line or line.isspace ():
            continue
        #
        # take note if this is the longest line we've seen
        maxSeqLen = max (maxSeqLen, len (line))
        #
        # create the matrix that will hold this line
        temp = np.zeros((len(line), 256))
        #
        # j is the row that the next encodable character goes into
        j = 0
        for ch in line:
            code = ord(ch)
            #
            # code points that do not fit in the 256 columns are ignored
            if code >= 256:
                continue
            #
            # one hot!
            temp[j][code] = 1
            j += 1
        #
        # remember the line of text and move onto the next key
        data[i] = (classNum, temp)
        i += 1
    #
    # and return the dictionary with the new data
    return (maxSeqLen, data)

# this function takes as input a data set encoded as a dictionary
# (same encoding as the last function) and pre-pends every line of
# text with empty characters so that each line of text is exactly
# maxSeqLen characters in size
def pad (maxSeqLen, data):
    """Left-pad every matrix in data to maxSeqLen rows, then transpose it.

    Args:
        maxSeqLen: the number of characters every line should end up with.
        data: dictionary of key -> (label, matrix) entries where each matrix
            has one row per character and 256 columns.  Updated in place.

    Returns:
        The same dictionary; every matrix now has shape (256, maxSeqLen),
        with the all-zero padding columns coming first.

    Raises:
        ValueError: (from NumPy) if a matrix has more than maxSeqLen rows.
    """
    #
    # loop thru every line of text; only the values of existing keys are
    # replaced, so iterating over the dictionary while updating it is safe
    for key in data:
        #
        # access the label and the matrix
        entry = data[key]
        label = entry[0]
        matrix = entry[1]
        #
        # get the number of characters in this line
        numChars = matrix.shape[0]
        #
        # and then pad at the front so the line is the correct length
        padding = np.zeros ((maxSeqLen - numChars, 256))
        padded = np.concatenate ((padding, matrix), axis = 0)
        data[key] = (label, np.transpose (padded))
    #
    # return the new data set
    return data

# this generates a new batch of training data of size batchSize from the
# list of lines of text data. This version of generateData is useful for
# an RNN because the data set x is a NumPy array with dimensions
# [batchSize, 256, maxSeqLen]; it can be unstacked into a series of
# matrices containing one-hot character encodings for each data point
# using tf.unstack(inputX, axis=2)
def generateDataRNN (maxSeqLen, data):
    """Draw one random batch of batchSize entries from data for the RNN.

    maxSeqLen is accepted but not used here.  Entries are chosen uniformly
    at random with replacement, using the integers 0 .. len(data) - 1 as keys.

    Returns the pair (x, y): x stacks the chosen matrices along a new first
    axis and y is the vector of the matching labels.
    """
    #
    # pick the keys of the entries that make up this batch
    chosen = np.random.randint (0, len(data), batchSize)
    #
    # collect the matrix and the label of every chosen entry
    matrices = []
    labels = []
    for key in chosen.flat:
        entry = data[key]
        matrices.append (entry[1])
        labels.append (np.array((entry[0])))
    #
    # stack both lists into arrays and return the pair
    return (np.stack (matrices), np.stack (labels))

# this also generates a new batch of training data, but it represents
# the data as a NumPy array with dimensions [batchSize, 256 * maxSeqLen]
# where for each data point, all characters have been appended.  Useful
# for feed-forward network training
def generateDataFeedForward (maxSeqLen, data):
    """Draw one random batch of batchSize entries, flattened for a feed-forward net.

    maxSeqLen is accepted but not used here.  Entries are chosen uniformly
    at random with replacement, using the integers 0 .. len(data) - 1 as keys.

    Returns the pair (x, y): every row of x is one chosen matrix flattened
    into a single vector, and y is the vector of the matching labels.
    """
    #
    # randomly sample batchSize lines of text
    myInts = np.random.randint (0, len(data), batchSize)
    #
    # stack all of the text into a matrix of one-hot characters; np.stack
    # needs a real sequence, passing it a generator is not supported
    x = np.stack ([data[i][1].flatten () for i in myInts.flat])
    #
    # and stack all of the labels into a vector of labels
    y = np.stack ([np.array((data[i][0])) for i in myInts.flat])
    #
    # return the pair
    return (x, y)

# create the data dictionary; maxSeqLen tracks the longest line loaded so far
maxSeqLen = 0
data = {}

# load up the three data sets, giving each source file its own class number
# (0, 1, 2) and sampling 10000 line indices from each; addToData skips blank
# lines, so fewer than 10000 entries per file may actually be added
(maxSeqLen, data) = addToData (maxSeqLen, data, "https://s3.amazonaws.com/chrisjermainebucket/text/Holmes.txt", 0, 10000)
(maxSeqLen, data) = addToData (maxSeqLen, data, "https://s3.amazonaws.com/chrisjermainebucket/text/war.txt", 1, 10000)
(maxSeqLen, data) = addToData (maxSeqLen, data, "https://s3.amazonaws.com/chrisjermainebucket/text/william.txt", 2, 10000)

# pad each entry in the dictionary with empty characters as needed so
# that the sequences are all of the same length; after this call every
# matrix is transposed to shape (256, maxSeqLen)
data = pad (maxSeqLen, data)

# now we build the TensorFlow computation... there are two inputs,
# a batch of text lines and a batch of labels; the batch dimension is
# fixed to batchSize, so every feed must contain exactly batchSize rows
inputX = tf.placeholder(tf.float32, [batchSize, 256, maxSeqLen])
inputY = tf.placeholder(tf.int32, [batchSize])

# this is the initial state of the RNN, before processing any data
initialState = tf.placeholder(tf.float32, [batchSize, hiddenUnits])

# Wfir maps the concatenated (input, hidden state) vector to hiddenUnits
# values; Wsec is a second hiddenUnits x hiddenUnits layer applied on top.
# Neither layer has a bias term.
Wfir = tf.Variable(np.random.normal(0, 0.01, (hiddenUnits + 256, hiddenUnits)), dtype=tf.float32)
Wsec = tf.Variable(np.random.normal(0, 0.01, (hiddenUnits, hiddenUnits)), dtype=tf.float32)

# weights and bias for the final classification
W2 = tf.Variable(np.random.normal (0, 0.05, (hiddenUnits, numClasses)),dtype=tf.float32)
b2 = tf.Variable(np.zeros((1,numClasses)), dtype=tf.float32)

# unpack the input sequences so that we have a series of matrices,
# each of which has a one-hot encoding of the current character from
# every input sequence (a Python list with one tensor per position)
sequenceOfLetters = tf.unstack(inputX, axis=2)

# now we implement the forward pass; the Python loop unrolls the network,
# adding one copy of the two matmul/tanh layers to the graph per position
currentState = initialState
for timeTick in sequenceOfLetters:
    #
    # concatenate the state with the input, then compute the next state
    # by passing it through the two tanh layers
    inputPlusState = tf.concat([timeTick, currentState], 1)
    next_state = tf.tanh(tf.matmul(inputPlusState, Wfir))
    last_state = tf.tanh(tf.matmul(next_state, Wsec))
    currentState = last_state

# compute the set of outputs (the logits) from the final state only
outputs = tf.matmul(currentState, W2) + b2

predictions = tf.nn.softmax(outputs)

# compute the loss: mean cross entropy over the batch, with integer labels
losses = tf.nn.sparse_softmax_cross_entropy_with_logits(logits=outputs, labels=inputY)
totalLoss = tf.reduce_mean(losses)

# use Adagrad with a learning rate of 0.01 to train
# NOTE(review): the recorded output for this cell shows the loss staying
# near ln(3) ~= 1.0986, i.e. close to chance for three classes -- the cause
# is not visible here; TODO investigate
trainingAlg = tf.compat.v1.train.AdagradOptimizer(0.01).minimize(totalLoss)

# and train!!
with tf.Session() as sess:
    #
    # initialize everything
    sess.run(tf.compat.v1.global_variables_initializer())
    #
    # and run the training iters; each "epoch" here is one optimizer step
    # on one freshly sampled batch, not a full pass over the data
    for epoch in range(numTrainingIters):
        #
        # get some data
        x, y = generateDataRNN (maxSeqLen, data)
        #
        # do the training step; every batch starts from an all-zero state
        _currentState = np.zeros((batchSize, hiddenUnits))
        _totalLoss, _trainingAlg, _currentState, _predictions, _outputs = sess.run(
                [totalLoss, trainingAlg, currentState, predictions, outputs],
                feed_dict={
                    inputX:x,
                    inputY:y,
                    initialState:_currentState
                })
        #
        # just FYI, compute the number of correct predictions: the predicted
        # class of a row is the position of its largest probability
        numCorrect = int(np.sum(np.argmax(_predictions, axis=1) == y))
        #
        # print out to the screen
        print("Step", epoch, "Loss", _totalLoss, "Correct", numCorrect, "out of", batchSize)


Instructions for updating:
Call initializer instance with the dtype argument instead of passing it to the constructor


[1;30;43mStreaming output truncated to the last 5000 lines.[0m
Step 5000 Loss 1.0941586 Correct 38 out of 100
Step 5001 Loss 1.0871301 Correct 42 out of 100
Step 5002 Loss 1.0894651 Correct 40 out of 100
Step 5003 Loss 1.0971336 Correct 35 out of 100
Step 5004 Loss 1.0929561 Correct 38 out of 100
Step 5005 Loss 1.089942 Correct 40 out of 100
Step 5006 Loss 1.1131809 Correct 26 out of 100
Step 5007 Loss 1.0932478 Correct 38 out of 100
Step 5008 Loss 1.0970935 Correct 36 out of 100
Step 5009 Loss 1.0987045 Correct 34 out of 100
Step 5010 Loss 1.0943635 Correct 37 out of 100
Step 5011 Loss 1.0876888 Correct 42 out of 100
Step 5012 Loss 1.0940531 Correct 37 out of 100
Step 5013 Loss 1.0960104 Correct 36 out of 100
Step 5014 Loss 1.0973576 Correct 36 out of 100
Step 5015 Loss 1.0765697 Correct 49 out of 100
Step 5016 Loss 1.0945832 Correct 37 out of 100
Step 5017 Loss 1.0896951 Correct 40 out of 100
Step 5018 Loss 1.0825075 Correct 45 out of 100
Step 5019 Loss 1.0977702 Correct 35 out of 

## Task 1

In [2]:
# Splits the data into training and testing set
def train_test_split(data, train, test, test_length):
    """Move the entries of data into the train and test dictionaries.

    The last test_length entries of data (in insertion order) go to test and
    all earlier entries go to train.  Values are appended under fresh
    consecutive integer keys starting at len(train) and len(test), so the
    function can be called repeatedly to accumulate several sources.

    Args:
        data: source dictionary; the entries moved to test are removed from
            it, the entries copied to train are left in place.
        train: dictionary collecting the training entries (updated in place).
        test: dictionary collecting the testing entries (updated in place).
        test_length: number of trailing entries to use for testing.  If it
            exceeds len(data), every entry goes to test.

    Returns:
        The pair (train, test).

    Raises:
        ValueError: if test_length is negative.
    """
    if test_length < 0:
        raise ValueError("test_length must not be negative")

    items = list(data.items())

    # use an explicit split index: slicing with -test_length breaks for
    # test_length == 0, where items[:-0] is empty and items[-0:] is everything
    split = max(len(items) - test_length, 0)
    train_items = items[:split]
    test_items = items[split:]

    for idx, (key, value) in enumerate(train_items, start=len(train)):
        train[idx] = value

    for idx, (key, value) in enumerate(test_items, start=len(test)):
        test[idx] = value
        data.pop(key)

    return train, test


# create the data dictionaries: data is a scratch dictionary that holds one
# source file at a time, training_data / testing_data accumulate the splits
maxSeqLen = 0
training_data, testing_data = {} , {}
data = {}

# load up the three data sets one at a time; after each load the last 1000
# entries are moved to the testing set and the rest go to the training set.
# maxSeqLen is still 0 in each call, so maxSeqLen1..3 are per-file maxima.
# NOTE(review): addToData samples line indices with replacement, so the
# same line of text can end up in both the training and the testing set
(maxSeqLen1, data) = addToData(maxSeqLen, data, "https://s3.amazonaws.com/chrisjermainebucket/text/Holmes.txt", 0, 10000)
training_data, testing_data = train_test_split(data, training_data, testing_data , 1000)
data = {}
(maxSeqLen2, data) = addToData(maxSeqLen, data, "https://s3.amazonaws.com/chrisjermainebucket/text/war.txt", 1, 10000)
training_data, testing_data = train_test_split(data, training_data, testing_data , 1000)
data = {}
(maxSeqLen3, data) = addToData(maxSeqLen, data, "https://s3.amazonaws.com/chrisjermainebucket/text/william.txt", 2, 10000)
training_data, testing_data = train_test_split(data, training_data, testing_data ,  1000)

# pad each entry in the dictionary with empty characters as needed so
# that the sequences are all of the same length; both sets are padded to
# the longest line over all three files
maxSeqLen = max(max(maxSeqLen1,maxSeqLen2),maxSeqLen3)
training_data= pad(maxSeqLen, training_data)
testing_data = pad(maxSeqLen,testing_data)


# show the first 20 entries of each set as a sanity check
print("Training data sample:")
for key, (cls, matrix) in list(training_data.items())[:20]:
    print(f"Key: {key}, Class: {cls}, Matrix shape: {matrix.shape}")

print("\nTesting data sample:")
for key, (cls, matrix) in list(testing_data.items())[:20]:
    print(f"Key: {key}, Class: {cls}, Matrix shape: {matrix.shape}")

# and report the size of each set
print(len(training_data))
print(len(testing_data))


Training data sample:
Key: 0, Class: 0, Matrix shape: (256, 83)
Key: 1, Class: 0, Matrix shape: (256, 83)
Key: 2, Class: 0, Matrix shape: (256, 83)
Key: 3, Class: 0, Matrix shape: (256, 83)
Key: 4, Class: 0, Matrix shape: (256, 83)
Key: 5, Class: 0, Matrix shape: (256, 83)
Key: 6, Class: 0, Matrix shape: (256, 83)
Key: 7, Class: 0, Matrix shape: (256, 83)
Key: 8, Class: 0, Matrix shape: (256, 83)
Key: 9, Class: 0, Matrix shape: (256, 83)
Key: 10, Class: 0, Matrix shape: (256, 83)
Key: 11, Class: 0, Matrix shape: (256, 83)
Key: 12, Class: 0, Matrix shape: (256, 83)
Key: 13, Class: 0, Matrix shape: (256, 83)
Key: 14, Class: 0, Matrix shape: (256, 83)
Key: 15, Class: 0, Matrix shape: (256, 83)
Key: 16, Class: 0, Matrix shape: (256, 83)
Key: 17, Class: 0, Matrix shape: (256, 83)
Key: 18, Class: 0, Matrix shape: (256, 83)
Key: 19, Class: 0, Matrix shape: (256, 83)

Testing data sample:
Key: 0, Class: 0, Matrix shape: (256, 83)
Key: 1, Class: 0, Matrix shape: (256, 83)
Key: 2, Class: 0, Matr

In [3]:
# now we build the TensorFlow computation... there are two inputs,
# a batch of text lines and a batch of labels; the batch dimension is
# fixed to batchSize, so every feed must contain exactly batchSize rows
inputX = tf.placeholder(tf.float32, [batchSize, 256, maxSeqLen])
inputY = tf.placeholder(tf.int32, [batchSize])

# this is the initial state of the RNN, before processing any data
initialState = tf.placeholder(tf.float32, [batchSize, hiddenUnits])

# Wfir maps the concatenated (input, hidden state) vector to hiddenUnits
# values; Wsec is a second hiddenUnits x hiddenUnits layer applied on top.
# Neither layer has a bias term.
Wfir = tf.Variable(np.random.normal(0, 0.01, (hiddenUnits + 256, hiddenUnits)), dtype=tf.float32)
Wsec = tf.Variable(np.random.normal(0, 0.01, (hiddenUnits, hiddenUnits)), dtype=tf.float32)


# weights and bias for the final classification
W2 = tf.Variable(np.random.normal (0, 0.05, (hiddenUnits, numClasses)),dtype=tf.float32)
b2 = tf.Variable(np.zeros((1,numClasses)), dtype=tf.float32)

# unpack the input sequences so that we have a series of matrices,
# each of which has a one-hot encoding of the current character from
# every input sequence (a Python list with one tensor per position)
sequenceOfLetters = tf.unstack(inputX, axis=2)

"""
Forward Path
"""
# now we implement the forward path; the Python loop unrolls the network,
# adding one copy of the two matmul/tanh layers to the graph per position
currentState = initialState
for timeTick in sequenceOfLetters:
    #
    # concatenate the state with the input, then compute the next state
    # by passing it through the two tanh layers
    inputPlusState = tf.concat([timeTick, currentState], 1)
    next_state = tf.tanh(tf.matmul(inputPlusState, Wfir))
    last_state = tf.tanh(tf.matmul(next_state, Wsec))
    currentState = last_state

# compute the set of outputs (the logits) from the final state only
outputs = tf.matmul(currentState, W2) + b2

predictions = tf.nn.softmax(outputs)

# compute the loss: mean cross entropy over the batch, with integer labels
losses = tf.nn.sparse_softmax_cross_entropy_with_logits(logits=outputs, labels=inputY)
totalLoss = tf.reduce_mean(losses)

# use Adagrad with a learning rate of 0.01 to train
# NOTE(review): the recorded output for this cell shows the loss staying
# near ln(3) ~= 1.0986, i.e. close to chance for three classes -- the cause
# is not visible here; TODO investigate
trainingAlg = tf.compat.v1.train.AdagradOptimizer(0.01).minimize(totalLoss)

"""
Train and build the model
"""
# and train!!
with tf.Session() as sess:
    #
    # initialize everything
    sess.run(tf.global_variables_initializer())
    #
    # and run the training iters; each "epoch" here is one optimizer step
    # on one freshly sampled batch, not a full pass over the training set
    for epoch in range(numTrainingIters):
        #
        # get some data
        x, y = generateDataRNN (maxSeqLen, training_data)
        #
        # do the training step; every batch starts from an all-zero state
        _currentState = np.zeros((batchSize, hiddenUnits))
        _totalLoss, _trainingAlg, _currentState, _predictions, _outputs = sess.run(
                [totalLoss, trainingAlg, currentState, predictions, outputs],
                feed_dict={
                    inputX:x,
                    inputY:y,
                    initialState:_currentState
                })
        #
        # just FYI, compute the number of correct predictions: the predicted
        # class of a row is the position of its largest probability
        numCorrect = int(np.sum(np.argmax(_predictions, axis=1) == y))
        #
        # print out to the screen
        print("Step", epoch, "Loss", _totalLoss, "Correct", numCorrect, "out of", batchSize)
    #
    # test: walk through the testing set in consecutive batches.  The
    # placeholders have a fixed batch dimension, so only whole batches can be
    # fed; a trailing partial batch (if any) is left out of the evaluation.
    numTestBatches = len(testing_data) // batchSize
    numTestPoints = numTestBatches * batchSize
    test_loss = 0.0
    numCorrect = 0
    for batchStart in range(0, numTestPoints, batchSize):
        #
        # the testing set is keyed by the consecutive integers 0, 1, 2, ...
        batchKeys = range(batchStart, batchStart + batchSize)
        x = np.stack([testing_data[i][1] for i in batchKeys])
        y = np.stack([np.array((testing_data[i][0])) for i in batchKeys])
        _currentState = np.zeros((batchSize, hiddenUnits))
        #
        # trainingAlg is not run here, so the weights stay untouched
        _totalLoss, _predictions = sess.run(
                [totalLoss, predictions],
                feed_dict={
                    inputX:x,
                    inputY:y,
                    initialState:_currentState
                })
        #
        # accumulate the batch loss and the number of correct predictions
        test_loss += float(_totalLoss)
        numCorrect += int(np.sum(np.argmax(_predictions, axis=1) == y))
    #
    # report the mean of the per-batch losses
    if numTestBatches > 0:
        test_loss /= numTestBatches
    print("Loss for", numTestPoints, "randomly chosen document is", test_loss)
    print("Number correct labels is",numCorrect,"out of", numTestPoints)


[1;30;43mStreaming output truncated to the last 5000 lines.[0m
Step 5002 Loss 1.105238 Correct 31 out of 100
Step 5003 Loss 1.0854268 Correct 43 out of 100
Step 5004 Loss 1.0790979 Correct 46 out of 100
Step 5005 Loss 1.1026039 Correct 33 out of 100
Step 5006 Loss 1.08873 Correct 41 out of 100
Step 5007 Loss 1.0930458 Correct 38 out of 100
Step 5008 Loss 1.0935105 Correct 38 out of 100
Step 5009 Loss 1.1006058 Correct 34 out of 100
Step 5010 Loss 1.0855614 Correct 43 out of 100
Step 5011 Loss 1.1053635 Correct 31 out of 100
Step 5012 Loss 1.0843911 Correct 43 out of 100
Step 5013 Loss 1.0826901 Correct 44 out of 100
Step 5014 Loss 1.0942438 Correct 37 out of 100
Step 5015 Loss 1.0922668 Correct 39 out of 100
Step 5016 Loss 1.0786761 Correct 46 out of 100
Step 5017 Loss 1.0909864 Correct 39 out of 100
Step 5018 Loss 1.0868342 Correct 41 out of 100
Step 5019 Loss 1.0999142 Correct 34 out of 100
Step 5020 Loss 1.098094 Correct 36 out of 100
Step 5021 Loss 1.1038461 Correct 33 out of 100

## Task 2

In [6]:
# the number of hidden neurons that hold the state of the RNN
hiddenUnits = 500

# now we build the TensorFlow computation... there are two inputs,
# a batch of text lines and a batch of labels; the batch dimension is
# fixed to batchSize, so every feed must contain exactly batchSize rows
inputX = tf.placeholder(tf.float32, [batchSize, 256, maxSeqLen])
inputY = tf.placeholder(tf.int32, [batchSize])

# this is the initial state of the RNN, before processing any data
initialState = tf.placeholder(tf.float32, [batchSize, hiddenUnits])

# Wfir maps the concatenation of the input (256), the current state
# (hiddenUnits) and an older state (hiddenUnits) to the next state.
# Wsec is still created but is not used by the forward path in this cell.
Wfir = tf.Variable(np.random.normal(0, 0.01, (2*hiddenUnits + 256, hiddenUnits)), dtype=tf.float32)
Wsec = tf.Variable(np.random.normal(0, 0.01, (hiddenUnits, hiddenUnits)), dtype=tf.float32)


# weights and bias for the final classification
W2 = tf.Variable(np.random.normal (0, 0.05, (hiddenUnits, numClasses)),dtype=tf.float32)
b2 = tf.Variable(np.zeros((1,numClasses)), dtype=tf.float32)

# unpack the input sequences so that we have a series of matrices,
# each of which has a one-hot encoding of the current character from
# every input sequence (a Python list with one tensor per position)
sequenceOfLetters = tf.unstack(inputX, axis=2)


"""
Forward Path
"""
# now we implement the forward path
currentState = initialState
# ring buffer with 10 slots for earlier states; the zero tensors are only
# placeholders, every slot is overwritten before it is first read
state_history = [tf.zeros_like(initialState) for _ in range(10)]

# the earlier two-layer update (tanh through Wfir, then tanh through Wsec)
# is replaced by the single-layer update with a time-delayed state below


for t, timeTick in enumerate(sequenceOfLetters):
    # slot t % 10 was last written at tick t - 10 and holds the state that
    # entered that tick; during the first 10 ticks use the initial state
    state_from_10_ticks_ago = state_history[t % 10] if t >= 10 else initialState
    # remember the state entering this tick before it is replaced
    state_history[t % 10] = currentState
    # the next state is computed from the input, the current state and the older state
    inputPlusState = tf.concat([timeTick, currentState, state_from_10_ticks_ago], 1)
    currentState = tf.tanh(tf.matmul(inputPlusState, Wfir))

# compute the set of outputs (the logits) from the final state only
outputs = tf.matmul(currentState, W2) + b2

predictions = tf.nn.softmax(outputs)

# compute the loss: mean cross entropy over the batch, with integer labels
losses = tf.nn.sparse_softmax_cross_entropy_with_logits(logits=outputs, labels=inputY)
totalLoss = tf.reduce_mean(losses)

# use Adagrad with a learning rate of 0.01 to train
trainingAlg = tf.compat.v1.train.AdagradOptimizer(0.01).minimize(totalLoss)


"""
Train and build the model
"""
# and train!!
with tf.Session() as sess:
    #
    # initialize everything
    sess.run(tf.global_variables_initializer())
    #
    # and run the training iters; each "epoch" here is one optimizer step
    # on one freshly sampled batch, not a full pass over the training set
    for epoch in range(numTrainingIters):
        #
        # get some data
        x, y = generateDataRNN (maxSeqLen, training_data)
        #
        # do the training step; every batch starts from an all-zero state
        _currentState = np.zeros((batchSize, hiddenUnits))
        _totalLoss, _trainingAlg, _currentState, _predictions, _outputs = sess.run(
                [totalLoss, trainingAlg, currentState, predictions, outputs],
                feed_dict={
                    inputX:x,
                    inputY:y,
                    initialState:_currentState
                })
        #
        # just FYI, compute the number of correct predictions: the predicted
        # class of a row is the position of its largest probability
        numCorrect = int(np.sum(np.argmax(_predictions, axis=1) == y))
        #
        # print out to the screen
        print("Step", epoch, "Loss", _totalLoss, "Correct", numCorrect, "out of", batchSize)
    #
    # test: walk through the testing set in consecutive batches.  The
    # placeholders have a fixed batch dimension, so only whole batches can be
    # fed; a trailing partial batch (if any) is left out of the evaluation.
    numTestBatches = len(testing_data) // batchSize
    numTestPoints = numTestBatches * batchSize
    test_loss = 0.0
    numCorrect = 0
    for batchStart in range(0, numTestPoints, batchSize):
        #
        # the testing set is keyed by the consecutive integers 0, 1, 2, ...
        batchKeys = range(batchStart, batchStart + batchSize)
        x = np.stack([testing_data[i][1] for i in batchKeys])
        y = np.stack([np.array((testing_data[i][0])) for i in batchKeys])
        _currentState = np.zeros((batchSize, hiddenUnits))
        #
        # trainingAlg is not run here, so the weights stay untouched
        _totalLoss, _predictions = sess.run(
                [totalLoss, predictions],
                feed_dict={
                    inputX:x,
                    inputY:y,
                    initialState:_currentState
                })
        #
        # accumulate the batch loss and the number of correct predictions
        test_loss += float(_totalLoss)
        numCorrect += int(np.sum(np.argmax(_predictions, axis=1) == y))
    #
    # report the mean of the per-batch losses
    if numTestBatches > 0:
        test_loss /= numTestBatches
    print("Loss for", numTestPoints, "randomly chosen document is", test_loss)
    print("Number correct labels is",numCorrect,"out of", numTestPoints)



[1;30;43mStreaming output truncated to the last 5000 lines.[0m
Step 5002 Loss 0.17445552 Correct 94 out of 100
Step 5003 Loss 0.1511908 Correct 95 out of 100
Step 5004 Loss 0.24650377 Correct 92 out of 100
Step 5005 Loss 0.26489097 Correct 93 out of 100
Step 5006 Loss 0.17391895 Correct 95 out of 100
Step 5007 Loss 0.25076768 Correct 92 out of 100
Step 5008 Loss 0.22374897 Correct 92 out of 100
Step 5009 Loss 0.30870232 Correct 91 out of 100
Step 5010 Loss 0.43856907 Correct 86 out of 100
Step 5011 Loss 0.18942681 Correct 93 out of 100
Step 5012 Loss 0.26958004 Correct 88 out of 100
Step 5013 Loss 0.17928864 Correct 92 out of 100
Step 5014 Loss 0.32385054 Correct 89 out of 100
Step 5015 Loss 0.3024822 Correct 91 out of 100
Step 5016 Loss 0.29755577 Correct 92 out of 100
Step 5017 Loss 0.2687732 Correct 92 out of 100
Step 5018 Loss 0.15613134 Correct 96 out of 100
Step 5019 Loss 0.17235284 Correct 95 out of 100
Step 5020 Loss 0.21114685 Correct 94 out of 100
Step 5021 Loss 0.33466083 

## Task 3

In [10]:
def generateDataFeedForward(maxSeqLen, data):
    """Draw one random batch of batchSize entries, flattened for a feed-forward net.

    maxSeqLen is accepted but not used here.  Entries are chosen uniformly
    at random with replacement, using the integers 0 .. len(data) - 1 as keys.

    Returns the pair (x, y): every row of x is one chosen matrix flattened
    into a single vector, and y is the vector of the matching labels.
    """
    #
    # pick the keys of the entries that make up this batch
    chosen = np.random.randint(0, len(data), batchSize)
    #
    # collect the flattened matrix and the label of every chosen entry
    rows = []
    labels = []
    for key in chosen:
        entry = data[key]
        rows.append(entry[1].flatten())
        labels.append(entry[0])
    #
    # stack both lists into arrays and return the pair
    return (np.stack(rows), np.stack(labels))


In [12]:
# sizes of the first and the second hidden layer of the feed-forward net
hiddenUnits = 1000
hiddenUnits2 = 500

# now we build the TensorFlow computation... there are two inputs,
# a batch of text lines and a batch of labels; each line is fed as one
# flat vector of 256 * maxSeqLen values
inputX = tf.placeholder(tf.float32, [batchSize, 256 * maxSeqLen])
inputY = tf.placeholder(tf.int32, [batchSize])

# there is no recurrent state in this network, so no initialState placeholder

# weights and bias of the first hidden layer
W1 = tf.Variable(np.random.normal(0, 0.01, (256 * maxSeqLen, hiddenUnits)), dtype=tf.float32)
b1 = tf.Variable(np.zeros([hiddenUnits]), dtype=tf.float32)

# weights and bias of the second hidden layer
W2 = tf.Variable(np.random.normal(0, 0.01, (hiddenUnits, hiddenUnits2)), dtype=tf.float32)
b2 = tf.Variable(np.zeros([hiddenUnits2]), dtype=tf.float32)

# weights and bias for the final classification
W_output = tf.Variable(np.random.normal(0, 0.05, (hiddenUnits2, numClasses)), dtype=tf.float32)
b_output = tf.Variable(np.zeros([numClasses]), dtype=tf.float32)

# the input is already flat, so it is not unstacked into per-character slices

# compute the set of outputs: two ReLU hidden layers followed by a linear
# layer that produces the logits
layer1_output = tf.nn.relu(tf.matmul(inputX, W1) + b1)
layer2_output = tf.nn.relu(tf.matmul(layer1_output, W2) + b2)
outputs = tf.matmul(layer2_output, W_output) + b_output

predictions = tf.nn.softmax(outputs)

# compute the loss: mean cross entropy over the batch, with integer labels
losses = tf.nn.sparse_softmax_cross_entropy_with_logits(logits=outputs, labels=inputY)
totalLoss = tf.reduce_mean(losses)

# use Adagrad with a learning rate of 0.01 to train
trainingAlg = tf.compat.v1.train.AdagradOptimizer(0.01).minimize(totalLoss)

"""
Train and build the model
"""
# and train!!
with tf.Session() as sess:
    #
    # initialize everything
    sess.run(tf.global_variables_initializer())
    #
    # and run the training iters; each "epoch" here is one optimizer step
    # on one freshly sampled batch, not a full pass over the training set
    for epoch in range(numTrainingIters):
        #
        # get some data
        x, y = generateDataFeedForward (maxSeqLen, training_data)
        #
        # do the training step; there is no recurrent state to feed
        _totalLoss, _trainingAlg, _predictions, _outputs = sess.run(
             [totalLoss, trainingAlg, predictions, outputs],
             feed_dict={
                 inputX: x,
                 inputY: y
              })
        #
        # just FYI, compute the number of correct predictions: the predicted
        # class of a row is the position of its largest probability
        numCorrect = int(np.sum(np.argmax(_predictions, axis=1) == y))
        #
        # print out to the screen
        print("Step", epoch, "Loss", _totalLoss, "Correct", numCorrect, "out of", batchSize)
    #
    # test: evaluate on randomly sampled batches from the testing set, using
    # as many batches as are needed to cover the size of the testing set
    # (sampling is with replacement, so entries can repeat or be missed)
    numTestBatches = (len(testing_data) + batchSize - 1) // batchSize
    numTestPoints = numTestBatches * batchSize
    test_loss = 0.0
    numCorrect = 0
    for _ in range(numTestBatches):
        x, y = generateDataFeedForward(maxSeqLen, testing_data)
        #
        # trainingAlg is not run here, so the weights stay untouched
        _totalLoss, _predictions = sess.run(
                [totalLoss, predictions],
                feed_dict={
                    inputX:x,
                    inputY:y,
                })
        #
        # accumulate the batch loss and the number of correct predictions
        test_loss += float(_totalLoss)
        numCorrect += int(np.sum(np.argmax(_predictions, axis=1) == y))
    #
    # report the mean of the per-batch losses
    if numTestBatches > 0:
        test_loss /= numTestBatches
    print("Loss for", numTestPoints, "randomly chosen document is", test_loss)
    print("Number correct labels is",numCorrect,"out of", numTestPoints)



[1;30;43mStreaming output truncated to the last 5000 lines.[0m
Step 5002 Loss 0.07544601 Correct 99 out of 100
Step 5003 Loss 0.046914507 Correct 99 out of 100
Step 5004 Loss 0.021030119 Correct 100 out of 100
Step 5005 Loss 0.07382551 Correct 98 out of 100
Step 5006 Loss 0.05195565 Correct 98 out of 100
Step 5007 Loss 0.0436834 Correct 100 out of 100
Step 5008 Loss 0.034364834 Correct 100 out of 100
Step 5009 Loss 0.07999952 Correct 97 out of 100
Step 5010 Loss 0.057697162 Correct 98 out of 100
Step 5011 Loss 0.037257582 Correct 99 out of 100
Step 5012 Loss 0.021838628 Correct 100 out of 100
Step 5013 Loss 0.06965373 Correct 97 out of 100
Step 5014 Loss 0.022572987 Correct 100 out of 100
Step 5015 Loss 0.09259837 Correct 98 out of 100
Step 5016 Loss 0.0317588 Correct 100 out of 100
Step 5017 Loss 0.03159101 Correct 100 out of 100
Step 5018 Loss 0.0604316 Correct 97 out of 100
Step 5019 Loss 0.033359494 Correct 99 out of 100
Step 5020 Loss 0.031390484 Correct 100 out of 100
Step 5021

## Task 4

In [35]:
# the number of hidden neurons that hold the state of the RNN
hiddenUnits = 500

# now we build the TensorFlow computation... there are two inputs,
# a batch of text lines and a batch of labels; the batch dimension is
# fixed to batchSize, so every feed must contain exactly batchSize rows
inputX = tf.placeholder(tf.float32, [batchSize, 256, maxSeqLen])
inputY = tf.placeholder(tf.int32, [batchSize])

# this is the initial state of the RNN, before processing any data
initialState = tf.placeholder(tf.float32, [batchSize, hiddenUnits])

# Convolutional layer: number of filters, and how many consecutive
# characters each filter looks at
num_filters = 32
filter_size = 15
# (a Keras Conv1D layer was tried here earlier; conv_1d below is used instead)

def conv_1d(input_data, num_filters, filter_size):
    """Slide num_filters learned filters along the last axis of input_data.

    The convolution uses stride 1 and no padding, and applies neither a bias
    nor an activation.  Calling this function creates a new, randomly
    initialised filter variable in the current graph.

    Args:
        input_data: rank-3 tensor [batch, channels, length] whose channel and
            length dimensions are statically known.
        num_filters: number of output values computed per window.
        filter_size: number of consecutive positions covered by one window.

    Returns:
        A tensor of shape [batch, num_filters, length - filter_size + 1].

    Raises:
        ValueError: if filter_size is larger than the sequence length.
    """
    # read the sizes off the input instead of assuming 256 channels
    num_channels = int(input_data.shape[1])
    num_positions = int(input_data.shape[2]) - filter_size + 1
    if num_positions <= 0:
        raise ValueError("filter_size must not exceed the sequence length")
    filters = tf.Variable(tf.random.normal([filter_size, num_channels, num_filters]), dtype=tf.float32)
    #
    # the flattened filter matrix is the same for every window, so build it
    # once rather than adding one reshape op per position to the graph.
    # NOTE: each window below is flattened in (channel, position) order, so
    # the rows of this matrix are simply learned weights for that layout.
    filter_reshaped = tf.reshape(filters, [filter_size * num_channels, num_filters])
    outputs = []
    for i in range(num_positions):
        #
        # cut out one window and flatten it to a single row per example
        window = input_data[:, :, i:i + filter_size]
        window = tf.reshape(window, [-1, filter_size * num_channels])
        outputs.append(tf.matmul(window, filter_reshaped))
    #
    # window positions become the last axis
    return tf.stack(outputs, axis=2)

# run the convolution over the input batch; conv_1d stacks one
# [batchSize, num_filters] result per window position along axis 2
conv_layer = conv_1d(inputX, num_filters, filter_size)

# Wfir maps the concatenation of the current conv output (num_filters values)
# and the hidden state (hiddenUnits values) to a new vector of hiddenUnits
# values; Wsec is a second hiddenUnits x hiddenUnits layer applied on top
Wfir = tf.Variable(np.random.normal(0, 0.03, (hiddenUnits + num_filters, hiddenUnits)) , dtype=tf.float32)
Wsec = tf.Variable(np.random.normal(0, 0.03, (hiddenUnits, hiddenUnits)), dtype=tf.float32)

# weights and bias for the final classification
W2 = tf.Variable(np.random.normal (0, 0.05, (hiddenUnits, numClasses)),dtype=tf.float32)
b2 = tf.Variable(np.zeros((1,numClasses)), dtype=tf.float32)


# unpack the convolution output along its last axis so that we have a
# series of matrices, one per window position; each matrix holds the
# num_filters filter responses for every sequence in the batch (these are
# no longer one-hot character encodings)
sequenceOfLetters = tf.unstack(conv_layer, axis=2)

"""
Forward Path
"""
# now we implement the forward path; the loop is unrolled into the graph,
# adding one pair of tanh layers per window position
currentState = initialState
for timeTick in sequenceOfLetters:
    #
    # concatenate the state with the input, then compute the next state
    # by passing it through two tanh layers (neither layer has a bias)
    inputPlusState = tf.concat([timeTick, currentState], 1)
    next_state = tf.tanh(tf.matmul(inputPlusState, Wfir))
    last_state = tf.tanh(tf.matmul(next_state, Wsec))
    currentState = last_state

# compute the set of outputs (class logits) from the final state
outputs = tf.matmul(currentState, W2) + b2

# class probabilities, used only for counting correct predictions
predictions = tf.nn.softmax(outputs)

# compute the loss: mean cross entropy between the logits and integer labels
losses = tf.nn.sparse_softmax_cross_entropy_with_logits(logits=outputs, labels=inputY)
totalLoss = tf.reduce_mean(losses)

# train with the Adagrad optimizer, using a learning rate of 0.005
trainingAlg = tf.compat.v1.train.AdagradOptimizer(0.005).minimize(totalLoss)


"""
Train and build the model
"""
# and train!!
def _countCorrect(predictions, labels):
    """Return how many rows of predictions have their largest entry at the labelled class."""
    predicted = np.argmax(predictions, axis=1)
    return int(np.sum(predicted == np.asarray(labels).reshape(-1)))


# train the model, then evaluate it on the held-out test data
with tf.Session() as sess:
    #
    # initialize everything
    sess.run(tf.global_variables_initializer())
    #
    # every batch starts from the same all-zero hidden state, so build it once
    zeroState = np.zeros((batchSize, hiddenUnits))
    #
    # and run the training iters
    for epoch in range(numTrainingIters):
        #
        # get some data
        x, y = generateDataRNN(maxSeqLen, training_data)
        #
        # do the training step; trainingAlg has to be fetched for the
        # weights to be updated
        _totalLoss, _trainingAlg, _currentState, _predictions, _outputs = sess.run(
                [totalLoss, trainingAlg, currentState, predictions, outputs],
                feed_dict={
                    inputX: x,
                    inputY: y,
                    initialState: zeroState
                })
        #
        # just FYI, compute the number of correct predictions
        numCorrect = _countCorrect(_predictions, y)
        #
        # print out to the screen
        print("Step", epoch, "Loss", _totalLoss, "Correct", numCorrect, "out of", batchSize)
    #
    # test: the placeholders have a fixed batch size, so only whole batches
    # can be fed; any leftover documents at the end are not evaluated
    numTestBatches = len(testing_data) // batchSize
    numTested = numTestBatches * batchSize
    test_loss = 0
    numCorrect = 0
    for j in range(0, numTested, batchSize):
        #
        # testing_data[i] is read as a (label, matrix) pair
        x = np.stack([testing_data[i][1] for i in range(j, j + batchSize)])
        y = np.stack([np.array((testing_data[i][0])) for i in range(j, j + batchSize)])
        _totalLoss, _predictions = sess.run(
                [totalLoss, predictions],
                feed_dict={
                    inputX: x,
                    inputY: y,
                    initialState: zeroState
                })
        #
        # accumulate the mean of the per-batch losses
        test_loss += _totalLoss / numTestBatches
        #
        # calculate number of correct predictions
        numCorrect += _countCorrect(_predictions, y)
    print("Loss for", numTested, "randomly chosen document is", test_loss)
    print("Number correct labels is", numCorrect, "out of", numTested)


[1;30;43mStreaming output truncated to the last 5000 lines.[0m
Step 5002 Loss 0.68301225 Correct 68 out of 100
Step 5003 Loss 0.5722561 Correct 74 out of 100
Step 5004 Loss 0.6258967 Correct 70 out of 100
Step 5005 Loss 0.61472875 Correct 73 out of 100
Step 5006 Loss 0.60821867 Correct 69 out of 100
Step 5007 Loss 0.5599731 Correct 74 out of 100
Step 5008 Loss 0.65454555 Correct 73 out of 100
Step 5009 Loss 0.60834455 Correct 77 out of 100
Step 5010 Loss 0.55395573 Correct 77 out of 100
Step 5011 Loss 0.6195174 Correct 72 out of 100
Step 5012 Loss 0.604168 Correct 75 out of 100
Step 5013 Loss 0.6853349 Correct 66 out of 100
Step 5014 Loss 0.6255255 Correct 66 out of 100
Step 5015 Loss 0.51803714 Correct 80 out of 100
Step 5016 Loss 0.5442308 Correct 73 out of 100
Step 5017 Loss 0.60261226 Correct 70 out of 100
Step 5018 Loss 0.70166534 Correct 63 out of 100
Step 5019 Loss 0.6592189 Correct 75 out of 100
Step 5020 Loss 0.67490876 Correct 71 out of 100
Step 5021 Loss 0.6700247 Correct 

In [14]:
# report the static shapes of the main tensors in the graph built above
for label, tensor in (
        ("conv_output:", conv_layer),
        ("timeTick shape:", timeTick),
        ("currentState shape:", currentState),
        ("Wfir", Wfir),
        ("inputPlusState", inputPlusState),
        ("Wsec", Wsec),
):
    print(label, tensor.shape)

conv_output: (100, 8, 74)
timeTick shape: (100, 8)
currentState shape: (100, 500)
Wfir (508, 500)
inputPlusState (100, 508)
Wsec (500, 500)


In [48]:
# size of the hidden state vector carried by the RNN in this experiment
hiddenUnits = 500

# graph inputs: one batch of encoded text lines and the matching class labels.
# inputX is laid out as [batchSize, 256, maxSeqLen]; the 256 axis is presumably
# the one-hot ASCII dimension produced by the data loader -- confirm there.
inputX = tf.placeholder(dtype=tf.float32, shape=[batchSize, 256, maxSeqLen])
inputY = tf.placeholder(dtype=tf.int32, shape=[batchSize])

# hidden state fed to the RNN before it has processed any input
initialState = tf.placeholder(dtype=tf.float32, shape=[batchSize, hiddenUnits])

# settings for the convolution applied before the RNN: how many filters are
# learned and how many consecutive sequence positions each filter covers
num_filters, filter_size = 32, 20
#conv_layer = tf.keras.layers.Conv1D(filters=num_filters, kernel_size=filter_size, padding='same', activation='relu')(inputX)

def conv_1d(input_data, num_filters, filter_size):
    """Slide num_filters learned filters along the last axis of input_data.

    The convolution uses stride 1 and no padding, and applies neither a bias
    nor an activation.  Calling this function creates a new, randomly
    initialised filter variable in the current graph.

    Args:
        input_data: rank-3 tensor [batch, channels, length] whose channel and
            length dimensions are statically known.
        num_filters: number of output values computed per window.
        filter_size: number of consecutive positions covered by one window.

    Returns:
        A tensor of shape [batch, num_filters, length - filter_size + 1].

    Raises:
        ValueError: if filter_size is larger than the sequence length.
    """
    # read the sizes off the input instead of assuming 256 channels
    num_channels = int(input_data.shape[1])
    num_positions = int(input_data.shape[2]) - filter_size + 1
    if num_positions <= 0:
        raise ValueError("filter_size must not exceed the sequence length")
    filters = tf.Variable(tf.random.normal([filter_size, num_channels, num_filters]), dtype=tf.float32)
    #
    # the flattened filter matrix is the same for every window, so build it
    # once rather than adding one reshape op per position to the graph.
    # NOTE: each window below is flattened in (channel, position) order, so
    # the rows of this matrix are simply learned weights for that layout.
    filter_reshaped = tf.reshape(filters, [filter_size * num_channels, num_filters])
    outputs = []
    for i in range(num_positions):
        #
        # cut out one window and flatten it to a single row per example
        window = input_data[:, :, i:i + filter_size]
        window = tf.reshape(window, [-1, filter_size * num_channels])
        outputs.append(tf.matmul(window, filter_reshaped))
    #
    # window positions become the last axis
    return tf.stack(outputs, axis=2)

# run the convolution over the input batch; conv_1d stacks one
# [batchSize, num_filters] result per window position along axis 2
conv_layer = conv_1d(inputX, num_filters, filter_size)

# Wfir maps the concatenation of the current conv output (num_filters values)
# and the hidden state (hiddenUnits values) to a new vector of hiddenUnits
# values; Wsec is a second hiddenUnits x hiddenUnits layer applied on top
Wfir = tf.Variable(np.random.normal(0, 0.02, (hiddenUnits + num_filters, hiddenUnits)) , dtype=tf.float32)
Wsec = tf.Variable(np.random.normal(0, 0.02, (hiddenUnits, hiddenUnits)), dtype=tf.float32)

# weights and bias for the final classification
W2 = tf.Variable(np.random.normal (0, 0.05, (hiddenUnits, numClasses)),dtype=tf.float32)
b2 = tf.Variable(np.zeros((1,numClasses)), dtype=tf.float32)


# unpack the convolution output along its last axis so that we have a
# series of matrices, one per window position; each matrix holds the
# num_filters filter responses for every sequence in the batch (these are
# no longer one-hot character encodings)
sequenceOfLetters = tf.unstack(conv_layer, axis=2)

"""
Forward Path
"""
# now we implement the forward path; the loop is unrolled into the graph,
# adding one pair of tanh layers per window position
currentState = initialState
for timeTick in sequenceOfLetters:
    #
    # concatenate the state with the input, then compute the next state
    # by passing it through two tanh layers (neither layer has a bias)
    inputPlusState = tf.concat([timeTick, currentState], 1)
    next_state = tf.tanh(tf.matmul(inputPlusState, Wfir))
    last_state = tf.tanh(tf.matmul(next_state, Wsec))
    currentState = last_state

# compute the set of outputs (class logits) from the final state
outputs = tf.matmul(currentState, W2) + b2

# class probabilities, used only for counting correct predictions
predictions = tf.nn.softmax(outputs)

# compute the loss: mean cross entropy between the logits and integer labels
losses = tf.nn.sparse_softmax_cross_entropy_with_logits(logits=outputs, labels=inputY)
totalLoss = tf.reduce_mean(losses)

# train with the Adagrad optimizer, using a learning rate of 0.005
trainingAlg = tf.compat.v1.train.AdagradOptimizer(0.005).minimize(totalLoss)


"""
Train and build the model
"""
# and train!!
def _countCorrect(predictions, labels):
    """Return how many rows of predictions have their largest entry at the labelled class."""
    predicted = np.argmax(predictions, axis=1)
    return int(np.sum(predicted == np.asarray(labels).reshape(-1)))


# train the model, then evaluate it on the held-out test data
with tf.Session() as sess:
    #
    # initialize everything
    sess.run(tf.global_variables_initializer())
    #
    # every batch starts from the same all-zero hidden state, so build it once
    zeroState = np.zeros((batchSize, hiddenUnits))
    #
    # and run the training iters
    for epoch in range(numTrainingIters):
        #
        # get some data
        x, y = generateDataRNN(maxSeqLen, training_data)
        #
        # do the training step; trainingAlg has to be fetched for the
        # weights to be updated
        _totalLoss, _trainingAlg, _currentState, _predictions, _outputs = sess.run(
                [totalLoss, trainingAlg, currentState, predictions, outputs],
                feed_dict={
                    inputX: x,
                    inputY: y,
                    initialState: zeroState
                })
        #
        # just FYI, compute the number of correct predictions
        numCorrect = _countCorrect(_predictions, y)
        #
        # print out to the screen
        print("Step", epoch, "Loss", _totalLoss, "Correct", numCorrect, "out of", batchSize)
    #
    # test: the placeholders have a fixed batch size, so only whole batches
    # can be fed; any leftover documents at the end are not evaluated
    numTestBatches = len(testing_data) // batchSize
    numTested = numTestBatches * batchSize
    test_loss = 0
    numCorrect = 0
    for j in range(0, numTested, batchSize):
        #
        # testing_data[i] is read as a (label, matrix) pair
        x = np.stack([testing_data[i][1] for i in range(j, j + batchSize)])
        y = np.stack([np.array((testing_data[i][0])) for i in range(j, j + batchSize)])
        _totalLoss, _predictions = sess.run(
                [totalLoss, predictions],
                feed_dict={
                    inputX: x,
                    inputY: y,
                    initialState: zeroState
                })
        #
        # accumulate the mean of the per-batch losses
        test_loss += _totalLoss / numTestBatches
        #
        # calculate number of correct predictions
        numCorrect += _countCorrect(_predictions, y)
    print("Loss for", numTested, "randomly chosen document is", test_loss)
    print("Number correct labels is", numCorrect, "out of", numTested)


[1;30;43mStreaming output truncated to the last 5000 lines.[0m
Step 5002 Loss 0.9048325 Correct 53 out of 100
Step 5003 Loss 0.9493046 Correct 58 out of 100
Step 5004 Loss 0.9352542 Correct 54 out of 100
Step 5005 Loss 0.8470855 Correct 63 out of 100
Step 5006 Loss 0.90959066 Correct 60 out of 100
Step 5007 Loss 0.87425065 Correct 62 out of 100
Step 5008 Loss 0.8788588 Correct 58 out of 100
Step 5009 Loss 0.80705225 Correct 66 out of 100
Step 5010 Loss 0.897964 Correct 64 out of 100
Step 5011 Loss 0.93584234 Correct 55 out of 100
Step 5012 Loss 0.9357376 Correct 51 out of 100
Step 5013 Loss 0.8296272 Correct 65 out of 100
Step 5014 Loss 0.9310418 Correct 53 out of 100
Step 5015 Loss 1.0778213 Correct 46 out of 100
Step 5016 Loss 0.93405336 Correct 50 out of 100
Step 5017 Loss 0.8654205 Correct 57 out of 100
Step 5018 Loss 0.89247286 Correct 55 out of 100
Step 5019 Loss 0.8994996 Correct 58 out of 100
Step 5020 Loss 0.95143837 Correct 55 out of 100
Step 5021 Loss 0.92678404 Correct 53