<a href="https://colab.research.google.com/github/mhuckvale/pals0039/blob/master/Answers_7_1.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

[![PALS0039 Logo](https://www.phon.ucl.ac.uk/courses/pals0039/images/pals0039logo.png)](https://www.phon.ucl.ac.uk/courses/pals0039/)

# Exercise 7.1 Answers


(a) setup

In [0]:
import requests
import numpy as np

%tensorflow_version 2.x
from tensorflow.keras.models import Sequential, Model
from tensorflow.keras.layers import Dense, Embedding, Flatten, SimpleRNN, LSTM, GRU, Bidirectional, Dropout, TimeDistributed
from tensorflow.keras.preprocessing.sequence import pad_sequences

(b) read in text file and convert to integer list

In [0]:
url = "https://www.phon.ucl.ac.uk/courses/pals0039/data/alice.txt"
# Bound the wait so a stalled connection cannot hang the notebook indefinitely.
response = requests.get(url, timeout=30)
# Stop on an HTTP error instead of silently treating an error page as the corpus.
response.raise_for_status()
# Lower-case the text and replace newlines with spaces so it is one continuous character stream.
raw_text = response.text.lower().replace('\n',' ')
print(raw_text[:250])

In [0]:
# Build the character vocabulary: each distinct character is given the
# integer id of its position in the sorted vocabulary.
chars = sorted(set(raw_text))
char_to_int = {ch: idx for idx, ch in enumerate(chars)}
print(char_to_int)
NCHAR = len(chars)
print(NCHAR)
# Encode the whole text as a list of character ids.
raw_seq = [char_to_int[ch] for ch in raw_text]
print(raw_seq[:100])

(c) divide into train and test

In [0]:
# The first 90% of the encoded text is used for training, the remainder for testing.
ntrain = int(0.9 * len(raw_seq))
train_seq, test_seq = raw_seq[:ntrain], raw_seq[ntrain:]
print(len(train_seq), len(test_seq))

(d) build training sequences

In [0]:
def prepare_sequences(text,seqlen):
  """Cut a symbol sequence into non-overlapping windows with next-symbol targets.

  Window i covers text[i*seqlen : (i+1)*seqlen]; its label row is the same
  span advanced by one position, so labels[i,t] is the symbol that follows
  feats[i,t] in the text.

  Args:
    text: sequence of numeric symbol ids (list or 1-D array).
    seqlen: number of symbols per window; must be positive.

  Returns:
    (feats, labels): two independent float64 arrays of shape (nseq, seqlen).
    When text is too short to fill a single window plus one following
    symbol, both arrays have shape (0, seqlen).

  Raises:
    ValueError: if seqlen is not positive.
  """
  if seqlen<=0:
    raise ValueError("seqlen must be a positive integer")
  # One symbol beyond the last window is needed to label its final position;
  # clamp at zero so empty or very short input gives empty arrays, not an error.
  nseq=max(0,(len(text)-1)//seqlen)
  used=nseq*seqlen
  data=np.asarray(text[:used+1],dtype=np.float64)
  # Copy so the two results do not share memory with each other.
  feats=data[:used].reshape(nseq,seqlen).copy()         # input is text sequence
  labels=data[1:used+1].reshape(nseq,seqlen).copy()     # output is text sequence advanced by 1
  return feats,labels

# Window length, in characters, shared by the training and test sequences.
seqlen = 100

# Slice each split into fixed-length (input, next-character target) pairs.
Xtrain, ytrain = prepare_sequences(train_seq, seqlen)
Xtest, ytest = prepare_sequences(test_seq, seqlen)

# Report the resulting array shapes: training first, then test.
print(Xtrain.shape, ytrain.shape)
print(Xtest.shape, ytest.shape)


(e) build a model

In [0]:
import tensorflow as tf
def perplexity(y_true, y_pred):
    """Metric: exponential of the mean sparse categorical cross-entropy.

    y_true holds integer class ids and y_pred the predicted class
    probabilities, as expected by sparse_categorical_crossentropy.
    """
    mean_xent = tf.reduce_mean(
        tf.losses.sparse_categorical_crossentropy(y_true, y_pred)
    )
    return tf.exp(mean_xent)

# One output unit per character in the vocabulary.
osize=NCHAR

# Character-level language model: embedding -> two stacked LSTMs -> a softmax
# over the vocabulary at every time step.
model = Sequential()
model.add(Embedding(input_dim=NCHAR, output_dim=64,input_length=seqlen))
# return_sequences=True keeps the output at every position, so the model
# predicts a next character for each input character.
model.add(LSTM(256,return_sequences=True,activation='tanh'))
model.add(LSTM(256,return_sequences=True,activation='tanh'))
model.add(TimeDistributed(Dense(osize, activation='softmax')))

# compile the network
# The sparse loss takes integer class ids as targets, so no one-hot encoding is needed.
model.compile(loss='sparse_categorical_crossentropy', optimizer='rmsprop', metrics=[perplexity])
# summary() prints the table itself and returns None; wrapping it in print()
# would add a stray "None" line to the output.
model.summary()

(f) train model

In [0]:

# Fit the network, using the held-out test sequences as validation data
# so that loss and perplexity are reported on them after every epoch.
history = model.fit(
    Xtrain,
    ytrain,
    epochs=100,
    batch_size=64,
    validation_data=(Xtest, ytest),
)
print(history.history)

(g) calculate perplexity

In [0]:
import math

# Predicted character distribution at every position of every test sequence.
ypred=model.predict(Xtest)
print(ypred.shape)
nseq,seqlen=ypred.shape[:2]
# Flatten the sequence and time axes so each row is one character prediction.
ypred=np.reshape(ypred,(nseq*seqlen,ypred.shape[2]))
print(ypred.shape)

ytest_seq=np.reshape(ytest,(nseq*seqlen,1))
print(ytest_seq.shape)

# Probability the model assigned to the character that actually came next.
# A single fancy-indexing lookup replaces the per-character Python loop and
# avoids int() on a one-element array, which NumPy >= 1.25 deprecates.
target_ids=ytest_seq[:,0].astype(int)
probs=ypred[np.arange(target_ids.shape[0]),target_ids]
meanprob=np.mean(probs)
# Mean negative log-probability (in nats); its exponential is the perplexity.
entropy=np.mean(-np.log(probs))
print(meanprob,entropy,math.exp(entropy))

(h) generate some new text

In [0]:
import tensorflow as tf

def generate_text(model, start_string, num_generate=1000, temperature=1.0, seqlen=100):
  """Generate text one character at a time by sampling from the model.

  The start string is encoded with char_to_int and left-padded with id 0 to
  a window of `seqlen` characters. At each step the model predicts over the
  window, the next character is sampled from the distribution at the final
  position, and the window slides along by one character.

  Args:
    model: trained model that maps a (1, seqlen) array of character ids to
      per-position character probabilities.
    start_string: seed text; every character must be a key of char_to_int.
    num_generate: number of characters to generate.
    temperature: sampling temperature. Low temperatures result in more
      predictable text, higher temperatures in more surprising text; 1.0
      samples from the model's distribution unchanged.
    seqlen: window length; should match the input length the model was
      built with.

  Returns:
    start_string followed by the generated characters.

  Raises:
    ValueError: if temperature is not positive.
    KeyError: if start_string contains a character missing from char_to_int.
  """
  if temperature<=0:
    raise ValueError("temperature must be positive")

  # Converting our start string to numbers (vectorizing)
  input_eval = [char_to_int[s] for s in start_string]
  input_eval = pad_sequences([input_eval],maxlen=seqlen,padding='pre',value=0)
  input_eval = np.reshape(input_eval,(1,seqlen))

  # Generated characters, joined into a string at the end
  text_generated = []

  # Here batch size == 1
  # NOTE(review): presumably a no-op unless the recurrent layers were built
  # stateful; kept from the original - confirm before removing.
  model.reset_states()
  for i in range(num_generate):
    # verbose=0 stops predict() printing a progress bar on every step
    predictions = model.predict(input_eval,batch_size=1,verbose=0)

    # remove the batch dimension
    predictions = tf.squeeze(predictions, 0)

    # Scale the log-probabilities by the temperature, then sample from the
    # resulting categorical distribution; [-1,0] picks the sample drawn for
    # the final position of the window.
    logits = tf.math.log(predictions) / temperature
    predicted_id = tf.random.categorical(logits, num_samples=1)[-1,0].numpy()

    # Slide the window left by one and append the sampled character; the
    # window itself is the only context passed to the next prediction.
    input_eval[0,:-1] = input_eval[0,1:]
    input_eval[0,-1] = predicted_id

    text_generated.append(chars[predicted_id])

  return (start_string + ''.join(text_generated))

import textwrap
# Generate a continuation of the seed text and wrap it to 80 columns for display.
text = generate_text(model, start_string="once upon a time ")
print(textwrap.fill(text, width=80))