## 7. Reference Model With Longer Sequence Application

In [1]:
# import libraries
import numpy as np
import random
from random import randint
from pickle import load
from tensorflow.keras.models import load_model
from tensorflow.keras.preprocessing.sequence import pad_sequences
from nltk.translate.bleu_score import sentence_bleu, corpus_bleu, SmoothingFunction
from rouge import Rouge

# set random seed
random.seed(1234)

### 7.1 Load Text Document

In [2]:
# load doc into memory
def load_doc(filename):
    # open the file as read only
    file = open(filename,'r')
    # read all text
    text = file.read()
    # close the file
    file.close()
    return text

#### Load Frank Kafka - Metamorphosis Text

In [3]:
# load cleaned text sequences from metamorphosis
in_filename = '../data/Text_Sequences_100_meta.txt'
doc = load_doc(in_filename)
meta_lines = doc.split('\n')

#### Load Frank Kafka - The Castle Text

In [4]:
# load cleaned text sequences from the castle
in_filename = '../data/Text_Sequences_100_castle.txt'
doc = load_doc(in_filename)
castle_lines = doc.split('\n')

#### Load Haruki Murakami - Kafka on the Shore Text

In [5]:
# load cleaned text sequences from kafka on the shore
in_filename = '../data/Text_Sequences_100_kots.txt'
doc = load_doc(in_filename)
kots_lines = doc.split('\n')

#### Sequence Length

In [6]:
# number of words in each line
len(meta_lines[0].split())

101

In [7]:
# seq_length will be the number of words minus the expected output word
seq_length = len(meta_lines[0].split()) - 1
seq_length

100

### 7.2 Load Model

In [8]:
# load reference model
model = load_model('model_ref_ls.h5')

In [9]:
# load tokenizer
tokenizer = load(open('tokenizer.pkl', 'rb'))

### 7.3 Generate Text

#### Define sequence generating function

In [10]:
# generate a sequence from the language model
def generate_seq(model, tokenizer, seq_length, seed_text, n_words):
    result = list()
    in_text = seed_text
    # generate a fixed number of words
    for _ in range(n_words):
        # encode seed text to integers using tokenizer
        # index 0 must be specified to extract list of intergers from array
        encoded = tokenizer.texts_to_sequences([in_text])[0]
        # truncate sequences into a fixed length
        encoded = pad_sequences([encoded], maxlen=seq_length, truncating='pre')
        # predict probabilities for each word
        # replaced yhat = model.predict_classes(encoded,verbose=0)
        yhat = np.argmax(model.predict(encoded), axis=-1)
        # map predicted word index to word
        out_word = ''
        for word, index in tokenizer.word_index.items():
            if index == yhat:
                out_word = word
                break
        # append to input
        in_text += ' ' + out_word
        result.append(out_word)
    return ' '.join(result)

#### Metamorphosis - Seed Text

In [11]:
# select seed text from Metamorphosis
meta_index = randint(0, len(meta_lines)-120)
meta_seed_text = meta_lines[meta_index]
print("Seed text taken from index: " + str(meta_index) + '\n')
print(meta_seed_text + '\n')

Seed text taken from index: 14441

sister would urge each other to be quiet his mother bent deeply under the lamp would sew fancy underwear for a fashion shop his sister who had taken a sales job learned shorthand and french in the evenings so that she might be able to get a better position later on sometimes his father would wake up and say to mother doing so much sewing again as if he did not know that he had been then he would go back to sleep again while mother and sister would exchange a tired grin with a kind of stubbornness father refused to



#### Metamorphosis - Reference Text 

In [12]:
meta_ref = meta_lines[meta_index+101]
print(meta_ref)

take his uniform off even at home while his nightgown hung unused on its peg father would slumber where he was fully dressed as if always ready to serve and expecting to hear the voice of his superior even here the uniform had not been new to start with but as a result of this it slowly became even shabbier despite the efforts of mother and sister to look after it gregor would often spend the whole evening looking at all the stains on this coat with its gold buttons always kept polished and shiny while the old man in it


####  Metamorphosis - Predicted Text

In [13]:
# input length of output sequence; n_words = 100 
# seq_length = 100 as assigned when loading the text document
meta_pred = generate_seq(model, tokenizer, seq_length, meta_seed_text, 100)
print(meta_pred)

stay of bed as anxious as it would have been in room which the behind but he had to be natural when gregor was already strikes together way out of the new swinging was more of a length than an effort but now and then stay of her own into would be built just seriously up with the recommend of their time for an leading again it would only close his head made him loud it if if were all they would have completely it i your far i but as a load of knew in the room might that


In [14]:
# input length of output sequence; n_words = 100 
# seq_length = 100 as assigned when loading the text document
meta_pred_50 = generate_seq(model, tokenizer, seq_length, meta_seed_text, 50)
print(meta_pred_50)

stay of bed as anxious as it would have been in room which the behind but he had to be natural when gregor was already strikes together way out of the new swinging was more of a length than an effort but now and then stay of her own into


#### The Castle - Seed Text

In [15]:
# select seed text from The Castle
castle_index = randint(0, len(castle_lines)-120)
castle_seed_text = castle_lines[castle_index]
print("Seed text taken from index: " + str(castle_index) + '\n')
print(castle_seed_text + '\n')

Seed text taken from index: 119

not want to talk to anyone got himself a straw mattress from the attic and lay down by the stove it was warm the peasants were quiet he examined them for a moment with tired eyes then fell asleep yet before long he was awakened a young man in city clothes with an actors face narrow eyes thick eyebrows stood beside him with the landlord the peasants too were still there a few had turned their chairs around to see and hear better the young man apologized very politely for having awakened k introduced himself as the son of the castle



#### The Castle - Reference Text

In [16]:
castle_ref = castle_lines[castle_index+101]
print(castle_ref)

steward and said this village is castle property anybody residing or spending the night here is effectively residing or spending the night at the castle nobody may do so without permission from the count but you have no such permission or at least you havent shown it yet k who had half risen and smoothed his hair looked at the people from below and said what village have i wandered into so there is a castle here why of course the young man said slowly while several peasants here and there shook their heads at k the castle of count westwest


#### The Castle - Predicted Text

In [17]:
# input length of output sequence; n_words = 100 
# seq_length = 50 as assigned when loading the text document
castle_pred = generate_seq(model, tokenizer, seq_length, castle_seed_text, 100)
print(castle_pred)

taking in moving mr just the chief clerk not knowing that it was not looked simple to get think the chief clerk himself for a little while not she being them it soon persecuting at her and but she would closer their into came unsure gregor he remained all a little had there he felt his into who came back and over in recent the gazing several high his into would have instance he had forefinger grasp from the living room and foot up and down the effort and he must have something him tired at words and was still


In [18]:
# input length of output sequence; n_words = 100 
# seq_length = 50 as assigned when loading the text document
castle_pred_50 = generate_seq(model, tokenizer, seq_length, castle_seed_text, 50)
print(castle_pred_50)

taking in moving mr just the chief clerk not knowing that it was not looked simple to get think the chief clerk himself for a little while not she being them it soon persecuting at her and but she would closer their into came unsure gregor he remained all a


#### Kafka on the Shore - Seed Text

In [19]:
# select seed text from Kafka on the Shore
kots_index = randint(0, len(kots_lines)-120)
kots_seed_text = kots_lines[kots_index]
print("Seed text taken from index: " + str(kots_index) + '\n')
print(kots_seed_text + '\n')

Seed text taken from index: 3

i take from my study when i leave home i take a small old gold like the design and feel of a folding knife with a really sharp blade made to skin deer it has a five inch blade and a nice heft probably something he bought on one of his trips abroad i also take a sturdy bright pocket flashlight out of a drawer plus sky blue revo sunglasses to disguise my age i think about taking my favorite sea dweller oyster rolex a beautiful watch but something flashy will only attract attention my cheap plastic casio watch with an



#### Kafka on the Shore - Reference Text

In [20]:
kots_ref = kots_lines[kots_index+101]

#### Kafka on the Shore - Predicted Text

In [21]:
# input length of output sequence; n_words = 100 
# seq_length = 50 as assigned when loading the text document
kots_pred = generate_seq(model, tokenizer, seq_length, kots_seed_text, 100)
print(kots_pred)

crawled that he would miracle the taking in the hall she held mother the floor he could rather without lifted up all the evening but thoughts of the finished took things on the started that he was wrong he had used samsa mother room others any favour of the lodged them once on their against heaviest opened did not great his head back from the bed darkness dinner away were done out from become the behind and mouth down all this maybe it was to great which it the door he could still give their liked on his travellers and


In [22]:
# input length of output sequence; n_words = 100 
# seq_length = 50 as assigned when loading the text document
kots_pred_50 = generate_seq(model, tokenizer, seq_length, kots_seed_text, 50)
print(kots_pred_50)

crawled that he would miracle the taking in the hall she held mother the floor he could rather without lifted up all the evening but thoughts of the finished took things on the started that he was wrong he had used samsa mother room others any favour of the lodged


### 7.3 Evaluate Model

#### BLEU Score

In [23]:
cc = SmoothingFunction()
def bleu(ref, gen):
    ref = [ref.split(" ")]
    gen = gen.split(" ")
        
    print('Cumulative 1-gram: %f' % sentence_bleu(ref, gen, weights=(1, 0, 0, 0), smoothing_function=cc.method1))
    print('Cumulative 2-gram: %f' % sentence_bleu(ref, gen, weights=(0.5, 0.5, 0, 0), smoothing_function=cc.method1))
    print('Cumulative 3-gram: %f' % sentence_bleu(ref, gen, weights=(0.33, 0.33, 0.33, 0), smoothing_function=cc.method1))
    print('Cumulative 4-gram: %f' % sentence_bleu(ref, gen, weights=(0.25, 0.25, 0.25, 0.25), smoothing_function=cc.method1))

In [24]:
print("BLEU Score for Metamorphosis" + "\n")
bleu(meta_ref, meta_pred)

BLEU Score for Metamorphosis

Cumulative 1-gram: 0.287114
Cumulative 2-gram: 0.075780
Cumulative 3-gram: 0.039987
Cumulative 4-gram: 0.015599


In [25]:
print("BLEU Score for The Castle" + "\n")
bleu(castle_ref, castle_pred)

BLEU Score for The Castle

Cumulative 1-gram: 0.257413
Cumulative 2-gram: 0.050737
Cumulative 3-gram: 0.014353
Cumulative 4-gram: 0.007178


In [26]:
print("BLEU Score for The Kafka on the Shore" + "\n")
bleu(kots_ref, kots_pred)

BLEU Score for The Kafka on the Shore

Cumulative 1-gram: 0.148507
Cumulative 2-gram: 0.012187
Cumulative 3-gram: 0.005599
Cumulative 4-gram: 0.003518


#### ROUGE Score

In [31]:
# instantiate Rouge scoring function
rouge = Rouge()

In [32]:
# get rouge score for The Metamorphosis
rouge.get_scores(meta_pred, meta_ref)

[{'rouge-1': {'r': 0.273972602739726,
   'p': 0.2777777777777778,
   'f': 0.2758620639657551},
  'rouge-2': {'r': 0.02, 'p': 0.020833333333333332, 'f': 0.02040815826738981},
  'rouge-l': {'r': 0.1232876712328767, 'p': 0.125, 'f': 0.12413792603472078}}]

In [33]:
# get rouge score for The Castle
rouge.get_scores(castle_pred, castle_ref)

[{'rouge-1': {'r': 0.26153846153846155,
   'p': 0.24285714285714285,
   'f': 0.25185184685871065},
  'rouge-2': {'r': 0.010752688172043012,
   'p': 0.010526315789473684,
   'f': 0.01063829287290864},
  'rouge-l': {'r': 0.1076923076923077, 'p': 0.1, 'f': 0.10370369871056266}}]

In [34]:
# get rouge score for Kafka on the Shore
rouge.get_scores(kots_pred, kots_ref)

[{'rouge-1': {'r': 0.1232876712328767,
   'p': 0.1232876712328767,
   'f': 0.12328766623287692},
  'rouge-2': {'r': 0.0, 'p': 0.0, 'f': 0.0},
  'rouge-l': {'r': 0.0547945205479452,
   'p': 0.0547945205479452,
   'f': 0.05479451554794566}}]