In [18]:
import numpy as np
import pandas as pd
import os# keras module for building LSTM 
from keras.preprocessing.sequence import pad_sequences
from keras.layers import Embedding, LSTM, Dense, Dropout, GRU, SimpleRNN
from keras.preprocessing.text import Tokenizer
from keras.callbacks import EarlyStopping
from keras.models import Sequential
import keras.utils as ku
import tensorflow.keras.utils as tku
import pandas as pd
import numpy as np
import string, os
import time

In [2]:
hotel_df = pd.read_csv('Seattle_Hotels.csv', encoding="latin-1")
all_descriptions = list(hotel_df.desc.values)

In [3]:
len(all_descriptions)

152

In [4]:
corpus = [x for x in all_descriptions]
corpus[:1]

['Located on the southern tip of Lake Union, the Hilton Garden Inn Seattle Downtown hotel is perfectly located for business and leisure. Non-Smoking\nHotel is 100% non-smoking, including e-cigarettes, in all guest rooms and public areas. A fee of up to $250 USD will be assessed for smoking in a non-smoking room. Please ask the Front Desk for locations of designated outdoor smoking areas. Check-in: 4:00 pm. Check-out: 12:00 pm. Cancellation policies may vary depending on the rate or dates of your reservation. Please refer to your reservation confirmation to verify your cancellation policy.\n']

In [5]:
t = Tokenizer(num_words=None, filters='!"#$%&()*+,-./:;<=>?@[\\]^_`{|}~\t\n', lower=True, split=' ', char_level=False, oov_token=None, document_count=0)
t.fit_on_texts(corpus)

In [6]:
# A dictionary of words and their counts.
print(t.word_counts)

# A dictionary of words and how many documents each appeared in.
print(t.word_docs)

# An integer count of the total number of documents that were used to fit the Tokenizer (i.e. total number of documents)
print(t.document_count)

# A dictionary of words and their uniquely assigned integers.
print(t.word_index)

OrderedDict([('located', 106), ('on', 128), ('the', 1236), ('southern', 1), ('tip', 1), ('of', 526), ('lake', 40), ('union', 31), ('hilton', 11), ('garden', 11), ('inn', 81), ('seattle', 463), ('downtown', 131), ('hotel', 293), ('is', 279), ('perfectly', 6), ('for', 213), ('business', 84), ('and', 1044), ('leisure', 18), ('non', 19), ('smoking', 29), ('100', 10), ('including', 48), ('e', 2), ('cigarettes', 2), ('in', 460), ('all', 104), ('guest', 61), ('rooms', 106), ('public', 9), ('areas', 19), ('a', 610), ('fee', 7), ('up', 46), ('to', 474), ('250', 3), ('usd', 2), ('will', 50), ('be', 49), ('assessed', 2), ('room', 81), ('please', 5), ('ask', 2), ('front', 12), ('desk', 12), ('locations', 2), ('designated', 6), ('outdoor', 24), ('check', 41), ('4', 15), ('00', 19), ('pm', 11), ('out', 36), ('12', 11), ('cancellation', 3), ('policies', 5), ('may', 5), ('vary', 3), ('depending', 2), ('rate', 8), ('or', 159), ('dates', 2), ('your', 184), ('reservation', 5), ('refer', 1), ('confirmatio

In [7]:
print('Found %s unique tokens.' % len(t.word_index))

Found 3428 unique tokens.


In [8]:
# Tokenization
t = Tokenizer(num_words=None, filters='!"#$%&()*+,-./:;<=>?@[\\]^_`{|}~\t\n', lower=True, split=' ', char_level=False, oov_token=None, document_count=0)

def get_sequence_of_tokens(corpus):
    t.fit_on_texts(corpus)
    total_words = len(t.word_index) + 1
    
    input_sequences = []
    for line in corpus:
        token_list = t.texts_to_sequences([line])[0]
        for i in range(1, len(token_list)):
            n_gram_sequence = token_list[:i+1]
            input_sequences.append(n_gram_sequence)
            
    return input_sequences, total_words
input_sequences, total_words = get_sequence_of_tokens(corpus)

In [9]:
input_sequences[:10]

[[24, 22],
 [24, 22, 1],
 [24, 22, 1, 1750],
 [24, 22, 1, 1750, 1751],
 [24, 22, 1, 1750, 1751, 4],
 [24, 22, 1, 1750, 1751, 4, 83],
 [24, 22, 1, 1750, 1751, 4, 83, 114],
 [24, 22, 1, 1750, 1751, 4, 83, 114, 1],
 [24, 22, 1, 1750, 1751, 4, 83, 114, 1, 334],
 [24, 22, 1, 1750, 1751, 4, 83, 114, 1, 334, 335]]

In [10]:
total_words

3429

In [11]:
# pad sequences 
def generate_padded_sequences(input_sequences):
    max_sequence_len = max([len(x) for x in input_sequences])
    input_sequences = np.array(pad_sequences(input_sequences, maxlen = max_sequence_len, padding = 'pre'))
    predictors, label = input_sequences[:,:-1],input_sequences[:,-1]
    label = tku.to_categorical(label, num_classes = total_words)
    
    return predictors, label, max_sequence_len

predictors, label, max_sequence_len = generate_padded_sequences(input_sequences)

In [21]:
def create_model(max_sequence_len, total_words, model_type):
    model = Sequential()
    
    # Add Input Embedding Layer
    model.add(Embedding(total_words, 10, input_length=max_sequence_len - 1))
    
    # Add Hidden Layer 1 - LSTM Layer
    if model_type == 'LSTM':
        model.add(LSTM(100))
    elif model_type == 'GRU':
        model.add(GRU(100))
    elif model_type == 'SimpleRNN':
        model.add(SimpleRNN(100))
    
    model.add(Dropout(0.1))
    
    # Add Output Layer
    model.add(Dense(total_words, activation='softmax'))

    model.compile(loss='categorical_crossentropy', optimizer='adam')
    
    return model

model_LSTM = create_model(max_sequence_len, total_words, 'LSTM')
model_LSTM.summary()

model_GRU = create_model(max_sequence_len, total_words, 'GRU')
model_GRU.summary()

model_SimpleRNN = create_model(max_sequence_len, total_words, 'SimpleRNN')
model_SimpleRNN.summary()

Model: "sequential_3"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding_3 (Embedding)      (None, 505, 10)           34290     
_________________________________________________________________
lstm_1 (LSTM)                (None, 100)               44400     
_________________________________________________________________
dropout_3 (Dropout)          (None, 100)               0         
_________________________________________________________________
dense_3 (Dense)              (None, 3429)              346329    
Total params: 425,019
Trainable params: 425,019
Non-trainable params: 0
_________________________________________________________________
Model: "sequential_4"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding_4 (Embedding)      (None, 505, 10)           34290     
__________________________

In [22]:
epochs = 50
start = time.time()
model_LSTM.fit(predictors, label, epochs=epochs, verbose=5)
end = time.time()
print('LSTM Training Time:',end-start)
start = time.time()
model_GRU.fit(predictors, label, epochs=epochs, verbose=5)
end = time.time()
print('GRU Training Time:',end-start)
start = time.time()
model_SimpleRNN.fit(predictors, label, epochs=epochs, verbose=5)
end = time.time()
print('SimpleRNN Training Time:',end-start)

Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50
LSTM Training Time: 677.3964171409607
Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32

In [23]:
def generate_text(seed_text, next_words, model, max_seq_len):
    for _ in range(next_words):
        token_list = t.texts_to_sequences([seed_text])[0]
        token_list = pad_sequences([token_list], maxlen=max_seq_len-1, padding='pre')
        
        #predicted = model.predict(token_list, verbose=0)
        predicted = np.argmax(model.predict(token_list), axis=-1)
        
        output_word = ''
        
        for word,index in t.word_index.items():
            if index == predicted:
                output_word = word
                break
                
        seed_text = seed_text + " " + output_word
        
    return seed_text.title()

In [24]:
print(generate_text("hilton seattle downtown", 100, model_LSTM, max_sequence_len))
print()
print(generate_text("best western seattle airport hotel", 200, model_LSTM, max_sequence_len))
print()
print(generate_text('located in the heart of downtown seattle', 300, model_LSTM, max_sequence_len))

Hilton Seattle Downtown Seattle Hotel Is Located In The Heart Of Downtown Seattle The Holiday Inn® Seattle Is Near The Space Needle The Seattle Hotel District And The CityS Industrial Center And Emerging Parks And Trails In The Pacific Northwest Are Just Steps From The Pacific Northwest Including Bing Crosby Floors Jimi Detailing Course And An Upgraded Hotel For All The Locals Inside You Will Be Embraced On Opulence Like The Small Alexis Hotel Is Perfectly Situated In The Heart Of The Pacific Northwest And Bing Crosby Bay Park Is A Thriving Distance To A Traditional Hotel At The University Of Washington University

Best Western Seattle Airport Hotel Is The Perfect Gateway Of The Heart Of Downtown Seattle The Hilton Seattle Hotel Is Located In The Heart Of The City And Elliott Bay And The City Of The City Of The Wac And An Unparalleled Course With An Living Area With A Separate Living And Grand Living And Seating And Furnished And Modern Amenities Access To The Light Rail To The Maxwel

In [25]:
print(generate_text("hilton seattle downtown", 100, model_GRU, max_sequence_len))
print()
print(generate_text("best western seattle airport hotel", 200, model_GRU, max_sequence_len))
print()
print(generate_text('located in the heart of downtown seattle', 300, model_GRU, max_sequence_len))

Hilton Seattle Downtown Hotel Seattle Airport Seatac Hotel Is Located Just Steps From Everywhere You Want To Be Here In The Area'S Downtown Seattle Airport Hotel The Museum Of Flight The Seattle Hotels In Seattle Wa Near The Washington State Convention Center And The Space Needle Pike Place Market And The Seattle Waterfront Seattle Hotel Offers The Space Needle Pike Place Market And The Washington Convention Center And Restaurants And Locals Stop By The Space Needle And The Ferries Bound Across The Gorgeous Seattle For A Full Service Complete With A Private Bathroom Equipped With A Bath And Service That For A Workout

Best Western Seattle Airport Hotel Is Just Steps From The Seattle Tacoma International Airport And The Museum Of Washington And The Cornish School Of Arts And Businesses Like The Lively Apartment Of The Space Needle Pike Place Market And Seattle And Leisure All Guest Rooms And Suites Each Of Our 75 Hyatt Hotel Is Close To The Space Needle Pike Place Market And The Scenic 

In [26]:
print(generate_text("hilton seattle downtown", 100, model_SimpleRNN, max_sequence_len))
print()
print(generate_text("best western seattle airport hotel", 200, model_SimpleRNN, max_sequence_len))
print()
print(generate_text('located in the heart of downtown seattle', 300, model_SimpleRNN, max_sequence_len))

Hilton Seattle Downtown Seattle Is The Best Of Pike Place Market And The Washington State Convention Center And The Space Needle The Seattle Mariners And Only 10 Blocks From Volunteer Park The Most Jewel Of The Seattle Center In The Heart Of The City And Upscale Boutique Hotel Is A Short Drive With The Pacific Northwest Escape To The Space Needle And The Night And Great Views Featuring The Rich Floor And Coffee Out And Enjoy The Lobby With Free Wi Fi And Business Located In The Quiet Beauty Of Lake Stay The Seattle Art Museum And Volunteer Miles From The Space Needle

Best Western Seattle Airport Hotel Is Near The Hotel Is Close To Highways Door At The Property Is Located In The Heart Of Downtown Seattle The Holiday Inn® Seattle Offers A Contemporary Hotel Near Seattle Tacoma International Airport Sea And The Best Western Seattle Airport Is A Quick Minute Walk From Volunteer Park And 1 5 Miles From The Botanic Gardens  The Elegant Hotel Station Feature A Private Kitchen Free Coffee Acc