In [1]:
from keras.preprocessing.sequence import pad_sequences
from keras.layers import Embedding, LSTM, Dense, Dropout
from keras.preprocessing.text import Tokenizer
from keras.callbacks import EarlyStopping
from keras.models import Sequential
import keras.utils as ku 
import pandas as pd
import numpy as np
import string, os 
import warnings
warnings.filterwarnings("ignore")
warnings.simplefilter(action='ignore', category=FutureWarning)

Using TensorFlow backend.


In [2]:
hotel_df = pd.read_csv('Seattle_Hotels.csv', encoding="latin-1")
all_descriptions = list(hotel_df.desc.values)

In [3]:
len(all_descriptions)

152

In [5]:
corpus = [x for x in all_descriptions]
corpus[:1]

["Located on the southern tip of Lake Union, the Hilton Garden Inn Seattle Downtown hotel is perfectly located for business and leisure. \nThe neighborhood is home to numerous major international companies including Amazon, Google and the Bill & Melinda Gates Foundation. A wealth of eclectic restaurants and bars make this area of Seattle one of the most sought out by locals and visitors. Our proximity to Lake Union allows visitors to take in some of the Pacific Northwest's majestic scenery and enjoy outdoor activities like kayaking and sailing. over 2,000 sq. ft. of versatile space and a complimentary business center. State-of-the-art A/V technology and our helpful staff will guarantee your conference, cocktail reception or wedding is a success. Refresh in the sparkling saltwater pool, or energize with the latest equipment in the 24-hour fitness center. Tastefully decorated and flooded with natural light, our guest rooms and suites offer everything you need to relax and stay productive

In [6]:
t = Tokenizer(num_words=None, filters='!"#$%&()*+,-./:;<=>?@[\\]^_`{|}~\t\n', lower=True, split=' ', char_level=False, oov_token=None, document_count=0)
t.fit_on_texts(corpus)

In [None]:
# A dictionary of words and their counts.
print(t.word_counts)

# A dictionary of words and how many documents each appeared in.
print(t.word_docs)

# An integer count of the total number of documents that were used to fit the Tokenizer (i.e. total number of documents)
print(t.document_count)

# A dictionary of words and their uniquely assigned integers.
print(t.word_index)

In [9]:
print('Found %s unique tokens.' % len(t.word_index))

Found 3420 unique tokens.


In [10]:
# Tokenization
t = Tokenizer(num_words=None, filters='!"#$%&()*+,-./:;<=>?@[\\]^_`{|}~\t\n', lower=True, split=' ', char_level=False, oov_token=None, document_count=0)

def get_sequence_of_tokens(corpus):
    t.fit_on_texts(corpus)
    total_words = len(t.word_index) + 1
    
    input_sequences = []
    for line in corpus:
        token_list = t.texts_to_sequences([line])[0]
        for i in range(1, len(token_list)):
            n_gram_sequence = token_list[:i+1]
            input_sequences.append(n_gram_sequence)
            
    return input_sequences, total_words
input_sequences, total_words = get_sequence_of_tokens(corpus)

In [11]:
input_sequences[:10]

[[24, 21],
 [24, 21, 1],
 [24, 21, 1, 1734],
 [24, 21, 1, 1734, 1735],
 [24, 21, 1, 1734, 1735, 4],
 [24, 21, 1, 1734, 1735, 4, 81],
 [24, 21, 1, 1734, 1735, 4, 81, 111],
 [24, 21, 1, 1734, 1735, 4, 81, 111, 1],
 [24, 21, 1, 1734, 1735, 4, 81, 111, 1, 330],
 [24, 21, 1, 1734, 1735, 4, 81, 111, 1, 330, 331]]

In [44]:
total_words

3421

In [12]:
# pad sequences 
def generate_padded_sequences(input_sequences):
    max_sequence_len = max([len(x) for x in input_sequences])
    input_sequences = np.array(pad_sequences(input_sequences, maxlen = max_sequence_len, padding = 'pre'))
    predictors, label = input_sequences[:,:-1],input_sequences[:,-1]
    label = ku.to_categorical(label, num_classes = total_words)
    
    return predictors, label, max_sequence_len

predictors, label, max_sequence_len = generate_padded_sequences(input_sequences)

In [8]:
def create_model(max_sequence_len, total_words):
    model = Sequential()
    
    # Add Input Embedding Layer
    model.add(Embedding(total_words, 10, input_length=max_sequence_len - 1))
    
    # Add Hidden Layer 1 - LSTM Layer
    model.add(LSTM(100))
    model.add(Dropout(0.1))
    
    # Add Output Layer
    model.add(Dense(total_words, activation='softmax'))

    model.compile(loss='categorical_crossentropy', optimizer='adam')
    
    return model

model = create_model(max_sequence_len, total_words)
model.summary()

Instructions for updating:
Colocations handled automatically by placer.
Instructions for updating:
Please use `rate` instead of `keep_prob`. Rate should be set to `rate = 1 - keep_prob`.
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding_1 (Embedding)      (None, 491, 10)           34640     
_________________________________________________________________
lstm_1 (LSTM)                (None, 100)               44400     
_________________________________________________________________
dropout_1 (Dropout)          (None, 100)               0         
_________________________________________________________________
dense_1 (Dense)              (None, 3464)              349864    
Total params: 428,904
Trainable params: 428,904
Non-trainable params: 0
_________________________________________________________________


In [9]:
model.fit(predictors, label, epochs=100, verbose=5)

Instructions for updating:
Use tf.cast instead.
Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100

<keras.callbacks.History at 0x7f572c21ceb8>

In [10]:
def generate_text(seed_text, next_words, model, max_seq_len):
    for _ in range(next_words):
        token_list = t.texts_to_sequences([seed_text])[0]
        token_list = pad_sequences([token_list], maxlen=max_seq_len-1, padding='pre')
        
        predicted = model.predict_classes(token_list, verbose=0)
        
        output_word = ''
        
        for word,index in t.word_index.items():
            if index == predicted:
                output_word = word
                break
                
        seed_text = seed_text + " " + output_word
        
    return seed_text.title()

In [39]:
print(generate_text("hilton seattle downtown", 100, model, max_sequence_len))
print()
print(generate_text("best western seattle airport hotel", 200, model, max_sequence_len))
print()
print(generate_text('located in the heart of downtown seattle', 300, model, max_sequence_len))

Hilton Seattle Downtown Hotel Is Located In The Heart Of Downtown Seattle The Waterfront Inn Is A Contemporary Haven Near The Hotel At El Gaucho With The Simple Food Of Featured With A Large Inviting Of Featured In The Quiet Gym Sound Features A Local Views In The Side Floor Rooms And Harbor From The Seattle From Hotel In Seattle At Our Downtown Seattle Hotel Hotel Is The Friendliest Inn Seattle Airport And Enjoy Us In The Heart Of Seattle And Enjoy A Extended Old Baseball Experience That Within Directly From The Market And A Fullservice Hotel Leisure For A Range Of Upscale

Best Western Seattle Airport Hotel Is A Leading 119 Guestroom Boutique Hotel In Seattle As Conveniently Located Across The Street From The Emp An Interactive Music Music Service With A Balcony 37Inch Airport With A Large Views Of Seattle Seattle Style And A Variety Of The Living Area Rooms To Offer The Unique Views Of The City And Take In The Heart Of The City This Legendary Hotel Offers A Oneofakind Place To The E