In [28]:
from os import listdir
from numpy import array
from keras.preprocessing.text import Tokenizer, one_hot
from keras.preprocessing.sequence import pad_sequences
from keras.models import Model
from keras.utils import to_categorical
from keras.layers import Embedding, TimeDistributed, RepeatVector, LSTM, concatenate , Input, Reshape, Dense, Flatten
from keras.preprocessing.image import array_to_img, img_to_array, load_img
from keras.applications.inception_v3 import InceptionV3, preprocess_input
from keras.callbacks import ModelCheckpoint
import numpy as np
from keras.models import model_from_json

In [29]:
# Directory holding the training images.
PICS_DIR = '../Spline_Out/Pics/'

# Sort the file names so the image order is deterministic; the training-data
# cell pairs features[i] with the i-th (sorted) XML document by position.
# Hidden entries such as .DS_Store / .ipynb_checkpoints are filtered out
# instead of being removed with list.remove(), which raises ValueError
# whenever one of them does not exist.
all_filenames = sorted(name for name in listdir(PICS_DIR) if not name.startswith('.'))

images = []
for filename in all_filenames:
    print(filename)
    # 299x299 is the input size used for the InceptionV3 feature extractor below.
    images.append(img_to_array(load_img(PICS_DIR + filename, target_size=(299, 299))))
images = np.array(images, dtype=float)
images = preprocess_input(images)

# Feature extractor: InceptionV3 without its classification head and with
# global average pooling, so predict() yields one feature vector per image.
# NOTE(review): the variable is called IR2 although the network is InceptionV3;
# the name is kept because later cells reference it.
IR2 = InceptionV3(weights='imagenet', include_top=False, pooling='avg')
features = IR2.predict(images)

KT Anna Skirt-FT.png
KT Anna Skirt-PKT.png
KT Anna Skirt-WB-X.png
KT Anna Skirt-WB.png
KT Jojo Sweatshirt-BK.png
KT Jojo Sweatshirt-FT.png
KT Jojo Sweatshirt-NKBND.png
KT Jojo Sweatshirt-PKT.png
KT Jojo Sweatshirt-SLV.png
KT Olivia Skirt-BK.png
KT Olivia Skirt-FT.png
KT Rumi Sweatshirt-Cuff.png
KT Rumi Sweatshirt-FT.png
KT Rumi Sweatshirt-NKBDG.png
KT Rumi Sweatshirt-NKBND.png
KT Rumi Sweatshirt-SLV.png
KT Rumi Sweatshirt-WB.png
KT longsleeve crew neck-FT.png
KT longsleeve crew neck-SLV.png


In [30]:
# Number of tokens in each input sequence fed to the language model.
max_caption_len = 100
# Vocabulary builder: tokens are separated by single spaces, no characters
# are filtered out and case is preserved.
tokenizer = Tokenizer(
    lower=False,
    split=" ",
    filters='',
)

# Read a document and return a string
def load_doc(filename):
    """Return the full text content of the file at ``filename``.

    The file is opened in text mode with the platform default encoding.
    A ``with`` block guarantees the handle is closed even if reading fails.

    Raises:
        OSError: if the file cannot be opened or read (e.g. it does not exist).
    """
    with open(filename, 'r') as file:
        return file.read()

# Directory holding the markup documents, one per training image.
XML_DIR = '../Spline_Out/XML/'

# Load all the XML files in sorted order so that document i lines up with
# image i. Hidden entries (.DS_Store, .ipynb_checkpoints) are skipped by
# filtering; list.remove() would raise ValueError if one of them is absent.
all_filenames = sorted(name for name in listdir(XML_DIR) if not name.startswith('.'))
documents = [load_doc(XML_DIR + filename) for filename in all_filenames]

# Create the vocabulary from the documents
tokenizer.fit_on_texts(documents)

# Add +1 to leave space for the padding index 0
vocab_size = len(tokenizer.word_index) + 1
# Translate each word in every document to the matching vocabulary index
sequences = tokenizer.texts_to_sequences(documents)
# Token count of the longest document
max_length = max(len(s) for s in sequences)

print(len(sequences), len(features))
# Documents and image features are paired purely by position, so a count
# mismatch would silently attach the wrong image to a document.
assert len(sequences) == len(features), \
    'number of XML documents does not match number of images'



# Build the training samples: for every document and every position i, the
# input is the token prefix seq[:i] and the target is the next token seq[i].
X, y, image_data = list(), list(), list()
for img_no, seq in enumerate(sequences):
    for i in range(1, len(seq)):
        in_seq, out_seq = seq[:i], seq[i]
        # Left-pad short prefixes with zeros and keep only the most recent
        # max_caption_len tokens of long ones (pad_sequences pads and truncates
        # at the front by default). Padding directly to max_caption_len always
        # yields rows of exactly that width, whereas padding to max_length and
        # slicing the last 100 entries produced shorter rows when
        # max_length < 100.
        in_seq = pad_sequences([in_seq], maxlen=max_caption_len)[0]
        # Map the target token to a one-hot vector over the vocabulary
        out_seq = to_categorical([out_seq], num_classes=vocab_size)[0]
        # Attach the feature vector of the image belonging to this document
        image_data.append(features[img_no])
        X.append(in_seq)
        y.append(out_seq)

X, y, image_data = np.array(X), np.array(y), np.array(image_data)

19 19


In [31]:
# Show the shapes of the three training arrays followed by the raw
# per-image feature matrix, one shape per line.
print(*(a.shape for a in (image_data, X, y, features)), sep='\n')

(6654, 2048)
(6654, 100)
(6654, 703)
(19, 2048)


In [5]:
# Image branch: project the 2048-dimensional image feature vector down to 128
# units, then repeat it max_caption_len times so there is one copy per token
# position of the language branch.
image_features = Input(shape=(2048,))
image_flat = Dense(128, activation='relu')(image_features)
ir2_out = RepeatVector(max_caption_len)(image_flat)

# Language branch: embed the token ids (200 dimensions each), run them through
# two stacked LSTMs that return the full sequence, and map every time step to
# 128 units.
language_input = Input(shape=(max_caption_len,))
language_model = Embedding(vocab_size, 200, input_length=max_caption_len)(language_input)
language_model = LSTM(256, return_sequences=True)(language_model)
language_model = LSTM(256, return_sequences=True)(language_model)
language_model = TimeDistributed(Dense(128, activation='relu'))(language_model)

# Decoder: join the image and language features per time step, then two LSTMs;
# the second returns only its final output, which a softmax layer turns into a
# distribution over the vocabulary (the next-token prediction).
decoder = concatenate([ir2_out, language_model])
decoder = LSTM(512, return_sequences=True)(decoder)
decoder = LSTM(512, return_sequences=False)(decoder)
decoder_output = Dense(vocab_size, activation='softmax')(decoder)

# Inputs are passed as [image features, token sequence], in that order.
model = Model(inputs=[image_features, language_input], outputs=decoder_output)
model.compile(loss='categorical_crossentropy', optimizer='rmsprop')
model.summary()

__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_3 (InputLayer)            (None, 100)          0                                            
__________________________________________________________________________________________________
embedding_1 (Embedding)         (None, 100, 200)     140600      input_3[0][0]                    
__________________________________________________________________________________________________
input_2 (InputLayer)            (None, 2048)         0                                            
__________________________________________________________________________________________________
lstm_1 (LSTM)                   (None, 100, 256)     467968      embedding_1[0][0]                
__________________________________________________________________________________________________
dense_1 (D

In [6]:
# Checkpointing for training: write the model weights (weights only, not the
# architecture) to disk every 15 epochs. The file name records the epoch
# number and the training loss at that epoch.
filepath = "org-weights-epoch-{epoch:04d}---loss-{loss:.4f}.hdf5"
checkpoint = ModelCheckpoint(
    filepath,
    monitor='val_loss',
    verbose=1,
    save_weights_only=True,
    period=15,
)
# Early stopping is currently disabled:
#stopping = EarlyStopping(monitor='val_loss', min_delta=0, patience=0, verbose=0, mode='auto')
callbacks_list = [checkpoint]

In [33]:
# Rebuild the network from its saved JSON architecture. The with-block closes
# the file even if reading raises.
with open('model(RNN).json', 'r') as json_file:
    loaded_model_json = json_file.read()
loaded_model = model_from_json(loaded_model_json)
# Load previously trained weights into the new model
loaded_model.load_weights("org-weights-epoch-0075---loss-0.0039.hdf5")
# Compile with the same loss and optimizer as the original model definition
loaded_model.compile(loss='categorical_crossentropy', optimizer='rmsprop')
loaded_model.summary()

__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_3 (InputLayer)            (None, 100)          0                                            
__________________________________________________________________________________________________
embedding_1 (Embedding)         (None, 100, 200)     140600      input_3[0][0]                    
__________________________________________________________________________________________________
input_2 (InputLayer)            (None, 2048)         0                                            
__________________________________________________________________________________________________
lstm_1 (LSTM)                   (None, 100, 256)     467968      embedding_1[0][0]                
__________________________________________________________________________________________________
dense_1 (D

In [15]:
# Train the neural network.
# - Inputs are passed as [image features, token sequences]; y holds the one-hot
#   next-token targets.
# - validation_split=0.1 holds out 10% of the samples for validation; Keras
#   takes them from the end of the arrays, and shuffle=False keeps the
#   remaining training samples in their original order.
#   NOTE(review): samples are generated document by document, so the held-out
#   part presumably comes entirely from the last document(s) - confirm this is
#   intended.
# - callbacks_list contains the ModelCheckpoint that saves weights periodically.
loaded_model.fit([image_data, X], y, batch_size=64, shuffle=False, validation_split=0.1, callbacks=callbacks_list, verbose=1, epochs=120)

Train on 5988 samples, validate on 666 samples
Epoch 1/120
Epoch 2/120
Epoch 3/120
Epoch 4/120
Epoch 5/120
Epoch 6/120
Epoch 7/120
Epoch 8/120
Epoch 9/120
Epoch 10/120
Epoch 11/120
Epoch 12/120
Epoch 13/120
Epoch 14/120
Epoch 15/120

Epoch 00015: saving model to org-weights-epoch-0015---loss-0.0697.hdf5
Epoch 16/120
Epoch 17/120
Epoch 18/120
Epoch 19/120
Epoch 20/120
Epoch 21/120
Epoch 22/120
Epoch 23/120
Epoch 24/120
Epoch 25/120
Epoch 26/120
Epoch 27/120
Epoch 28/120
Epoch 29/120
Epoch 30/120

Epoch 00030: saving model to org-weights-epoch-0030---loss-0.0198.hdf5
Epoch 31/120
Epoch 32/120
Epoch 33/120
Epoch 34/120
Epoch 35/120
Epoch 36/120
Epoch 37/120
Epoch 38/120
Epoch 39/120
Epoch 40/120
Epoch 41/120
Epoch 42/120
Epoch 43/120
Epoch 44/120
Epoch 45/120

Epoch 00045: saving model to org-weights-epoch-0045---loss-0.0103.hdf5
Epoch 46/120
Epoch 47/120
Epoch 48/120
Epoch 49/120
Epoch 50/120
Epoch 51/120
Epoch 52/120
Epoch 53/120
Epoch 54/120
Epoch 55/120
Epoch 56/120
Epoch 57/120
Epoch

KeyboardInterrupt: 

In [32]:
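# Disabled one-off export of the model architecture to 'model(RNN).json'.
# Un-comment and run to (re)create that file from `model`.
#model_json = model.to_json()
#with open("model(RNN).json", "w") as json_file:
#    json_file.write(model_json)
#print("Saved Model to Notebook")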

Saved Model to Notebook


In [34]:
# map an integer to a word
def word_for_id(integer, tokenizer):
    """Look up the vocabulary word whose index equals ``integer``.

    Scans ``tokenizer.word_index`` (a word -> index mapping) and returns the
    first word whose index matches, or ``None`` when no word has that index.
    """
    matches = (
        candidate
        for candidate, idx in tokenizer.word_index.items()
        if idx == integer
    )
    return next(matches, None)

In [35]:
# generate a description for an image
def generate_desc(model, tokenizer, photo, max_length, max_tokens=900):
    """Greedily generate a token sequence for an image and print it.

    Starting from the seed token 'START', the model is asked repeatedly for
    the most likely next token given the image features and the text produced
    so far. Each predicted token is printed as soon as it is produced.

    Args:
        model: model whose predict() accepts [photo, token_sequence].
        tokenizer: fitted tokenizer used to encode text and decode indices.
        photo: image feature array passed unchanged to model.predict().
        max_length: number of tokens the model takes as language input; the
            running text is padded / truncated to exactly this length.
        max_tokens: upper bound on the number of generated tokens
            (default 900, the limit that was previously hard-coded).

    Returns:
        None. The generated tokens are written to stdout.
    """
    # seed the generation process
    in_text = 'START'
    for _ in range(max_tokens):
        # integer encode the text generated so far
        sequence = tokenizer.texts_to_sequences([in_text])[0]
        # pad_sequences left-pads short inputs and, by default, drops tokens
        # from the front of long ones, so the model always sees the most
        # recent max_length tokens (previously a hard-coded 100-token slice
        # that ignored max_length).
        sequence = pad_sequences([sequence], maxlen=max_length)
        # predict the distribution over the next word
        yhat = model.predict([photo, sequence], verbose=0)
        # pick the most probable vocabulary index
        yhat = np.argmax(yhat)
        # map integer to word
        word = word_for_id(yhat, tokenizer)
        # stop if the index has no word
        if word is None:
            break
        # append as input for generating the next word
        in_text += ' ' + word
        # Print the prediction
        print(' ' + word, end='')
        # stop if we predict the end of the sequence
        if word == 'END':
            break
    return

In [36]:
# Load an image, preprocess it the same way as the training images, extract
# its features and generate the markup for it.
# NOTE(review): the recorded run of this cell failed with
# "OSError: cannot identify image file" for the path below - confirm that the
# file exists and really is an image before re-running.
test_image = img_to_array(load_img('../Spline_Out/TestPics/KT-3012-V2-NKBND.val', target_size=(299, 299)))
test_image = np.array(test_image, dtype=float)
test_image = preprocess_input(test_image)
# Wrap the single image in a batch of size one for predict()
test_features = IR2.predict(np.array([test_image]))
# Use max_caption_len instead of a literal 100 so the generation window stays
# in sync with the sequence length the training data was built with.
generate_desc(loaded_model, tokenizer, test_features, max_caption_len)

OSError: cannot identify image file '../Spline_Out/TestPics/KT-3012-V2-NKBND.val'