# Model V2 (Smaller Unit) 

One major piece of feedback I received was that the unit size used in the architecture is likely too large. In this variation, I test a smaller unit size.

In [1]:
import numpy as np

import matplotlib.pyplot as plt
import matplotlib.image as mpimg

plt.style.use('fivethirtyeight')
%matplotlib inline

from tensorflow.keras.applications import NASNetLarge 
from tensorflow.keras.applications.nasnet import preprocess_input
from tensorflow.keras.preprocessing.image import load_img, img_to_array
from tensorflow.keras.models import Model, model_from_json, load_model
from tensorflow.keras.layers import Input, Dropout, Dense, Embedding, LSTM, add
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint
from sklearn.model_selection import train_test_split

import pickle
import os
from timeit import default_timer as timer

import string
import re

Loading custom scripts

In [2]:
from SCRIPT.sequence_generator import *
from SCRIPT.evaluation_tools import *

File locations

In [3]:
flicker_img_dir = 'IMAGES/Flicker/Flicker8k_Dataset'
flicker_text_dir = 'IMAGES/Flicker/labels'

## Feature Extraction
Extract features using NASNetLarge

In [5]:
def feature_extractor(dir_, network):
    '''
    Extract one feature array per ``.jpg`` image found in a directory.

    The network is instantiated with its default arguments and cut off at
    its second-to-last layer, so the stored features are that layer's
    output rather than the network's final predictions.

    Parameters
    ----------
    dir_ : str
        Directory to scan. Only file names ending in ``.jpg``
        (case-sensitive) are processed.
    network : callable
        Zero-argument callable returning a Keras model, e.g. ``NASNetLarge``.
        NOTE(review): images are resized to 331x331 and run through the
        NASNet ``preprocess_input``, so this presumably only suits
        NASNetLarge -- confirm before passing a different network.

    Returns
    -------
    dict
        Maps image id (file name without its ``.jpg`` extension) to the
        value returned by ``model.predict`` for that single image.
    '''
    model = network()
    model = Model(inputs = model.inputs, outputs = model.layers[-2].output)
    fnames = [x for x in os.listdir(dir_) if x.endswith('.jpg')]
    n = len(fnames)
    result = {}

    for i, fn in enumerate(fnames, start = 1):
        img = load_img(os.path.join(dir_, fn), target_size = (331, 331))
        # explicit PIL -> array conversion, then add the batch dimension
        img = np.expand_dims(img_to_array(img), 0)
        img = preprocess_input(img)
        # splitext only strips the extension, so names containing extra dots
        # ('a.1.jpg', 'a.2.jpg') keep distinct ids instead of colliding on 'a'
        ind = os.path.splitext(fn)[0]
        result[ind] = model.predict(img)
        print(f'{i}/{n} feature extraction completed')
    return result

In [None]:
# run
features = feature_extractor(flicker_img_dir, NASNetLarge)

In [None]:
# Saving
#with open('PKL/features.pkl', 'wb') as fp:
#    pickle.dump(features, fp, pickle.HIGHEST_PROTOCOL)

In [7]:
# loading
with open('PKL/features.pkl', 'rb') as fp:
    features = pickle.load(fp)

Extract features from the paintings as well.

In [None]:
# Forward slash keeps the path portable and consistent with the other paths
# in this notebook. The previous backslash form only worked because '\p' is
# not a recognised escape sequence (newer Python versions warn about it).
img_dir = 'IMAGES/paintings'
art_features = feature_extractor(img_dir, NASNetLarge)

In [None]:
# saving the extracted features
#with open('PKL/art_features.pkl', 'wb') as fp:
#    pickle.dump(art_features, fp, pickle.HIGHEST_PROTOCOL)

In [9]:
# loading
with open('PKL/art_features.pkl', 'rb') as fp:
    art_features = pickle.load(fp)

## Get Descriptions

In [10]:
# read the description file
with open(f'{flicker_text_dir}/Flickr8k.token.txt', 'r') as fn:
    text = fn.readlines()

In [11]:
# extract only image id and description
# Raw string so the regex escapes (\. and \t) reach `re` untouched instead of
# relying on Python passing unknown string escapes through.
# group 1: characters directly before '.jpg' (the image id)
# group 2: everything after the last tab on the line (the description);
#          '.' never matches a newline, so the line ending is excluded and a
#          final line without a trailing newline still matches.
pattern = r'([0-9a-z_]*)\.jpg.*\t(.*)'
p = re.compile(pattern)
# each element is an (image id, description) tuple; a line that does not
# match at all still raises IndexError so malformed input is not skipped silently
descriptions_pairs = [p.findall(x)[0] for x in text]

In [12]:
def description_preprocessing(list_of_tuples, n = None):
    '''
    Clean raw captions and group them by image id.

    Each caption is lower-cased, stripped of punctuation and digits, has its
    single-character tokens removed, and is wrapped in the 'seqini' /
    'seqfin' start and end tokens.

    Parameters
    ----------
    list_of_tuples : iterable of (image id, caption) pairs
    n : int, optional
        If given (and non-zero), any image with more than n captions keeps
        a random sample of n of them, drawn without replacement through
        ``np.random.choice`` (i.e. NumPy's global random state).

    Returns
    -------
    dict
        Maps image id to a list of cleaned caption strings (plain ``str``).
    '''
    descriptions = {}

    table_ = str.maketrans('', '', string.punctuation+string.digits)

    for ind, text in list_of_tuples:
        text = text.lower().translate(table_)
        # drop single-character tokens (e.g. a stray 'a' or 's')
        words = [x for x in text.split() if len(x) > 1]
        # add initial and ending tokens
        text = 'seqini ' + ' '.join(words) + ' seqfin'
        descriptions.setdefault(ind, []).append(text)
    if n:
        # if n is assigned cap number of description for each image to be n
        for k, v in descriptions.items():
            if len(v) > n:
                # .tolist() converts the sampled numpy.str_ items back to
                # plain str so capped and uncapped entries have the same type
                descriptions[k] = np.random.choice(v, n, replace = False).tolist()
    return descriptions

In [13]:
# NOTE(review): nothing in the visible notebook builds `descriptions`; it was
# presumably produced by description_preprocessing(descriptions_pairs) before
# being pickled -- confirm.
# saving (disabled)
#with open('PKL/descriptions.pkl', 'wb') as fp:
#    pickle.dump(descriptions, fp)

# loading the previously pickled descriptions from disk
with open('PKL/descriptions.pkl', 'rb') as fp:
    descriptions = pickle.load(fp)

## Cross-validation
Split the photo set into train/validation/test subsets (a 70/15/15 hold-out split).

In [14]:
# 70% of the keys of `descriptions` go to training; the held-out 30% is then
# split in half, giving 15% validation and 15% test overall.
# Both splits use random_state = 22.
train_list, test_list = train_test_split(list(descriptions.keys()), test_size = 0.3, random_state = 22)
val_list, test_list = train_test_split(test_list, test_size = 0.5, random_state = 22)

## Model Training
Train model with photo files

In [15]:
# initialize processor with non-art descriptions and features
processor = sequence_generator(descriptions, features)

In [16]:
# get inputs and output
train_X1, train_X2, train_Y = processor.train_generator(train_list)
val_X1, val_X2, val_Y = processor.validation_generator(val_list)

In [17]:
# get max length and number of vocabularies
max_length = processor.get_max_length()
num_vocab = processor.get_num_vocab()

In [None]:
# first path: image feature input (4032 values per sample) -> dropout -> 64 units
in1 = Input(shape = (4032,))
img_layer1 = Dropout(0.5)(in1)
img_layer2 = Dense(64, activation = 'relu')(img_layer1)
# second path: token sequence of length max_length -> embedding -> dropout -> 64-unit LSTM
in2 = Input(shape=(max_length,))
text_layer1 = Embedding(num_vocab, 256, mask_zero = True)(in2)
text_layer2 = Dropout(0.5)(text_layer1)
text_layer3 = LSTM(64)(text_layer2)
# outputting: add the two 64-unit paths, then predict a distribution over the vocabulary
output_layer1 = add([img_layer2, text_layer3])
output_layer2 = Dense(64, activation = 'relu')(output_layer1)
output = Dense(num_vocab, activation = 'softmax')(output_layer2)
# compile model
model = Model(inputs = [in1, in2], outputs = output)
model.compile(loss = 'categorical_crossentropy',
             optimizer = 'adam')

# stop after 3 epochs without improvement of the monitored metric
# (val_loss by default) and roll back to the best weights seen
cp = EarlyStopping(patience = 3, restore_best_weights= True)

# training
start = timer()

# `workers` is intentionally not passed: it only applies to generator /
# Sequence input, whereas the data here is given as separate x and y
# arguments, and newer Keras versions no longer accept the argument.
history = model.fit([train_X1, train_X2], train_Y,
                    epochs=10,
                    validation_data = ([val_X1, val_X2], val_Y),
                    callbacks = [cp],
                    verbose = 1
                   )
end = timer()
elapsed = end - start
# round to whole seconds first, then split, so the seconds part can never
# be reported as 60
minutes, seconds = divmod(round(elapsed), 60)
print('Total Time Elapsed: ', minutes, ' minutes ', seconds, ' seconds')

Epoch 1/10


In [None]:
# saving model
model.save('MODEL/small_model')

In [None]:
#loading model
model = load_model('MODEL/small_model')

In [None]:
plot_performance(history)

In [None]:
# get the tokenizer
tokenizer = processor.get_tokenizer()

In [None]:
# NOTE(review): img_inds, feature_dict and text_ref_dict are not defined in
# the visible notebook, so unless the wildcard imports from SCRIPT provide
# them this cell raises NameError. Presumably the intended arguments are
# test_list, features and descriptions -- confirm against get_bleu's signature.
get_bleu(img_inds, feature_dict, tokenizer, max_length, model, text_ref_dict)

In [None]:
descriptor_1 = descriptor()

