In [68]:
#!/usr/bin/env python2
# -*- coding: utf-8 -*-
"""
Created on Wed Mar 15 14:54:01 2017

@author: paradiph
"""

import os, sys
import glob
import cPickle as pkl
import numpy as np
import PIL.Image as Image
#from skimage.transform import resize

from keras.models import Sequential
from keras.models import load_model
from keras.layers import Dense, Activation, Dropout
from keras import optimizers
from keras import losses

#%%

In [None]:
# Assume this script is being run from the root directory of the git repo

#######################################
# Dataset
#######################################
# The data is already split into training and validation datasets
# The training dataset has:
# - 82782 items
# - 984 MB of data
# The validation dataset has:
# - 40504 items
# - 481 MB of data
#
# There is also a pickled dictionary that maps image filenames (minutes the
# .jpg extension) to a list of 5 strings (the 5 human-generated captions).
# This dictionary is an OrderedDict with 123286 entries.

input_dim = 64*64*3 - 32*32*3
output_dim = 32*32*3
batch_size = 128
num_epochs = 15

#%% PATHS
mscoco="datasets/mscoco_inpainting/inpainting/"
split="train2014"
caption_path="dict_key_imgID_value_caps_train_and_valid.pkl"



In [76]:
import os
workingdir = os.path.join(os.getenv('HOME'), 'ift6266-h17-project')
print("working dir", workingdir)
os.chdir(workingdir)

('working dir', '/u/paradiph/ift6266-h17-project')


In [77]:
#some preprocessing
def normalize_dataset(dataset):
    dataset = dataset.astype('float32')
    dataset /= 255
    return dataset

def denormalize_dataset(dataset):
    dataset *= 255
    dataset = dataset.astype('uint8')
    return dataset

In [78]:
#%% Load training images and captions
data_path = os.path.join(mscoco, split)
caption_path = os.path.join(mscoco, caption_path)
with open(caption_path) as fd:
    caption_dict = pkl.load(fd)

# Get a list of all training images full filename paths
print data_path + "/*.jpg"
train_images_paths = glob.glob(data_path + "/*.jpg")
#batch_imgs = imgs[batch_idx*batch_size:(batch_idx+1)*batch_size]

#%% Create dataset containing the images pixel data

X_train_outer = []
X_train_inner = []
X_train_caption = []

def flatten_outer_frame(img_array, dim_outer=(64, 64), dim_inner=(32, 32)):
    flat = np.copy(img_array)
    flat = flat.flatten()
    img_top = flat[0:64*16,:]
    img_bottom = flat[64*48:64*64,:]
    no_middle_cols = np.delete(img_array, range(16,48))
    img_middle = no_middle_cols.flatten()[32*16:32*48]

    final_img = np.concatenate((img_top, img_middle, img_bottom), axis=0)
    print("outer_frame shape = ", np.shape(final_img))
    return final_img
    
for i, img_path in enumerate(train_images_paths):
    img = Image.open(img_path)
    img_array = np.array(img)

    # File names look like this: COCO_train2014_000000520978.jpg
    cap_id = os.path.basename(img_path)[:-4]

    ### Get input/target from the images
    center = (int(np.floor(img_array.shape[0] / 2.)), int(np.floor(img_array.shape[1] / 2.)))
    if len(img_array.shape) == 3:
        X_outer = np.copy(img_array)
        X_outer_mask = np.array(np.ones(np.shape(img_array)), dtype='bool')
        X_outer_mask[center[0]-16:center[0]+16, center[1]-16:center[1]+16, :] = False
        zipped = zip(X_outer, X_outer_mask)
        X_outer = X_outer.flatten()
        X_outer_mask = X_outer_mask.flatten()
        X_outer = X_outer[X_outer_mask]
        
        X_inner = img_array[center[0]-16:center[0]+16, center[1] - 16:center[1]+16, :]
        X_inner = X_inner.flatten()
    else:
        continue
        #X_outer = np.copy(img_array)
        #X_outer[center[0]-16:center[0]+16, center[1]-16:center[1]+16] = 0
        #X_inner = img_array[center[0]-16:center[0]+16, center[1] - 16:center[1]+16]

    
    #Image.fromarray(img_array).show()
    X_train_inner.append(X_inner)
    X_train_outer.append(X_outer)
    captions = np.array([cap_id] + caption_dict[cap_id])
    X_train_caption.append(captions)

X_train_inner = np.array(X_train_inner)
X_train_outer = np.array(X_train_outer)
X_train_caption = np.array(X_train_caption)

print("Finished loading full dataset...")
print("X_train_inner shape   = ", np.shape(X_train_inner))
print("X_train_outer shape   = ", np.shape(X_train_outer))
print("X_train_caption shape = ", np.shape(X_train_caption))

print("First 3 rows and first 10 pixels of X_train_inner:")
print(X_train_inner[0, range(10)])
print(X_train_inner[1, range(10)])
print(X_train_inner[2, range(10)])

datasets/mscoco_inpainting/inpainting/train2014/*.jpg
Finished loading full dataset...
('X_train_inner shape   = ', (82611, 3072))
('X_train_outer shape   = ', (82611, 9216))
('X_train_caption shape = ', (82611,))
First 3 rows and first 10 pixels of X_train_inner:
[57 69 57 65 79 56 63 81 43 53]
[197 202 195 167 164 147 104  87  57 102]
[104 100  97  77  80  53 172 181 128 242]


In [79]:
import os
workingdir = os.path.join(os.getenv('HOME'), 'ift6266-h17-project')
print("working dir", workingdir)
os.chdir(workingdir)
os.getcwd()

('working dir', '/u/paradiph/ift6266-h17-project')


'/u/paradiph/ift6266-h17-project'

In [80]:
# split into input (X) and output (Y) variables
from sklearn.cross_validation import train_test_split
X_train, X_test, Y_train, Y_test = train_test_split(X_train_outer,
                                                    X_train_inner,
                                                    test_size=0.20,
                                                    random_state=1)

print("Splitting dataset into training and testing sets with shuffling...")
print("X_train.shape = ", X_train.shape)
print("X_test.shape  = ", X_test.shape)
print("Y_train.shape = ", Y_train.shape)
print("Y_test.shape  = ", Y_test.shape)

X_train = normalize_dataset(X_train)
X_test = normalize_dataset(X_test)
Y_train = normalize_dataset(Y_train)
Y_test = normalize_dataset(Y_test)

Splitting dataset into training and testing sets with shuffling...
('X_train.shape = ', (66088, 9216))
('X_test.shape  = ', (16523, 9216))
('Y_train.shape = ', (66088, 3072))
('Y_test.shape  = ', (16523, 3072))


In [81]:
print(Y_train.shape)
Y_train[0,1500:1550]

(66088, 3072)


array([ 0.48235294,  0.45882353,  0.46666667,  0.38039216,  0.37254903,
        0.42352942,  0.41176471,  0.40784314,  0.47843137,  0.43137255,
        0.43529412,  0.51764709,  0.3882353 ,  0.40000001,  0.47450981,
        0.51372552,  0.52156866,  0.58039218,  0.53725493,  0.53725493,
        0.57647061,  0.29803923,  0.28627452,  0.3137255 ,  0.40392157,
        0.38431373,  0.40784314,  0.45490196,  0.41960785,  0.43921569,
        0.33333334,  0.28235295,  0.30980393,  0.33333334,  0.28235295,
        0.31764707,  0.38431373,  0.40392157,  0.5529412 ,  0.32941177,
        0.34509805,  0.48235294,  0.35686275,  0.37254903,  0.47843137,
        0.33725491,  0.33333334,  0.39607844,  0.33333334,  0.32549021], dtype=float32)

In [84]:
print("Creating MLP model...")
# Create model
model = Sequential()
model.add(Dense(units=100, input_shape=(input_dim, )))
model.add(Activation('relu'))
#model.add(Dropout(0.5))
model.add(Dense(units=(500)))
model.add(Activation('relu'))
#model.add(Dropout(0.5))
model.add(Dense(units=output_dim))
#model.add(Activation('relu'))
# Try using a sigmoid activation after

# Print model summary
print("Model summary:")
print(model.summary())

print("Compiling model...")
# Compile model
adam_optimizer = optimizers.Adam(lr=0.00005) # Default lr = 0.001
model.compile(loss='mse', optimizer=adam_optimizer, metrics=['mse'])
#model.compile(loss=losses.mean_absolute_error, optimizer='adam', metrics=['accuracy'])

print("Fitting model...")
# Fit the model
model.fit(X_train, Y_train, epochs=num_epochs, batch_size=batch_size, verbose=2)

# evaluate the model
scores = model.evaluate(X_train, Y_train, batch_size=batch_size)
print("Training score %s: %.2f%%" % (model.metrics_names[1], scores[1]*100))
scores = model.evaluate(X_test, Y_test, batch_size=batch_size)
print("Testing score %s: %.2f%%" % (model.metrics_names[1], scores[1]*100))

#%% Save model
print("Saving model as 'last_model.h5'")
model.save('last_model.h5')

Creating MLP model...
Model summary:
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_22 (Dense)             (None, 100)               921700    
_________________________________________________________________
activation_17 (Activation)   (None, 100)               0         
_________________________________________________________________
dense_23 (Dense)             (None, 500)               50500     
_________________________________________________________________
activation_18 (Activation)   (None, 500)               0         
_________________________________________________________________
dense_24 (Dense)             (None, 3072)              1539072   
Total params: 2,511,272.0
Trainable params: 2,511,272.0
Non-trainable params: 0.0
_________________________________________________________________
None
Compiling model...
Fitting model...
Epoch 1/50
3s - loss: 0.0636 - mean_squared_erro

In [85]:
#%% Load model
print("Loading model from disk 'last_model.h5'...")
model = load_model('last_model.h5')

Loading model from disk 'last_model.h5'...


In [86]:
# evaluate the model
scores = model.evaluate(X_train, Y_train, batch_size=batch_size)
print("Training score %s: %.2f%%" % (model.metrics_names[1], scores[1]*100))
scores = model.evaluate(X_test, Y_test, batch_size=batch_size)
print("Testing score %s: %.2f%%" % (model.metrics_names[1], scores[1]*100))




In [87]:
X_test_predict = model.predict(X_test, batch_size=batch_size)

print(X_test_predict.shape)
print(Y_test.shape)

X_test_predict = denormalize_dataset(X_test_predict)
Y_test = denormalize_dataset(Y_test)

num_rows = X_test_predict.shape[0]
X_test_predict = np.reshape(X_test_predict, (num_rows, 32, 32, 3))

num_rows = Y_test.shape[0]
Y_test = np.reshape(Y_test, (num_rows, 32, 32, 3))

(16523, 3072)
(16523, 3072)


In [62]:


print(X_test_predict[0])
print(Y_test[0])

[[[84 70 48]
  [83 70 47]
  [82 69 47]
  ..., 
  [81 68 47]
  [81 69 46]
  [82 69 47]]

 [[81 68 46]
  [81 68 45]
  [81 67 45]
  ..., 
  [80 67 45]
  [80 67 45]
  [81 67 45]]

 [[79 67 44]
  [79 66 44]
  [80 67 44]
  ..., 
  [78 66 45]
  [79 65 44]
  [79 65 44]]

 ..., 
 [[75 59 38]
  [75 60 38]
  [76 60 39]
  ..., 
  [75 59 38]
  [75 59 38]
  [74 58 37]]

 [[77 60 39]
  [77 60 39]
  [77 60 40]
  ..., 
  [75 59 39]
  [76 59 38]
  [75 59 38]]

 [[77 61 40]
  [78 60 40]
  [77 60 40]
  ..., 
  [76 60 39]
  [76 60 40]
  [76 61 40]]]
[[[ 93  98  92]
  [147 149 144]
  [176 177 171]
  ..., 
  [110 101  96]
  [166 156 154]
  [176 166 164]]

 [[ 82  89  82]
  [131 136 129]
  [178 179 173]
  ..., 
  [193 185 182]
  [215 205 204]
  [199 189 188]]

 [[115 125 116]
  [135 143 132]
  [168 171 162]
  ..., 
  [225 216 217]
  [230 220 221]
  [193 183 184]]

 ..., 
 [[ 75  79  64]
  [ 55  59  44]
  [ 49  53  38]
  ..., 
  [127  94  89]
  [126 108  98]
  [ 76  77  61]]

 [[127 133 119]
  [114 120 106]
  

In [88]:

for row in range(10):
    img = Image.fromarray(X_test_predict[row,:,:,:])
    img.show()
    img.save('X_test_predict_' + str(row) + '.jpg')

    img = Image.fromarray(Y_test[row,:,:,:])
    img.show()
    img.save('Y_test_' + str(row) + '.jpg')
