In [10]:
import numpy as np

import matplotlib
import matplotlib.pyplot as plt

from PIL import Image

import glob
import os
import re

%matplotlib inline

In [11]:
cwd = os.getcwd()

SAMPLE_SIZE = 6270
SPLIT_RATIO = 0.9

PADDING_SIZE = 7
IMAGE_SIZE = 256
BATCH_SIZE = 16

In [12]:
index = np.arange(SAMPLE_SIZE) + 1
np.random.seed(1234)
np.random.shuffle(index)

split = (int) (SAMPLE_SIZE * SPLIT_RATIO)
x_train_idx = index[0:split]
x_test_idx = index[split:]

In [13]:
ALL_WORD = ['', 'statement', 'if', 'else', 'elseif', 'for', 'while', 'end', '<END>']
N_ONEHOT_WORD = len(ALL_WORD) - 1
CHARS_MAP = {v: k for k, v in enumerate(ALL_WORD)}
IDX_MAP = dict(list(enumerate(ALL_WORD)))

def to_onehot(word):
    n_onehot = N_ONEHOT_WORD
    idx = CHARS_MAP[word]
    if idx == 0:
        return np.zeros(n_onehot)
    else:
        onehot_vec = np.zeros(n_onehot)
        onehot_vec[idx - 1] = 1
        return onehot_vec

def convert_to_feature_list(feature_words):
    onehots = []
    for word in feature_words:
        onehots.append(to_onehot(word))
    return np.array(onehots)
        

def convert_to_input_set(tokens, k):
    n_tokens = len(tokens)
    padded_token = [''] * k + tokens + ["<END>"]
    res = []
    for i in range(n_tokens):
        res.append((
            convert_to_feature_list(padded_token[i:i + k]),
            to_onehot(padded_token[i + k])
        ))
    return res

In [14]:
def input_generator(indexs):
    files = glob.glob("./data/*")
    
    while(True):
        x_word = []
        x_image = []
        y = []
        
        for idx in indexs:
            picture_files = list(filter(re.compile(".\/data\/sample-" + str(idx) + "-\d+.jpg").search, files))
            lang_file = list(filter(re.compile(".\/data\/sample-" + str(idx) + "-lang.txt").search, files))
            
            if len(lang_file) == 0:
                continue
            
            with open(lang_file[0], 'r') as file:
                lang = [l.strip().split(" ")[0] for l in file.read().split("\n") if len(l)]
                lang = list(map(lambda x: "statement" if x.startswith("statement") else x, lang))
            
            embed_lang = convert_to_input_set(lang, PADDING_SIZE)
            
            for picture in picture_files:
                image = Image.open(picture).convert('L')
                image = image.convert()
                image = image.resize((IMAGE_SIZE, IMAGE_SIZE))
                image = np.asarray(image).reshape((IMAGE_SIZE, IMAGE_SIZE, 1))
                image = image / 255

                for lang in embed_lang:
                    x_word.append(lang[0])
                    x_image.append(image)
                    y.append(lang[1])
                    
                    if len(y) >= BATCH_SIZE:
                        yield [np.array(x_word), np.array(x_image)], np.array(y)
                        x_word = []
                        x_image = []
                        y = []

In [15]:
from keras.layers import Input, Dense, Flatten, Concatenate, Conv2D, MaxPooling2D, Dropout, LSTM, RepeatVector
from keras.optimizers import RMSprop
from keras.models import Model

def generate_model():
    word_input = Input(shape=(PADDING_SIZE, N_ONEHOT_WORD))
    image_input = Input(shape=(IMAGE_SIZE, IMAGE_SIZE, 1))
    
    img = Conv2D(32, (3, 3), padding='same', activation='relu')(image_input)
    img = Conv2D(32, (3, 3), padding='same', activation='relu')(image_input)
    img = MaxPooling2D((4,4))(img)
    img = Conv2D(64, (3, 3), padding='same', activation='relu')(img)
    img = Conv2D(64, (3, 3), padding='same', activation='relu')(img)
    img = MaxPooling2D((4,4))(img)
    img = Dropout(0.25)(img)
    img = Conv2D(128, (3, 3), padding='same', activation='relu')(img)
    img = Conv2D(128, (3, 3), padding='same', activation='relu')(img)
    img = MaxPooling2D()(img)
    
    img = Flatten()(img)
    img = Dense(1024, activation='relu')(img)
    img = Dropout(0.3)(img)
    img = Dense(1024, activation='relu')(img)
    img = Dropout(0.3)(img)
    
    img = RepeatVector(PADDING_SIZE)(img)
    
    w = LSTM(64, return_sequences=True)(word_input)
    w = LSTM(64, return_sequences=True)(w)
    
    x = Concatenate()([w, img])
    x = LSTM(512, return_sequences=True, dropout=0.75)(x)
    x = LSTM(512)(x)
    
    x = Dense(256, activation='relu')(x)
    x = Dropout(0.5)(x)
    
    outputs = Dense(N_ONEHOT_WORD, activation='softmax')(x)

    optimizer = RMSprop(lr=0.0001, clipvalue=1.0)
    model = Model(inputs=[word_input, image_input], outputs=outputs)
    model.compile(optimizer=optimizer,
                  loss='categorical_crossentropy',
                  metrics=['accuracy'])
    
    return model

print(generate_model().summary())

__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_8 (InputLayer)            (None, 256, 256, 1)  0                                            
__________________________________________________________________________________________________
conv2d_20 (Conv2D)              (None, 256, 256, 32) 320         input_8[0][0]                    
__________________________________________________________________________________________________
max_pooling2d_10 (MaxPooling2D) (None, 64, 64, 32)   0           conv2d_20[0][0]                  
__________________________________________________________________________________________________
conv2d_21 (Conv2D)              (None, 64, 64, 64)   18496       max_pooling2d_10[0][0]           
__________________________________________________________________________________________________
conv2d_22 

In [16]:
from keras.layers import Input, Dense, Flatten, Concatenate, Conv2D, MaxPooling2D, Dropout, LSTM
from keras.models import Model

def generate_model_small():
    word_input = Input(shape=(PADDING_SIZE, N_ONEHOT_WORD))
    image_input = Input(shape=(IMAGE_SIZE, IMAGE_SIZE, 1))
    
    img = Conv2D(4, (3, 3), padding='same', activation='relu')(image_input)
    img = MaxPooling2D()(img)
    img = Conv2D(8, (3, 3), padding='same', activation='relu')(img)
    img = MaxPooling2D()(img)
    img = MaxPooling2D()(img)
    img = Dropout(0.1)(img)
    img = Conv2D(16, (3, 3), padding='same', activation='relu')(img)
    img = MaxPooling2D()(img)
    img = MaxPooling2D()(img)
    img = MaxPooling2D()(img)
    img = Dropout(0.1)(img)
    img = Flatten()(img)
    img = Dense(16, activation='relu')(img)
    
    w = LSTM(8, return_sequences=True)(word_input)
    w = LSTM(8)(w)
    
    x = Concatenate()([w, img])
    x = Dense(8, activation='relu')(x)
    
    outputs = Dense(N_ONEHOT_WORD, activation='softmax')(x)

    model = Model(inputs=[word_input, image_input], outputs=outputs)
    model.compile(optimizer='rmsprop',
                  loss='categorical_crossentropy',
                  metrics=['accuracy'])
    
    return model

# print(generate_model_small().summary())

In [17]:
from keras.callbacks import Callback
from keras import backend as K

class TensorBoard(Callback):

    def __init__(self, log_dir='./logs',
                 write_graph=False,
                 start_steps=0,
                 batch_freq=1):
        super(TensorBoard, self).__init__()
        
        global tf, projector
        import tensorflow as tf
        from tensorflow.contrib.tensorboard.plugins import projector
        
        self.log_dir = log_dir
        self.batch_freq = batch_freq
        self.write_graph = write_graph
        
        self.start_steps = start_steps
        self.steps_counter = 1

    def set_model(self, model):
        self.model = model
        self.sess = K.get_session()
        
        self.merged = tf.summary.merge_all()

        if self.write_graph:
            self.writer = tf.summary.FileWriter(self.log_dir,
                                                self.sess.graph)
        else:
            self.writer = tf.summary.FileWriter(self.log_dir)
            
    def save_scalar(self, logs):
        log = logs or {}
        
        for name, value in logs.items():
            if name in ['batch', 'size']:
                continue
            summary = tf.Summary()
            summary_value = summary.value.add()
            summary_value.simple_value = value.item()
            summary_value.tag = name
            self.writer.add_summary(
                summary,
                self.start_steps + self.steps_counter
            )
        self.writer.flush()
            
    def on_batch_end(self, batch, logs=None):
        if self.steps_counter % self.batch_freq == 0:
            self.save_scalar(logs)
        self.steps_counter += 1

    def on_epoch_end(self, epoch, logs=None):
        self.save_scalar(logs)        

    def on_train_end(self, _):
        self.writer.close()

class ModelCheckpoint(Callback):

    def __init__(self,
                 filepath,
                 start_steps=0,
                 batch_freq=1):
        super(ModelCheckpoint, self).__init__()
        
        self.filepath = filepath
        
        self.steps_counter = 0
        self.start_steps = start_steps
        self.batch_freq = batch_freq
        
        self.steps_counter
        
    def save_model(self):
        self.model.save_weights(self.filepath, overwrite=True)
        
    def on_batch_end(self, batch, logs=None):
        if self.steps_counter % self.batch_freq == 0:
            self.save_model()
        self.steps_counter += 1
        
    def on_epoch_end(self, epoch, logs=None):
        self.save_model()

In [18]:
model = generate_model()
# model.load_weights('./model/model-weight.hdf5')

model.fit_generator(
    input_generator(x_train_idx),
    steps_per_epoch=30443,
    validation_data=input_generator(x_test_idx),
    validation_steps=3382,
    max_queue_size=5,
    epochs=3,
    use_multiprocessing=True,
    callbacks=[
        TensorBoard(
            log_dir="./model/logs/",
            batch_freq=20
        ),
        ModelCheckpoint(
            filepath="./model/model-weight.hdf5",
            batch_freq=100
        )
    ]
)

Epoch 1/3
    1/30443 [..............................] - ETA: 23:23:12 - loss: 2.0863 - acc: 0.0625

  % delta_t_median)


    2/30443 [..............................] - ETA: 12:39:09 - loss: 2.0583 - acc: 0.1875

  % delta_t_median)


Epoch 2/3


<keras.callbacks.History at 0x7f5b7d9a8940>