In [1]:
import sys
import pprint
import os
import time
import numpy as np
import h5py
import yaml, json
from PIL import Image
import IPython
%pylab inline
plt.rcParams['image.cmap'] = 'gray'

# Load the YAML configuration and append its PROJECT_ROOT entry to sys.path.
# The "project-specific" imports that follow this block presumably rely on
# that path entry (TODO confirm).
# NOTE(review): the config location is a hard-coded absolute path, so this
# cell only runs on machines that have /work/ds_config.yaml.
config_path="/work/ds_config.yaml"
with open(config_path) as f:
    configs = yaml.safe_load(f)
sys.path.append(configs['PROJECT_ROOT'])

# project-specific
from data_iters import iam_hdf5_iterator
from viz_tools import VizUtils, array_to_png
data_root = configs['DATA_ROOT']

# deep learning imports
import keras
from keras.utils.np_utils import to_categorical
import theano
theano.config.exception_verbosity='high'
theano.config.optimizer='fast_compile'

from keras.models import Sequential, Graph
from keras.layers.core import Dense, Dropout, Activation, Flatten, AutoEncoder, Merge
from keras.layers.convolutional import MaxPooling2D, Convolution2D, Convolution1D

from keras.optimizers import SGD, Adam

from seya.attention import SpatialTransformer

Populating the interactive namespace from numpy and matplotlib


Using gpu device 0: GeForce GTX TITAN X (CNMeM is disabled)


In [18]:
def iam_get_hw(hdf5_set, form_id):
    """Slice one form's text block out of an HDF5-style image store.

    The bounding box comes from ``VizUtils.iam_get_text_block(form_id,
    data_root)`` and is unpacked as (left, top, right, bottom). The time
    spent on the ``hdf5_set[form_id][...]`` read is printed in seconds.

    Args:
        hdf5_set: mapping indexable by ``form_id`` whose values support
            2-D slicing (the notebook passes an open ``h5py.File``).
        form_id: key of the form to read.

    Returns:
        The ``[top:bottom, left:right]`` slice of the stored form.
    """
    left, top, right, bottom = VizUtils.iam_get_text_block(form_id, data_root)
    read_started = time.time()
    img_array = hdf5_set[form_id][top:bottom, left:right]
    elapsed = time.time() - read_started
    print(elapsed)
    return img_array

class IamHandwritingIterator:
    """Iterator yielding ``(author, image)`` pairs, one per IAM form.

    Form ids are taken from the ``*.png`` file names found in
    ``<data_root>/forms``; pixel data is read from ``data_object`` through
    ``iam_get_hw``; the author id is looked up in ``<data_root>/forms.txt``.

    Args:
        data_root: directory containing ``forms/`` and ``forms.txt``.
        data_object: image store handed to ``iam_get_hw`` (the notebook
            passes an open ``h5py.File``).
        h_start, h_end: fractions of the image height delimiting the
            vertical band that ``next`` returns.
        fixed_width, crop_height: stored on the instance; ``next`` does not
            currently apply them (the ``crop_and_resize`` call is disabled).
    """

    def __init__(self, data_root, data_object, h_start=0., h_end=1.,fixed_width=None,crop_height=None):
        self.data_root = data_root
        self.limits = h_start, h_end
        forms_dir = os.path.join(data_root, "forms")
        # Strip the ".png" extension to obtain the form id.
        self.forms_list = [f[:-4] for f in os.listdir(forms_dir) if f[-4:] == ".png"]
        self.fixed_width=fixed_width
        self.crop_height=crop_height
        self.data_object = data_object

    def __iter__(self):
        return self

    @staticmethod
    def get_author(form_id, data_root):
        """Return the author id recorded for ``form_id`` in ``forms.txt``.

        Each line is split on whitespace; the first field is compared with
        ``form_id`` and the second field is returned on a match. Lines with
        fewer than two fields are skipped.

        Returns:
            The author id string, or ``None`` when no line matches.
        """
        forms_data = os.path.join(data_root, "forms.txt")
        with open(forms_data) as f:
            for form_line in f:
                # split() rather than split(' '): the latter leaves the
                # trailing newline attached to the last field, which
                # corrupts the author id on two-field lines.
                fields = form_line.split()
                if len(fields) < 2:
                    continue
                if fields[0] == form_id:
                    return fields[1]
        return None

    @staticmethod
    def crop_and_resize(img, nb_rows=None, nb_cols=None, mode='L'):
        """Crop the height of and/or rescale the width of ``img``.

        Args:
            img: 2-D array.
            nb_rows: when given, keep only the first ``nb_rows`` rows;
                ``img`` must have at least that many.
            nb_cols: when given, resize the width to ``nb_cols`` columns
                with PIL, leaving the height unchanged.
            mode: PIL image mode used for the resize.

        Returns:
            The cropped array; when ``nb_cols`` is given, an ``int16``
            array with ``nb_cols`` columns.
        """
        # crop to requisite rows
        if nb_rows is not None:
            assert img.shape[0] >= nb_rows
            img = img[:nb_rows,:]
        if nb_cols is not None:
            print("Resizing from {0} cols to {1}".format(img.shape[1], nb_cols))
            # Image.fromarray does not convert data to `mode`; it only
            # reinterprets the buffer. The array must therefore already
            # have the dtype that the mode implies.
            # NOTE(review): casting to uint8 for 'L' assumes pixel values
            # lie in 0..255 - confirm against the stored forms.
            pil_dtype = {'L': 'uint8', 'I': 'int32', 'F': 'float32'}.get(mode, 'float')
            img_pil = Image.fromarray(np.array(img, dtype=pil_dtype), mode=mode)
            img_pil = img_pil.resize((nb_cols, img.shape[0]))
            img = np.array(img_pil.getdata(), dtype='int16').reshape(-1, nb_cols)
        return img

    def next(self):
        """Return ``(author, img)`` for the next form.

        Forms are consumed from the end of ``self.forms_list``, so an
        instance can be iterated only once.

        Raises:
            StopIteration: when no forms remain.
        """
        try:
            next_form = self.forms_list.pop()
        except IndexError:
            raise StopIteration

        # open img
        img = iam_get_hw(self.data_object, next_form)
        # Keep only the vertical band selected by the (h_start, h_end) fractions.
        top = int(img.shape[0] * self.limits[0])
        bottom = int(img.shape[0] * self.limits[1])
        img = img[top:bottom,:]
        # Cropping/resizing is currently disabled:
        #img = self.crop_and_resize(img, self.crop_height, self.fixed_width)
        author = self.get_author(next_form, self.data_root)
        return author, img

    # Python 3 iterator protocol; ``next`` remains available for Python 2.
    __next__ = next

In [None]:
# Visual sanity check: iterate over every form in the HDF5 store and display
# it together with its author id, replacing the previous cell output each time.
forms_path = os.path.join(data_root, "iam-processed/forms/forms.hdf5")    
with h5py.File(forms_path, "r") as f:
    # Full image height (0. .. 1.), no width/height normalisation.
    foo = IamHandwritingIterator(data_root, f, 0., 1., fixed_width=None, crop_height=None)
    for i, (author, form) in enumerate(foo):
        # wait=True delays clearing until new output is available.
        IPython.display.clear_output(wait=True)
        form_png = array_to_png.get_png_from_array(form)
        print "Author:", author
        print form.shape
        print "{0} printed".format(i)
        IPython.display.display_png(form_png,raw=True)
        # Pause one second between forms.
        time.sleep(1)

    

In [None]:
%pdb

In [12]:
# Localization Net
# Module-level constants for the model cells.
# NOTE(review): none of these three names is referenced in the cells visible
# here; presumably they are consumed by training code elsewhere - confirm.
batch_size = 256
epoch_size = 1000
nb_authors = 100


def locnet_model(input_shape = (1,120,120)):
    """Build the localization network used inside a SpatialTransformer.

    Layer stack (after seya's STN demo notebook): 4x4 max-pool, 20-filter
    4x4 convolution, 2x2 max-pool, 20-filter 4x4 convolution, flatten,
    Dense(50) + ReLU, Dense(6).

    The final Dense(6) layer is initialised with an all-zero kernel and the
    flattened 2x3 matrix [[1, 0, 0], [0, 1, 0]] as bias.

    Args:
        input_shape: shape given to the first layer.

    Returns:
        An uncompiled ``Sequential`` model.
    """
    # Initial weights for the last layer: zero kernel, identity-affine bias.
    identity_bias = np.eye(2, 3, dtype='float32').flatten()
    zero_kernel = np.zeros((50, 6), dtype='float32')

    # from seya's STN demo notebook:
    layers = [
        MaxPooling2D(pool_size=(4,4), input_shape=input_shape),
        Convolution2D(20, 4, 4),
        MaxPooling2D(pool_size=(2,2)),
        Convolution2D(20, 4, 4),
        Flatten(input_shape=input_shape),
        Dense(50),
        Activation('relu'),
        Dense(6, weights=[zero_kernel, identity_bias]),
    ]

    locnet = Sequential()
    for layer in layers:
        locnet.add(layer)
    return locnet

def stn_model(locnet, input_shape=(1,120,120), nb_outputs=100, downsample_factor=2):
    """Build and compile a classifier with a single SpatialTransformer front end.

    Layer stack: SpatialTransformer(locnet), 32-filter 4x4 'valid'
    convolution + ReLU, 2x2 max-pool, 32-filter 3x3 convolution + ReLU,
    8x8 max-pool, flatten, Dense(256) + ReLU, Dense(nb_outputs) + softmax.

    Args:
        locnet: localization network handed to the SpatialTransformer.
        input_shape: input shape of the SpatialTransformer layer.
        nb_outputs: width of the final softmax layer.
        downsample_factor: forwarded to the SpatialTransformer.

    Returns:
        A ``Sequential`` model compiled with categorical cross-entropy and
        Adam (lr=0.0001).
    """
    layers = [
        SpatialTransformer(locnet, input_shape=input_shape, downsample_factor=downsample_factor),
        Convolution2D(32, 4, 4, border_mode='valid'),
        Activation('relu'),
        MaxPooling2D(pool_size=(2, 2)),
        Convolution2D(32, 3, 3),
        Activation('relu'),
        MaxPooling2D(pool_size=(8, 8)),
        Flatten(),
        Dense(256),
        Activation('relu'),
        Dense(nb_outputs),
        Activation('softmax'),
    ]

    model = Sequential()
    for layer in layers:
        model.add(layer)

    optimizer = Adam(lr=0.0001)
    model.compile(loss='categorical_crossentropy', optimizer=optimizer)
    return model

def p_stn_model(locnets, input_shape=(1,120,120), nb_stns=10, nb_outputs=100, downsample_factor=2):
    """Build and compile a Graph model with parallel SpatialTransformers.

    ``nb_stns`` SpatialTransformer nodes all read the same input. Their
    outputs are concatenated along axis 1 and fed to the same convolutional
    classifier as ``stn_model``: conv 4x4 + ReLU, 2x2 max-pool, conv 3x3 +
    ReLU, 8x8 max-pool, flatten, Dense(256) + ReLU, Dense(nb_outputs) +
    softmax.

    Args:
        locnets: sequence of localization networks; ``locnets[i]`` drives
            node ``'stn<i>'``, so at least ``nb_stns`` entries are needed.
        input_shape: shape of the graph input ``'input'``.
        nb_stns: number of parallel SpatialTransformer branches (>= 1).
        nb_outputs: width of the final softmax layer.
        downsample_factor: forwarded to every SpatialTransformer.

    Returns:
        A ``Graph`` model with output ``'output1'``, compiled with
        categorical cross-entropy and Adam (lr=0.0001).

    Raises:
        ValueError: if ``nb_stns`` is smaller than 1.
    """
    if nb_stns < 1:
        raise ValueError("nb_stns must be >= 1, got {0}".format(nb_stns))

    model = Graph()
    # add STNs at start that each look at the same input
    model.add_input(name='input', input_shape=input_shape)
    stn_names = [ 'stn{0}'.format(i) for i in range(nb_stns) ]
    for i in range(nb_stns):
        model.add_node(SpatialTransformer(locnets[i], input_shape=input_shape, downsample_factor=downsample_factor),
                       name=stn_names[i], input='input')

    conv1 = Convolution2D(32, 4, 4, border_mode='valid')
    if nb_stns == 1:
        # A merge needs at least two inputs; wire a lone branch in directly.
        model.add_node(conv1, name='conv1', input=stn_names[0])
    else:
        model.add_node(conv1, name='conv1', inputs=stn_names, concat_axis=1)
    model.add_node(Activation('relu'), name='relu1', input='conv1')
    model.add_node(MaxPooling2D(pool_size=(2, 2)), name='maxpool1', input='relu1')
    model.add_node(Convolution2D(32, 3, 3), name='conv2', input='maxpool1')
    model.add_node(Activation('relu'), name='relu2', input='conv2')
    model.add_node(MaxPooling2D(pool_size=(8, 8)), name='maxpool2', input='relu2')

    model.add_node(Flatten(), name='flatten1', input='maxpool2')
    model.add_node(Dense(256), name='dense1', input='flatten1')
    model.add_node(Activation('relu'), name='relu3', input='dense1')

    model.add_node(Dense(nb_outputs), name='dense2', input='relu3')
    model.add_node(Activation('softmax'), name='softmax1', input='dense2')
    model.add_output(name='output1', input='softmax1')

    adam = Adam(lr=0.0001)

    model.compile(loss={'output1': 'categorical_crossentropy'}, optimizer=adam)

    return model


In [13]:
nb_stns = 10
# One separately constructed localization network per transformer branch.
locnets = []
for i in range(nb_stns):
    locnets.append(locnet_model())
p_stn = p_stn_model(locnets, nb_stns=nb_stns)

In [17]:
def get_transform_fn(node):
    """Compile a Theano function from a node's input to its output.

    Args:
        node: object exposing ``get_input()`` and ``get_output()``.

    Returns:
        The compiled ``theano.function`` taking the node's input variable
        and returning its output.
    """
    node_input = node.get_input()
    node_output = node.get_output()
    return theano.function([node_input], node_output)

# One compiled input->output function per SpatialTransformer node of p_stn.
# The closing parenthesis of get_transform_fn(...) must come before `for`:
# otherwise a single generator object is passed to get_transform_fn instead
# of each node in turn.
viz_funcs = [ get_transform_fn(p_stn.nodes['stn{0}'.format(i)]) for i in range(nb_stns) ]

def accumulate_batch(batch_size=32, iter_object=None, *args, **kwargs):
    """Endlessly yield fixed-size batches drawn from a restartable iterator.

    ``iter_object(*args, **kwargs)`` is called to start each pass; it must
    return an iterable of ``(author, frag)`` pairs. Items are stored as
    ``(frag, author)`` tuples. When a pass is exhausted a new one is
    started, and a partially filled batch carries over into it.

    Args:
        batch_size: number of items per yielded batch (>= 1).
        iter_object: callable creating a fresh iterable for every pass.
            Required; it only has a default because it follows the
            defaulted ``batch_size`` in the signature.
        *args, **kwargs: forwarded to ``iter_object``.

    Yields:
        Lists of exactly ``batch_size`` ``(frag, author)`` tuples.

    Raises:
        TypeError: if ``iter_object`` is not supplied.
        ValueError: if ``batch_size`` is smaller than 1.
    """
    if iter_object is None:
        raise TypeError("accumulate_batch() requires an iter_object factory")
    if batch_size < 1:
        raise ValueError("batch_size must be >= 1, got {0}".format(batch_size))

    batch = []
    while True:
        produced_any = False
        for author, frag in iter_object(*args, **kwargs):
            produced_any = True
            batch.append((frag, author))
            if len(batch) == batch_size:
                yield batch
                batch = []
        if not produced_any:
            # An empty source would otherwise make this loop spin forever.
            return
            

['__class__',
 '__delattr__',
 '__dict__',
 '__doc__',
 '__format__',
 '__getattribute__',
 '__hash__',
 '__init__',
 '__module__',
 '__new__',
 '__reduce__',
 '__reduce_ex__',
 '__repr__',
 '__setattr__',
 '__sizeof__',
 '__str__',
 '__subclasshook__',
 '__weakref__',
 '_input_shape',
 '_interpolate',
 '_linspace',
 '_meshgrid',
 '_repeat',
 '_transform',
 'build',
 'constraints',
 'count_params',
 'downsample_factor',
 'get_config',
 'get_input',
 'get_output',
 'get_output_mask',
 'get_params',
 'get_weights',
 'input',
 'input_shape',
 'locnet',
 'nb_input',
 'nb_output',
 'output_shape',
 'params',
 'previous',
 'regularizers',
 'return_theta',
 'set_input_shape',
 'set_name',
 'set_previous',
 'set_weights',
 'supports_masked_input',
 'trainable']