In [1]:
import csv
import time

import numpy as np
from skimage import io
from skimage import transform

from keras import layers, models

from texture import initializers
from texture.layers import KernelPooling
from texture.cnn import keras_apps

Using TensorFlow backend.


## Describable Textures Dataset

In [2]:
dtd_dir = '/home/administrator/Dropbox/benchmark/dtd'
img_dir = dtd_dir+'/images/'


def read_split(split_name):
    '''Return the first column of every non-empty row in one DTD label file.

    split_name: file stem under `<dtd_dir>/labels/`, e.g. 'train1'.
    Returns a list of strings in file order.
    '''
    # `with` closes the handle as soon as the list is built; the bare
    # open() calls used before left all three label files open.
    with open(dtd_dir+'/labels/'+split_name+'.txt') as label_file:
        # `if row` skips blank lines, which csv.reader yields as [] and
        # which would otherwise raise IndexError on row[0].
        return [row[0] for row in csv.reader(label_file) if row]


# just use the first split (of 10) for now
train_list = read_split('train1')
val_list = read_split('val1')
test_list = read_split('test1')

len(train_list), len(val_list), len(test_list)

(1880, 1880, 1880)

In [3]:
# Class name = the part of each training path before the first '/';
# de-duplicate with a set, then sort so every class gets a stable index.
classes = sorted({path.split('/')[0] for path in train_list})
len(classes), classes

(47,
 ['banded',
  'blotchy',
  'braided',
  'bubbly',
  'bumpy',
  'chequered',
  'cobwebbed',
  'cracked',
  'crosshatched',
  'crystalline',
  'dotted',
  'fibrous',
  'flecked',
  'freckled',
  'frilly',
  'gauzy',
  'grid',
  'grooved',
  'honeycombed',
  'interlaced',
  'knitted',
  'lacelike',
  'lined',
  'marbled',
  'matted',
  'meshed',
  'paisley',
  'perforated',
  'pitted',
  'pleated',
  'polka-dotted',
  'porous',
  'potholed',
  'scaly',
  'smeared',
  'spiralled',
  'sprinkled',
  'stained',
  'stratified',
  'striped',
  'studded',
  'swirly',
  'veined',
  'waffled',
  'woven',
  'wrinkled',
  'zigzagged'])

In [4]:
def to_class(s):
    '''Return the position in `classes` of the text before the first '/' in `s`.

    Raises ValueError (from list.index) when that text is not in `classes`.
    '''
    class_name, _, _ = s.partition('/')
    return classes.index(class_name)

# Images stay in plain Python lists here; labels become integer arrays
# holding each path's index into `classes`.
X_train = [io.imread(img_dir + rel_path) for rel_path in train_list]
y_train = np.array(list(map(to_class, train_list)))

X_val = [io.imread(img_dir + rel_path) for rel_path in val_list]
y_val = np.array(list(map(to_class, val_list)))

X_test = [io.imread(img_dir + rel_path) for rel_path in test_list]
y_test = np.array(list(map(to_class, test_list)))

In [5]:
# train on train+val blindly, test on test
# NOTE: this grows X_train / y_train in place, so running the cell a second
# time appends the validation data again.
X_train.extend(X_val)
y_train = np.concatenate((y_train, y_val))

In [6]:
# Shapes of the first ten training images.
[img.shape for img in X_train[:10]]

[(480, 640, 3),
 (500, 497, 3),
 (400, 305, 3),
 (458, 610, 3),
 (640, 640, 3),
 (480, 640, 3),
 (490, 600, 3),
 (480, 480, 3),
 (432, 432, 3),
 (640, 640, 3)]

In [7]:
def center_crop(img, side_length):
    '''Resize the short side to side_length, then take the central square crop.

    img: array whose first two axes are (height, width); any further axes
        are carried through unchanged.
    side_length: edge length, in pixels, of the returned square.

    Returns the array produced by `transform.resize`, sliced to
    (side_length, side_length, ...).
    NOTE(review): per the scikit-image docs `resize` returns a floating-point
    image and by default does not preserve the input value range - confirm
    that is the scaling the network expects.
    '''
    # Only the first two axes are needed, so 2-D images are accepted too.
    h, w = img.shape[:2]
    new_h, new_w = side_length, side_length
    # Scale the long side by the same factor as the short side. Integer
    # arithmetic keeps the result exact and >= side_length.
    if h > w:
        new_h = side_length * h // w
    else:
        new_w = side_length * w // h
    resized_img = transform.resize(img, (new_h, new_w))

    # Floor division: slice bounds must be ints ('/' yields floats).
    h_offset = (new_h - side_length) // 2
    w_offset = (new_w - side_length) // 2

    return resized_img[h_offset:h_offset+side_length,
                       w_offset:w_offset+side_length]


# Call the function by its defined name and two-argument signature.
X_train = np.array([center_crop(x, 224) for x in X_train])
X_test  = np.array([center_crop(x, 224) for x in X_test])

In [8]:
# Sanity check: both sets should now be single arrays of equally sized crops.
X_train.shape, X_test.shape

((3760, 224, 224, 3), (1880, 224, 224, 3))

## Kernel Pooling CNN with ResNet, p=4, d=4096

In [9]:
# Look up the 'resnet50' entry of keras_apps and call it to build the backbone.
# include_top=False presumably drops the classification head - TODO confirm
# against texture.cnn.
resnet = keras_apps['resnet50'](include_top=False)
resnet.output_shape

(None, None, None, 2048)

In [10]:
# Attach a KernelPooling(p=4) layer to the backbone output; `x` is the
# pooled tensor.
x = KernelPooling(p=4)(resnet.output)

In [11]:
# Feature-extraction model: backbone input -> the pooled tensor `x` built in
# the previous cell.
_model = models.Model(inputs=resnet.input, outputs=x)
# Pooled descriptors for the whole training array.
# NOTE(review): predict() is given X_train as-is, while training feeds it
# through an ImageDataGenerator with samplewise_center=True - confirm the two
# are meant to see differently preprocessed inputs.
kp_output = _model.predict(X_train)
kp_output.shape

(3760, 14337)

In [12]:
# Put the classifier on the SAME pooled tensor that produced kp_output.
# This cell used to build a second KernelPooling layer, so the LogReg
# initializer was fitted on features from one layer instance and then attached
# to another. If KernelPooling draws any parameters at construction time the
# initial weights would not match the features they see
# (NOTE(review): confirm against texture.layers).
x = _model.output

# Initial Dense weights come from a LogReg fit on the precomputed descriptors.
log_reg = initializers.LogReg(kp_output, y_train)
# One output unit per class, tied to `classes` instead of a hard-coded count.
preds = layers.Dense(len(classes), activation='softmax',
                 kernel_initializer=log_reg, bias_initializer=log_reg)(x)

kp_resnet = models.Model(inputs=resnet.input, outputs=preds)
kp_resnet.summary()

__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_1 (InputLayer)            (None, None, None, 3 0                                            
__________________________________________________________________________________________________
conv1_pad (ZeroPadding2D)       (None, None, None, 3 0           input_1[0][0]                    
__________________________________________________________________________________________________
conv1 (Conv2D)                  (None, None, None, 6 9472        conv1_pad[0][0]                  
__________________________________________________________________________________________________
bn_conv1 (BatchNormalization)   (None, None, None, 6 256         conv1[0][0]                      
__________________________________________________________________________________________________
activation

In [21]:
from keras.preprocessing.image import ImageDataGenerator
from keras.optimizers import SGD, Adam
from keras.callbacks import LearningRateScheduler
from keras.utils import to_categorical

In [22]:
lr = 0.01
momentum = 0.9
decay = 0.0001

# Both optimizers clip each gradient value to [-1, 1].
sgd_opt = SGD(lr=lr, momentum=momentum, decay=decay, clipvalue=1.0)
# NOTE(review): Adam shares the SGD learning rate here - confirm 0.01 is
# intended for fine-tuning a pretrained backbone.
adam_opt = Adam(lr=lr, clipvalue=1.0)

# Training: per-image mean subtraction plus random horizontal flips.
image_gen = ImageDataGenerator(samplewise_center=True, horizontal_flip=True)

# Evaluation must use the same normalisation as training (no augmentation);
# previously this generator passed images through uncentered.
val_gen = ImageDataGenerator(samplewise_center=True)

# No .fit() calls: per the Keras docs fit() is only required for
# featurewise_center / featurewise_std_normalization / zca_whitening, none of
# which are enabled, so it only cost time and memory.

In [17]:
# Adam optimizer, softmax cross-entropy loss, accuracy reported each epoch.
kp_resnet.compile(optimizer=adam_opt,
                  loss='categorical_crossentropy',
                  metrics=['accuracy'])

In [20]:
epochs = 50
batch_size = 32
steps_per_epoch = X_train.shape[0] // batch_size

# One-hot encode into NEW names. Rebinding y_train / y_test made this cell
# non-idempotent (a second run would encode the one-hot matrix again) and
# replaced the integer labels for every later cell.
# num_classes is fixed explicitly so both matrices have the same width even if
# the highest class id is missing from one of the sets.
num_classes = len(classes)
y_train_onehot = to_categorical(y_train, num_classes=num_classes)
y_test_onehot = to_categorical(y_test, num_classes=num_classes)

hist = kp_resnet.fit_generator(
    image_gen.flow(X_train, y_train_onehot, batch_size=batch_size),
    validation_data=val_gen.flow(X_test, y_test_onehot, batch_size=batch_size),
    epochs=epochs, steps_per_epoch=steps_per_epoch)

Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
  8/117 [=>............................] - ETA: 2:24 - loss: 3.8583 - acc: 0.0195

KeyboardInterrupt: 

## FV-CNN with ResNet50 ImageNet features

# NOTE(review): `fisher` is not imported in any cell visible above - confirm
# where it comes from before running this section.
fv_res50 = fisher.FVCNN('resnet50', k=64)
fv_res50.cnn.output_shape, fv_res50.D

# Wall-clock timing of the fit.
t = time.time()
# NOTE(review): confirm which label encoding (integer class ids vs. one-hot)
# fit()/score() expect, and that y_train / y_test hold it when this runs.
train_score = fv_res50.fit(X_train, y_train)
print('Train SVC score: ', train_score)
print('fit() elapsed: ', time.time()-t)

# Score on the held-out test split
fv_res50.score(X_test, y_test)