In [1]:
import csv
import time
import numpy as np
from skimage import io

from texture import fisher

Using TensorFlow backend.


## Describable Textures Dataset

In [2]:
# Root of the local copy of the dataset; adjust this path for your machine.
dtd_dir = '/home/ross/Dropbox/benchmark/dtd'
img_dir = dtd_dir+'/images/'


def read_split(path):
    """Return the first CSV field of every non-empty row of the file at `path`.

    The file is opened in a `with` block so the handle is closed as soon as
    the rows have been read (the bare `open()` calls used previously were
    never closed). `newline=''` is what the csv module asks for when it is
    handed a file object.
    """
    with open(path, newline='') as split_file:
        # `if row` skips blank lines, for which csv.reader yields an empty list.
        return [row[0] for row in csv.reader(split_file) if row]


# just use the first split (of 10) for now
train_list = read_split(dtd_dir+'/labels/train1.txt')
val_list = read_split(dtd_dir+'/labels/val1.txt')
test_list = read_split(dtd_dir+'/labels/test1.txt')

# Sanity check: number of entries in each split.
len(train_list), len(val_list), len(test_list)

(1880, 1880, 1880)

In [3]:
# A class name is whatever precedes the first '/' in a training path;
# collect the distinct names and keep them in sorted order.
classes = sorted({path.partition('/')[0] for path in train_list})
len(classes), classes

(47,
 ['banded',
  'blotchy',
  'braided',
  'bubbly',
  'bumpy',
  'chequered',
  'cobwebbed',
  'cracked',
  'crosshatched',
  'crystalline',
  'dotted',
  'fibrous',
  'flecked',
  'freckled',
  'frilly',
  'gauzy',
  'grid',
  'grooved',
  'honeycombed',
  'interlaced',
  'knitted',
  'lacelike',
  'lined',
  'marbled',
  'matted',
  'meshed',
  'paisley',
  'perforated',
  'pitted',
  'pleated',
  'polka-dotted',
  'porous',
  'potholed',
  'scaly',
  'smeared',
  'spiralled',
  'sprinkled',
  'stained',
  'stratified',
  'striped',
  'studded',
  'swirly',
  'veined',
  'waffled',
  'woven',
  'wrinkled',
  'zigzagged'])

In [4]:
def to_class(s):
    """Map a '<class name>/<rest>' path string to its integer class id.

    The id is the position of the text before the first '/' within the
    module-level `classes` list. A name that is not in `classes` raises
    ValueError (from list.index).
    """
    label, _, _ = s.partition('/')
    return classes.index(label)

# For each split: read every listed image from img_dir (kept as a Python list
# of arrays) and turn the same paths into an array of integer class ids.
X_train = [io.imread(img_dir + name) for name in train_list]
y_train = np.array(list(map(to_class, train_list)))

X_val = [io.imread(img_dir + name) for name in val_list]
y_val = np.array(list(map(to_class, val_list)))

X_test = [io.imread(img_dir + name) for name in test_list]
y_test = np.array(list(map(to_class, test_list)))

In [5]:
# train on train+val blindly, test on test
# Guard: X_train still having exactly len(train_list) items means the
# validation data has not been appended yet. Re-running this cell is then a
# no-op instead of adding the validation split a second time.
if len(X_train) == len(train_list):
    X_train = X_train + X_val
    y_train = np.concatenate([y_train, y_val])

In [6]:
# Shapes of the first ten training images.
[img.shape for img in X_train[:10]]

[(480, 640, 3),
 (500, 497, 3),
 (400, 305, 3),
 (458, 610, 3),
 (640, 640, 3),
 (480, 640, 3),
 (490, 600, 3),
 (480, 480, 3),
 (432, 432, 3),
 (640, 640, 3)]

## FV-CNN with VGG16 ImageNet features

In [7]:
# Build the FV-CNN model on the 'vgg16' backbone with k=64
# (k presumably is the GMM cluster count: the fit log reports 64 clusters).
fv_vgg = fisher.FVCNN('vgg16', k=64)
# Display the CNN's output shape next to the model's D attribute
# (D presumably is the feature dimensionality; it matches the last axis).
fv_vgg.cnn.output_shape, fv_vgg.D

((None, None, None, 512), 512)

In [8]:
# Time the whole fit() call on the merged training data. perf_counter() is a
# monotonic clock, so the elapsed value cannot be skewed by wall-clock
# adjustments during this long run (time.time() offers no such guarantee).
t = time.perf_counter()
# fit() returns a score, which is printed as the training SVC score.
train_score = fv_vgg.fit(X_train, y_train)
print('Train SVC score: ', train_score)
print('fit() elapsed: ', time.perf_counter()-t)

(sample of) img_feats.shapes: [(300, 512), (225, 512), (108, 512), (266, 512), (400, 512)]
all_feats.shape : (810376, 512)
Fitting GMM with 64 clusters...
Train SVC score:  0.999468085106383
fit() elapsed:  1195.6614303588867


In [9]:
# Test Score: evaluate the fitted VGG16 model on the test split, which was
# loaded separately and was not part of the data passed to fit().
fv_vgg.score(X_test, y_test)

0.6622340425531915

## FV-CNN with VGG19 ImageNet features

In [10]:
# Build the FV-CNN model on the 'vgg19' backbone with k=64
# (k presumably is the GMM cluster count: the fit log reports 64 clusters).
fv_vggd = fisher.FVCNN('vgg19', k=64)
# Display the CNN's output shape next to the model's D attribute
# (D presumably is the feature dimensionality; it matches the last axis).
fv_vggd.cnn.output_shape, fv_vggd.D

((None, None, None, 512), 512)

In [11]:
# Time the whole fit() call on the merged training data. perf_counter() is a
# monotonic clock, so the elapsed value cannot be skewed by wall-clock
# adjustments during this long run (time.time() offers no such guarantee).
t = time.perf_counter()
# fit() returns a score, which is printed as the training SVC score.
train_score = fv_vggd.fit(X_train, y_train)
print('Train SVC score: ', train_score)
print('fit() elapsed: ', time.perf_counter()-t)

(sample of) img_feats.shapes: [(300, 512), (225, 512), (108, 512), (266, 512), (400, 512)]
all_feats.shape : (810376, 512)
Fitting GMM with 64 clusters...
Train SVC score:  1.0
fit() elapsed:  936.503992319107


In [12]:
# Test Score: evaluate the fitted VGG19 model on the test split, which was
# loaded separately and was not part of the data passed to fit().
fv_vggd.score(X_test, y_test)

0.6664893617021277

## FV-CNN with ResNet50 ImageNet features

In [13]:
# Build the FV-CNN model on the 'resnet50' backbone with k=64
# (k presumably is the GMM cluster count: the fit log reports 64 clusters).
fv_res50 = fisher.FVCNN('resnet50', k=64)
# Display the CNN's output shape next to the model's D attribute
# (D presumably is the feature dimensionality; it matches the last axis).
fv_res50.cnn.output_shape, fv_res50.D

((None, None, None, 2048), 2048)

In [14]:
# Time the whole fit() call on the merged training data. perf_counter() is a
# monotonic clock, so the elapsed value cannot be skewed by wall-clock
# adjustments during this long run (time.time() offers no such guarantee).
t = time.perf_counter()
# fit() returns a score, which is printed as the training SVC score.
train_score = fv_res50.fit(X_train, y_train)
print('Train SVC score: ', train_score)
print('fit() elapsed: ', time.perf_counter()-t)

(sample of) img_feats.shapes: [(300, 2048), (256, 2048), (130, 2048), (285, 2048), (400, 2048)]
all_feats.shape : (884952, 2048)
Fitting GMM with 64 clusters...
Train SVC score:  1.0
fit() elapsed:  3480.918876647949


In [15]:
# Test Score: evaluate the fitted ResNet50 model on the test split, which was
# loaded separately and was not part of the data passed to fit().
fv_res50.score(X_test, y_test)

0.7015957446808511