# CIFAR10

In [17]:
import h5py

In [18]:
cifar = h5py.File('cifar10.hdf5', 'r')

In [8]:
list(cifar.attrs.keys())

[u'split',
 u'h5py_interface_version',
 u'fuel_convert_version',
 u'fuel_convert_command']

In [11]:
cifar.attrs['split']

array([ ('test', 'features', 50000, 60000, <HDF5 object reference (null)>,  True, '.'),
       ('test', 'targets', 50000, 60000, <HDF5 object reference (null)>,  True, '.'),
       ('train', 'features',     0, 50000, <HDF5 object reference (null)>,  True, '.'),
       ('train', 'targets',     0, 50000, <HDF5 object reference (null)>,  True, '.')], 
      dtype=[('split', 'S5'), ('source', 'S8'), ('start', '<i8'), ('stop', '<i8'), ('indices', 'O'), ('available', '?'), ('comment', 'S1')])

In [12]:
cifar.attrs['fuel_convert_version']

'0.2'

In [13]:
cifar.attrs['fuel_convert_command']

'fuel-convert cifar10'

# Flowers

Let's try to crop the data

In [1]:
import os

classes = [
    'daisy',
    'dandelion',
    'roses',
    'sunflowers',
    'tulips'
]
os.listdir('flower_photos/' + classes[2])[:5]

['15274443248_76b9f3eb24.jpg',
 '9406573080_60eab9278e_n.jpg',
 '8181940917_1ac63937d5_n.jpg',
 '7551637034_55ae047756_n.jpg',
 '3412874275_ca78ee024d_m.jpg']

In [2]:
import os, sys
import re
import Image, ImageOps

size = 64, 64

proc = 0
for c in classes:
    for fname in os.listdir('flower_photos/' + c):
        infile = os.path.join('flower_photos', c, fname)
        outfile = os.path.join('flower_small', c, re.sub('jpe?g$', 'png', fname))
        if infile != outfile:
            try:
                im = Image.open(infile)
                im = ImageOps.fit(
                    im,
                    size,
                    Image.ANTIALIAS
                )
                im.save(outfile, "png")
            except IOError:
                print "cannot create thumbnail for '%s'" % infile
            proc += 1
            if proc % 1000 == 0:
                print(proc)

1000
2000
3000


In [22]:
import random
import math
int(random.random() * 10)

6

In [29]:
import os, sys
import re
import Image, ImageOps

#Crop0 = random crop 20
#Crop1 = random crop 30
#Crop2 = rotate 15, crop 20
#Crop3 = rotate 15, crop 20
size = 32
rand_crop = 30

proc = 0
for fname in os.listdir('102flowers/'):
    infile = os.path.join('102flowers', fname)
    outfile = os.path.join('102flowers_crop3', re.sub('jpe?g$', 'png', fname))
    if infile != outfile:
        try:
            im = Image.open(infile)
            w, h = im.size
            if w > 72 and h > 72:
                im = im.rotate(random.random() * 30 - 15)
                im = im.crop((20, 20, w - 20, h - 20))
            else:
                print('crop failed:', (w, h))
            
            im = ImageOps.fit(
                im,
                (size, size),
                Image.ANTIALIAS
            )
            im.save(outfile, "png")
        except IOError:
            print "cannot create thumbnail for '%s'" % infile
        proc += 1
        if proc % 1000 == 0:
            print(proc)

1000
2000
3000
4000
5000
6000
7000
8000


## Fit into H5PY

In [12]:
import scipy.io
import numpy as np

In [5]:
labels = scipy.io.loadmat('imagelabels.mat')

In [6]:
flower_labels = labels['labels'].transpose()

In [7]:
flower_labels.shape

(8189, 1)

In [39]:
from scipy.misc import imread
pref = '102flowers_crop3/'
img_files = [os.path.join(pref, x) for x in os.listdir(pref)]
print(img_files[:5])

img_arrays = []
for fpath in img_files:
    im = imread(fpath).transpose([2, 0, 1])
    img_arrays.append(im)

['102flowers_crop3/image_01290.png', '102flowers_crop3/image_07262.png', '102flowers_crop3/image_02921.png', '102flowers_crop3/image_03259.png', '102flowers_crop3/image_04727.png']


In [40]:
#features_all = []
features = np.stack(img_arrays)
features_crop3 = features
features_all.append(features_crop3)
len(features_all)

5

In [51]:
features_flip_y = [np.flip(x, axis=2) for x in features_all]
features_flip_x = [np.flip(x, axis=2) for x in features_all]

In [52]:
features_all_flat = np.concatenate(features_all + features_flip_y + features_flip_x, axis=0)
features_all_flat.shape

(122835, 3, 32, 32)

In [53]:
labels_all_flat = np.repeat(flower_labels, len(features_all) * 3, axis=0)
labels_all_flat.shape

(122835, 1)

In [54]:
data = (('train', 'features', features_all_flat),
            ('train', 'targets', labels_all_flat))

In [58]:
from fuel.converters.base import fill_hdf5_file
import h5py
h5file = h5py.File('flowers102_32x32.hdf5', mode='w')
fill_hdf5_file(h5file, data)

h5file['features'].dims[0].label = 'batch'
h5file['features'].dims[1].label = 'channel'
h5file['features'].dims[2].label = 'height'
h5file['features'].dims[3].label = 'width'
h5file['targets'].dims[0].label = 'batch'
h5file['targets'].dims[1].label = 'index'

h5file.flush()
h5file.close()

In [59]:
flowers = h5py.File('flowers102_32x32.hdf5', 'r')

In [60]:
flowers['features'].shape

(122835, 3, 32, 32)