# CIFAR10

In [1]:
import h5py

In [18]:
# Open the CIFAR-10 HDF5 file read-only so its metadata can be inspected.
cifar = h5py.File('cifar10.hdf5', mode='r')

In [8]:
# List the names of the file-level attributes stored on the HDF5 file.
list(cifar.attrs.keys())

[u'split',
 u'h5py_interface_version',
 u'fuel_convert_version',
 u'fuel_convert_command']

In [11]:
# Show the 'split' attribute. The output below is a structured array with one
# row per (split, source) pair, giving the start/stop row offsets of each.
cifar.attrs['split']

array([ ('test', 'features', 50000, 60000, <HDF5 object reference (null)>,  True, '.'),
       ('test', 'targets', 50000, 60000, <HDF5 object reference (null)>,  True, '.'),
       ('train', 'features',     0, 50000, <HDF5 object reference (null)>,  True, '.'),
       ('train', 'targets',     0, 50000, <HDF5 object reference (null)>,  True, '.')], 
      dtype=[('split', 'S5'), ('source', 'S8'), ('start', '<i8'), ('stop', '<i8'), ('indices', 'O'), ('available', '?'), ('comment', 'S1')])

In [12]:
# Show the version string recorded in the 'fuel_convert_version' attribute.
cifar.attrs['fuel_convert_version']

'0.2'

In [13]:
# Show the command line recorded in the 'fuel_convert_command' attribute.
cifar.attrs['fuel_convert_command']

'fuel-convert cifar10'

# Flowers

Let's try to crop the data: each flower photo is resized and cropped to a small square thumbnail and saved as a PNG.

In [3]:
import os

# Names of the per-class sub-directories of the flower photo dataset.
classes = ['daisy', 'dandelion', 'roses', 'sunflowers', 'tulips']

# Peek at the first five file names of the class at index 2 ('roses').
sample_dir = '../ALI_old/flower_photos/' + classes[2]
os.listdir(sample_dir)[:5]

['15274443248_76b9f3eb24.jpg',
 '9406573080_60eab9278e_n.jpg',
 '8181940917_1ac63937d5_n.jpg',
 '7551637034_55ae047756_n.jpg',
 '3412874275_ca78ee024d_m.jpg']

In [2]:
import os, sys
import re
import Image, ImageOps

# Target (width, height) of the square thumbnails written by this cell.
size = 64, 64

# Number of files visited so far; only used for the progress print-out.
proc = 0
for c in classes:
    src_dir = os.path.join('flower_photos', c)
    dst_dir = os.path.join('flower_small', c)
    # Image.save() does not create missing directories. Without this, a
    # missing class folder turns every file into a "cannot create thumbnail"
    # message instead of a thumbnail.
    if not os.path.isdir(dst_dir):
        os.makedirs(dst_dir)
    for fname in os.listdir(src_dir):
        infile = os.path.join(src_dir, fname)
        # Swap a trailing 'jpg'/'jpeg' for 'png'; any other name is kept as-is.
        # Source and destination trees differ, so infile never equals outfile
        # and no "same file" guard is needed.
        outfile = os.path.join(dst_dir, re.sub('jpe?g$', 'png', fname))
        try:
            im = Image.open(infile)
            # Resize and crop to exactly `size`.
            im = ImageOps.fit(im, size, Image.ANTIALIAS)
            im.save(outfile, "png")
        except IOError:
            # Best effort: report the file and carry on with the rest.
            # print(...) with a single argument behaves the same on
            # Python 2 and 3, matching the progress print below.
            print("cannot create thumbnail for '%s'" % infile)
        proc += 1
        if proc % 1000 == 0:
            print(proc)

1000
2000
3000


In [22]:
import random
import math
# Scratch check: draw a random integer in [0, 9] by truncating a uniform
# float from [0.0, 1.0) scaled by 10.
int(random.random() * 10)

6

In [36]:
import os, sys
import re
import Image, ImageOps

# Notes on dataset variants. NOTE(review): this cell itself only performs a
# plain ImageOps.fit resize-and-crop; no random crop or rotation happens here,
# and Crop2/Crop3 carry the same description -- confirm what was intended.
#Crop0 = random crop 20
#Crop1 = random crop 30
#Crop2 = rotate 15, crop 20
#Crop3 = rotate 15, crop 20
size = 32        # side length in pixels of the square thumbnails
rand_crop = 30   # NOTE(review): not used anywhere in this cell

src_dir = '../ALI_old/102flowers'
dst_dir = '../ALI_old/102flowers_small'
# Image.save() does not create missing directories. Without this, a missing
# output folder turns every file into a "cannot create thumbnail" message.
if not os.path.isdir(dst_dir):
    os.makedirs(dst_dir)

# Number of files visited so far; only used for the progress print-out.
proc = 0
for fname in os.listdir(src_dir):
    infile = os.path.join(src_dir, fname)
    # Swap a trailing 'jpg'/'jpeg' for 'png'; any other name is kept as-is.
    # Source and destination folders differ, so infile never equals outfile
    # and no "same file" guard is needed.
    outfile = os.path.join(dst_dir, re.sub('jpe?g$', 'png', fname))
    try:
        im = Image.open(infile)
        # Resize and crop to exactly (size, size).
        im = ImageOps.fit(im, (size, size), Image.ANTIALIAS)
        im.save(outfile, "png")
    except IOError:
        # Best effort: report the file and carry on with the rest.
        # print(...) with a single argument behaves the same on Python 2
        # and 3, matching the progress print below.
        print("cannot create thumbnail for '%s'" % infile)
    proc += 1
    if proc % 1000 == 0:
        print(proc)

1000
2000
3000
4000
5000
6000
7000
8000


## Fit into H5PY

In [4]:
import scipy.io
import numpy as np

In [6]:
# Load the MATLAB file that holds the image labels into a dict-like object.
labels_path = '../ALI_old/imagelabels.mat'
labels = scipy.io.loadmat(labels_path)

In [7]:
# Transpose the stored 'labels' array so there is one row per image
# (the next cell reports shape (8189, 1)).
flower_labels = labels['labels'].T

In [8]:
# Sanity check: the label array should have one row per image.
flower_labels.shape

(8189, 1)

In [37]:
# Collect the full path of every thumbnail and order them by file name, so
# the image order is deterministic.
pref = '../ALI_old/102flowers_small/'
img_files = [os.path.join(pref, x) for x in os.listdir(pref)]
img_files.sort()
img_files[:10]

['../ALI_old/102flowers_small/image_00001.png',
 '../ALI_old/102flowers_small/image_00002.png',
 '../ALI_old/102flowers_small/image_00003.png',
 '../ALI_old/102flowers_small/image_00004.png',
 '../ALI_old/102flowers_small/image_00005.png',
 '../ALI_old/102flowers_small/image_00006.png',
 '../ALI_old/102flowers_small/image_00007.png',
 '../ALI_old/102flowers_small/image_00008.png',
 '../ALI_old/102flowers_small/image_00009.png',
 '../ALI_old/102flowers_small/image_00010.png']

In [38]:
from scipy.misc import imread
# Show which files come first, as a reminder of the load order.
print(img_files[:5])

# Read every thumbnail and move its last axis to the front, so each array is
# laid out channel-first (the stacked result is later labelled
# batch/channel/height/width).
img_arrays = []
for fpath in img_files:
    im = np.transpose(imread(fpath), (2, 0, 1))
    img_arrays.append(im)

['../ALI_old/102flowers_small/image_00001.png', '../ALI_old/102flowers_small/image_00002.png', '../ALI_old/102flowers_small/image_00003.png', '../ALI_old/102flowers_small/image_00004.png', '../ALI_old/102flowers_small/image_00005.png']


In [39]:
# NOTE(review): `features_all` is used by later cells but is never assigned
# in any cell shown in this notebook; only this commented-out stub remains.
#features_all = []

# Stack the per-image arrays along a new leading (batch) axis.
features = np.stack(img_arrays, axis=0)
features.shape[0]

8189

In [51]:
# Build mirrored copies of every array in `features_all`.
# NOTE(review): `features_all` is not assigned in any cell shown here. The
# concatenated result in the next cell has shape (N, 3, 32, 32), so each
# element is presumably a (batch, channel, height, width) array -- confirm.
# Axis 2 is height, so this is the top/bottom (y) flip.
features_flip_y = [np.flip(x, axis=2) for x in features_all]
# Axis 3 is width, so this is the left/right (x) flip. Using axis=2 here as
# well would only produce a second copy of features_flip_y.
features_flip_x = [np.flip(x, axis=3) for x in features_all]

In [52]:
# Join the original arrays and both flipped sets into one list, then glue
# them end to end along the batch axis.
augmented_sets = features_all + features_flip_y + features_flip_x
features_all_flat = np.concatenate(augmented_sets, axis=0)
features_all_flat.shape

(122835, 3, 32, 32)

In [53]:
# Build one label per row of features_all_flat.
# The features are concatenated as whole-dataset blocks (set 0 images 0..N-1,
# then set 1 images 0..N-1, ...), so the label column must repeat as a whole
# block too. np.tile does that; np.repeat would instead emit each label
# several times in a row and misalign labels with images, even though the
# resulting shape is identical.
n_copies = len(features_all) * 3  # originals + y-flips + x-flips
labels_all_flat = np.tile(flower_labels, (n_copies, 1))
labels_all_flat.shape

(122835, 1)

In [27]:
# Number of label rows; should equal the number of images.
len(flower_labels)

8189

In [40]:
# (split, source, array) triples handed to fill_hdf5_file; everything goes
# into the single 'train' split.
data = (
    ('train', 'features', features),
    ('train', 'targets', flower_labels),
)

In [43]:
from fuel.converters.base import fill_hdf5_file
import h5py
# Write the dataset to disk. The `with` block closes the file even if filling
# it or labelling a dimension raises, so a failed run does not leave an open
# handle behind that blocks the next attempt.
with h5py.File('flowers102_32x32_original.hdf5', mode='w') as h5file:
    fill_hdf5_file(h5file, data)

    # Name each axis of the stored arrays.
    for axis, label in enumerate(('batch', 'channel', 'height', 'width')):
        h5file['features'].dims[axis].label = label
    for axis, label in enumerate(('batch', 'index')):
        h5file['targets'].dims[axis].label = label

    h5file.flush()

In [44]:
# Re-open the freshly written file read-only to verify its contents.
flowers = h5py.File('flowers102_32x32_original.hdf5', mode='r')

In [45]:
# Check the shape of the stored 'features' dataset.
flowers['features'].shape

(8189, 3, 32, 32)

In [33]:
# Read the whole 'targets' dataset back into memory for inspection.
flowers['targets'][:]

array([[77],
       [77],
       [77],
       ..., 
       [62],
       [62],
       [62]], dtype=uint8)

In [34]:
# Display the in-memory labels to compare against the values read from the file.
flower_labels

array([[77],
       [77],
       [77],
       ..., 
       [62],
       [62],
       [62]], dtype=uint8)

In [35]:
# NOTE(review): `imshow` is not imported or defined in any cell shown here, so
# this cell raises NameError (see the output below). Import it or drop the cell.
imshow

NameError: name 'imshow' is not defined

In [42]:
# Release the read-only handle on the flowers HDF5 file.
flowers.close()