In [1]:
# h5py quick start guide: http://docs.h5py.org/en/stable/quick.html
import os
import h5py
import numpy as np
import pandas as pd
from PIL import Image
from tqdm.notebook import tqdm

In [2]:
def _read_images(grayscale_path, image_name_list):
    """Load the named images as normalised 32x32 grayscale arrays.

    Parameters
    ----------
    grayscale_path : str
        Directory containing the image files. A trailing path separator is
        optional because the file path is built with ``os.path.join``.
    image_name_list : list of str
        File names (including extension) to read from ``grayscale_path``.

    Returns
    -------
    list of numpy.ndarray
        One ``(32, 32)`` float array per input name, in input order, with
        pixel values divided by 255.0.

    Raises
    ------
    ValueError
        If an image does not contain exactly 32 * 32 pixels (raised by
        ``reshape``).
    """
    image_list = []
    progress_desc = 'Reading {0} images for conversion'.format(len(image_name_list))
    for name in tqdm(image_name_list, desc=progress_desc):
        # The context manager closes the underlying file as soon as the pixel
        # data has been copied into a numpy array.
        with Image.open(os.path.join(grayscale_path, name)) as img:
            pixels = np.array(img.convert('L'))
        # reshape doubles as a size check: anything but 1024 pixels raises.
        image_list.append(pixels.reshape(32, 32) / 255.0)
    return image_list

In [5]:
def normal2hdf5(grayscale_path, csv_file_path, hdf5_file_path, chunk_size=50000):
    """Convert the images listed in a CSV file into chunked HDF5 files.

    The CSV must contain the columns ``PicName`` and ``Label``. Every
    ``PicName`` gets the suffix ``.bmp`` and is read from ``grayscale_path``
    via ``_read_images``. The images are written in groups of at most
    ``chunk_size`` to ``normal_1.h5``, ``normal_2.h5``, ... inside
    ``hdf5_file_path``; each file holds a single dataset named
    ``image_array``. Only the images are stored; the ``Label`` column is not
    written to the output.

    Parameters
    ----------
    grayscale_path : str
        Directory that contains the ``.bmp`` images.
    csv_file_path : str
        Path of the CSV file listing the images.
    hdf5_file_path : str
        Output directory; created (including parents) if it does not exist.
    chunk_size : int, optional
        Maximum number of images per HDF5 file. Defaults to 50000.

    Raises
    ------
    ValueError
        If ``chunk_size`` is not positive.
    """
    if chunk_size <= 0:
        raise ValueError('chunk_size must be a positive integer')

    # makedirs also creates missing parent directories and is a no-op when
    # the directory is already there.
    os.makedirs(hdf5_file_path, exist_ok=True)

    name_label = pd.read_csv(csv_file_path, usecols=['PicName', 'Label'])

    name_list = (np.array(name_label.loc[:, 'PicName']) + '.bmp').tolist()
    image_num = len(name_list)

    print('Totaly {img_num} images of \'normal\' types.'\
         .format(img_num = image_num))

    # Ceiling division: an exact multiple of chunk_size must not produce an
    # extra, empty trailing set (and zero images produce zero sets).
    set_num = (image_num + chunk_size - 1) // chunk_size

    for itr in range(1, set_num + 1):  # file numbering starts at 1
        print('Converting {0}/{1} set of \'normal\' images'.format(itr, set_num))
        # Slicing clamps at the end of the list, so the last (shorter) set
        # needs no special case.
        divided_name_list = name_list[(itr - 1) * chunk_size: itr * chunk_size]
        image_list = _read_images(grayscale_path, divided_name_list)

        hdf5_file = os.path.join(hdf5_file_path, 'normal_{cnt}.h5'.format(cnt=itr))
        with h5py.File(hdf5_file, 'w') as hdf:
            hdf.create_dataset('image_array', data=np.array(image_list))

    # Report the directory the files were actually written to.
    print('{0} image sets saved as normal_number.h5 in {1}.'.format(set_num, hdf5_file_path))

In [6]:
# Directory the source images are read from.
grayscale_path = '/root/ISOT-2010/isot/grayscale/'
# CSV file passed to normal2hdf5 as the image listing.
isot_csv = '/root/ISOT-2010/isot/isot.csv'
# Directory that receives the generated .h5 files.
hdf5_file_path = '/root/ISOT-2010/isot/normal/'

normal2hdf5(
    grayscale_path=grayscale_path,
    csv_file_path=isot_csv,
    hdf5_file_path=hdf5_file_path,
)

Totaly 986708 images of 'normal' types.
Converting 1/20 set of 'normal' images


HBox(children=(FloatProgress(value=0.0, description='Reading 50000 images for conversion', max=50000.0, style=…


Converting 2/20 set of 'normal' images


HBox(children=(FloatProgress(value=0.0, description='Reading 50000 images for conversion', max=50000.0, style=…


Converting 3/20 set of 'normal' images


HBox(children=(FloatProgress(value=0.0, description='Reading 50000 images for conversion', max=50000.0, style=…


Converting 4/20 set of 'normal' images


HBox(children=(FloatProgress(value=0.0, description='Reading 50000 images for conversion', max=50000.0, style=…


Converting 5/20 set of 'normal' images


HBox(children=(FloatProgress(value=0.0, description='Reading 50000 images for conversion', max=50000.0, style=…


Converting 6/20 set of 'normal' images


HBox(children=(FloatProgress(value=0.0, description='Reading 50000 images for conversion', max=50000.0, style=…


Converting 7/20 set of 'normal' images


HBox(children=(FloatProgress(value=0.0, description='Reading 50000 images for conversion', max=50000.0, style=…


Converting 8/20 set of 'normal' images


HBox(children=(FloatProgress(value=0.0, description='Reading 50000 images for conversion', max=50000.0, style=…


Converting 9/20 set of 'normal' images


HBox(children=(FloatProgress(value=0.0, description='Reading 50000 images for conversion', max=50000.0, style=…


Converting 10/20 set of 'normal' images


HBox(children=(FloatProgress(value=0.0, description='Reading 50000 images for conversion', max=50000.0, style=…


Converting 11/20 set of 'normal' images


HBox(children=(FloatProgress(value=0.0, description='Reading 50000 images for conversion', max=50000.0, style=…


Converting 12/20 set of 'normal' images


HBox(children=(FloatProgress(value=0.0, description='Reading 50000 images for conversion', max=50000.0, style=…


Converting 13/20 set of 'normal' images


HBox(children=(FloatProgress(value=0.0, description='Reading 50000 images for conversion', max=50000.0, style=…


Converting 14/20 set of 'normal' images


HBox(children=(FloatProgress(value=0.0, description='Reading 50000 images for conversion', max=50000.0, style=…


Converting 15/20 set of 'normal' images


HBox(children=(FloatProgress(value=0.0, description='Reading 50000 images for conversion', max=50000.0, style=…


Converting 16/20 set of 'normal' images


HBox(children=(FloatProgress(value=0.0, description='Reading 50000 images for conversion', max=50000.0, style=…


Converting 17/20 set of 'normal' images


HBox(children=(FloatProgress(value=0.0, description='Reading 50000 images for conversion', max=50000.0, style=…


Converting 18/20 set of 'normal' images


HBox(children=(FloatProgress(value=0.0, description='Reading 50000 images for conversion', max=50000.0, style=…


Converting 19/20 set of 'normal' images


HBox(children=(FloatProgress(value=0.0, description='Reading 50000 images for conversion', max=50000.0, style=…


Converting 20/20 set of 'normal' images


HBox(children=(FloatProgress(value=0.0, description='Reading 36708 images for conversion', max=36708.0, style=…


20 image sets saved as normal_number.h5 in /root/ISOT-2010/isot/grayscale/.
