In [None]:
## Importing Libraries

import os
import numpy as np
import tensorflow as tf
import matplotlib.pyplot as plt
from PIL import Image

print(tf.__version__)

In [None]:
## Image size (in pixels) used for training
img_width, img_height = 128, 128

In [None]:
## File names of the tfrecord outputs and the directory they are written to
tfrecord_train = 'train.tfrecord'
tfrecord_test = 'test.tfrecord'
tfrecord_dir = 'tfrecords'

## Create the output directory if needed. exist_ok=True makes the cell safe to
## re-run and avoids the check-then-create race; it still raises if the path
## exists but is a regular file, so that problem surfaces here rather than
## when the writers are opened.
os.makedirs(tfrecord_dir, exist_ok=True)

In [None]:
## Name of the directory in which the image files are stored
image_dir = 'caltech101'

In [None]:
## Absolute path of the directory that holds the image files
cur_dir = os.getcwd()
image_path = os.path.join(cur_dir, image_dir)

## Every sub-directory of image_path is one class. Plain files that happen to
## sit next to the class directories (e.g. .DS_Store, README) are ignored so
## they are neither counted as classes nor listed as directories later on.
## Sorting keeps the label index of each class stable between runs.
categories = sorted(
    entry for entry in os.listdir(image_path)
    if os.path.isdir(os.path.join(image_path, entry))
)
num_categ = len(categories)

## Report the number of classes and their names
print ('{} categories in {}'.format(num_categ, image_path))
print (categories)

In [None]:
## Accepted image file extensions (compared in lower case)
valid_exts = ['.jpg', '.png', '.jpeg', '.gif']

## Walk every class directory, keep the files with an accepted extension and
## store each file name together with the label (index) of its class.
imgnames = []
labels = []
for label, category in enumerate(categories):
    category_dir = os.path.join(image_path, category)
    imglist = []
    ## sorted() so the resulting order does not depend on the filesystem
    for f in sorted(os.listdir(category_dir)):
        ext = os.path.splitext(f)[-1]
        ## skip files whose extension is not accepted
        if ext.lower() not in valid_exts:
            continue
        ## Decode the file once so an unreadable image raises here, before any
        ## record is written; the context manager closes the file handle.
        with Image.open(os.path.join(category_dir, f)) as img:
            img.load()
        ## add to the image list of this class
        imglist.append(f)
    ## add to the overall image list
    imgnames += imglist
    ## one label entry per kept image
    labels += [label]*len(imglist)
    print('{} {} images are found / Label : {}'.format(len(imglist), category, label))
print('Total : {} images are found'.format(len(imgnames)))

In [None]:
# Shuffle the sample indices, then use 80% as training set / 20% as test set.
# A fixed seed makes the split reproducible for the same file list; change
# split_seed to draw a different split.
split_seed = 42
train_ratio = 0.8

numfiles = len(labels)
idxrand = np.random.RandomState(split_seed).permutation(numfiles)
# index at which the shuffled indices are cut into train / test
num_train = int(train_ratio*numfiles)
idxtrain = idxrand[:num_train]
idxtest = idxrand[num_train:]
print(len(idxtrain), len(idxtest))

In [None]:
## Full paths of the tfrecord output files
train_tfr_path = os.path.join(cur_dir, tfrecord_dir, tfrecord_train)
test_tfr_path = os.path.join(cur_dir, tfrecord_dir, tfrecord_test)

## tfrecord writers. tf.python_io is not available in TensorFlow 2.x, so prefer
## tf.io.TFRecordWriter and fall back to the old name on TensorFlow releases
## that do not expose it.
try:
    _TFRecordWriter = tf.io.TFRecordWriter
except AttributeError:
    _TFRecordWriter = tf.python_io.TFRecordWriter

writer_train = _TFRecordWriter(train_tfr_path)
writer_test = _TFRecordWriter(test_tfr_path)

In [None]:
def _int64_feature(value):
    """Wrap a single integer `value` in a tf.train.Feature holding an Int64List."""
    int_list = tf.train.Int64List(value=[value])
    return tf.train.Feature(int64_list=int_list)

def _bytes_feature(value):
    """Wrap a single bytes `value` in a tf.train.Feature holding a BytesList."""
    byte_list = tf.train.BytesList(value=[value])
    return tf.train.Feature(bytes_list=byte_list)

In [None]:
## Save the training set as tfrecord
train_cnt = 0
for idx in idxtrain:
    label = labels[idx]
    ## same base path as the one the file names were collected from
    img_path = os.path.join(image_path, categories[label], imgnames[idx])

    ## convert('RGB') turns grayscale / palette / RGBA inputs into 3 uint8
    ## channels, so every record has the same byte length and no image has to
    ## be skipped. Pillow's resize expects (width, height). The context
    ## manager closes the source file once the converted copy exists.
    with Image.open(img_path) as src:
        image = src.convert('RGB').resize((img_width, img_height))

    ## raw bytes of the (img_height, img_width, 3) uint8 array
    image_str = np.asarray(image).tobytes()

    example = tf.train.Example(features=tf.train.Features(feature={
        'image': _bytes_feature(image_str),
        'label': _int64_feature(label)
    }))
    writer_train.write(example.SerializeToString())
    train_cnt += 1
print ("{} train images are saved.".format(train_cnt))

In [None]:
## Save the test set as tfrecord
test_cnt = 0
for idx in idxtest:
    label = labels[idx]
    ## same base path as the one the file names were collected from
    img_path = os.path.join(image_path, categories[label], imgnames[idx])

    ## convert('RGB') turns grayscale / palette / RGBA inputs into 3 uint8
    ## channels, so every record has the same byte length and no image has to
    ## be skipped. Pillow's resize expects (width, height). The context
    ## manager closes the source file once the converted copy exists.
    with Image.open(img_path) as src:
        image = src.convert('RGB').resize((img_width, img_height))

    ## raw bytes of the (img_height, img_width, 3) uint8 array
    image_str = np.asarray(image).tobytes()

    example = tf.train.Example(features=tf.train.Features(feature={
        'image': _bytes_feature(image_str),
        'label': _int64_feature(label)
    }))
    writer_test.write(example.SerializeToString())
    test_cnt += 1
print ("{} test images are saved.".format(test_cnt))

In [None]:
## Close both tfrecord writers, the training one first
for _writer in (writer_train, writer_test):
    _writer.close()