# In [None]:
from PIL import Image
import os
import numpy as np
import tensorflow as tf
import glob
import random

OUTPUT_TRAIN_TFRECORD_DIR = '/content/drive/MyDrive/VTuber_recognition/train_tfrecords'
OUTPUT_TEST_TFRECORD_DIR = '/content/drive/MyDrive/VTuber_recognition/test_tfrecords'


def make_tfrecords(file, label, base, outdir):
    """Serialize one image and its label into ``<outdir>/<base>.tfrecords``.

    The record stores the image height, width, channel count ("dim"), the
    integer label, and the raw pixel bytes returned by PIL.

    Args:
        file: Path of the image to read (opened with PIL). The original note
            on this code describes the inputs as 128x128x3 images.
        label: Integer class label stored in the record.
        base: File-name stem of the output file; also printed as progress.
        outdir: Directory that receives the ``.tfrecords`` file.
    """
    print(base)
    tfrecords_filename = os.path.join(outdir, '{}.tfrecords'.format(base))

    # Read the image *before* creating the writer so that an unreadable image
    # does not leave an empty .tfrecords file (and an unclosed writer) behind.
    with Image.open(file) as image_object:
        image = np.array(image_object)
        image_bytes = image_object.tobytes()

    height, width = image.shape[:2]
    # A grayscale image decodes to a 2-D array; record it as one channel
    # instead of failing on the missing third axis.
    dim = image.shape[2] if image.ndim == 3 else 1

    example = tf.train.Example(features=tf.train.Features(feature={
        "height": tf.train.Feature(int64_list=tf.train.Int64List(value=[height])),
        "width": tf.train.Feature(int64_list=tf.train.Int64List(value=[width])),
        "dim": tf.train.Feature(int64_list=tf.train.Int64List(value=[dim])),
        "label": tf.train.Feature(int64_list=tf.train.Int64List(value=[label])),
        "image": tf.train.Feature(bytes_list=tf.train.BytesList(value=[image_bytes])),
    }))

    # tf.io.TFRecordWriter is the replacement for tf.python_io.TFRecordWriter
    # (the tf.python_io namespace no longer exists in TensorFlow 2). Using it
    # as a context manager guarantees the file is closed even if write() fails.
    with tf.io.TFRecordWriter(tfrecords_filename) as writer:
        writer.write(example.SerializeToString())


def divide_train_test(face, train_ratio):
    """Split a sequence into a leading train part and a trailing test part.

    The cut position is ``int(len(face) * train_ratio)``, i.e. the product is
    truncated toward zero. The input is sliced, not modified.

    Args:
        face: Sliceable sequence (here: a list of image paths).
        train_ratio: Fraction of the items that go into the train part.

    Returns:
        A ``(train, test)`` tuple of slices of ``face``.
    """
    split_at = int(len(face) * train_ratio)
    return face[:split_at], face[split_at:]


# Root folder holding one sub-folder of .jpg face crops per character.
FACE_DIR = '/content/drive/MyDrive/VTuber_recognition/face'
# Fraction of each character's images that goes into the train split.
TRAIN_RATIO = 0.9


def _load_faces(name):
    """Return ``(all_paths, train_paths, test_paths)`` for one character.

    Collects ``<FACE_DIR>/<name>/*.jpg``, shuffles the paths with the global
    ``random`` state, prints the count, and splits them with
    ``divide_train_test``.
    """
    # glob.glob returns paths in filesystem order, which is not guaranteed to
    # be stable. Sort first so the seeded shuffle below yields the same
    # train/test split on every run.
    faces = sorted(glob.glob('{}/{}/*.jpg'.format(FACE_DIR, name)))
    random.shuffle(faces)
    print('Num of %s faces : %d' % (name, len(faces)))
    train, test = divide_train_test(faces, train_ratio=TRAIN_RATIO)
    return faces, train, test


# Fixed seed so the shuffles (and therefore the splits) are reproducible.
# The characters are processed in a fixed order because each shuffle advances
# the shared random state.
random.seed(1)

Korone_face, Korone_train, Korone_test = _load_faces('Korone')
Okayu_face, Okayu_train, Okayu_test = _load_faces('Okayu')
Mio_face, Mio_train, Mio_test = _load_faces('Mio')
Subaru_face, Subaru_train, Subaru_test = _load_faces('Subaru')
Fubuki_face, Fubuki_train, Fubuki_test = _load_faces('Fubuki')

# Write one .tfrecords file per image, first for the train split and then for
# the test split. The class label is the position of the character in each
# list: Korone=0, Okayu=1, Mio=2, Subaru=3, Fubuki=4.
# NOTE: files are named by a running counter starting at 00000 and existing
# files in the output directory are never removed, so records left over from
# an earlier run with more images stay in place.
for outdir, splits in (
    (OUTPUT_TRAIN_TFRECORD_DIR,
     [Korone_train, Okayu_train, Mio_train, Subaru_train, Fubuki_train]),
    (OUTPUT_TEST_TFRECORD_DIR,
     [Korone_test, Okayu_test, Mio_test, Subaru_test, Fubuki_test]),
):
    # exist_ok avoids the check-then-create race of os.path.exists + makedirs.
    os.makedirs(outdir, exist_ok=True)

    num = 0  # the file counter restarts for each output directory
    for label, files in enumerate(splits):
        print(label, len(files))
        for file in files:
            base = '{:05}'.format(num)
            make_tfrecords(file, label, base, outdir=outdir)
            num += 1