In [2]:
import tensorflow as tf
import numpy as np
from matplotlib import pyplot as plt
import os
import pandas as pd
from tqdm import tqdm

In [3]:
# Root folder of the dataset; every other path below is appended to it.
# NOTE(review): absolute, machine-specific path — adjust before running elsewhere.
dir_path = 'D:/celebA/'
# Sub-folder (relative to dir_path) that holds the image files.
i_path = 'img_align_celeba/img_align_celeba/'
# Label CSV file names, all relative to dir_path.
landscap_label_path = 'list_landmarks_align_celeba.csv'
bbox_label_path = 'list_bbox_celeba.csv'
att_label_path = 'list_attr_celeba.csv'

In [4]:
# Read the three label tables (landmarks, bounding boxes, attributes) from
# dir_path, in that order.
landscap_df, bbox_df, att_df = (
    pd.read_csv(dir_path + csv_name)
    for csv_name in (landscap_label_path, bbox_label_path, att_label_path)
)

In [5]:
# Destination of the TFRecord file that create_tfrecord() writes.
outfile = dir_path + 'test_celebA2.tfrecord'

In [6]:
# Turn each label table into a NumPy array (one row per CSV row).
landscap_dataset, bbox_dataset, att_dataset = (
    np.array(frame) for frame in (landscap_df, bbox_df, att_df)
)

# Only the landmark rows are shuffled; the fixed seed keeps the order repeatable.
np.random.seed(42)
np.random.shuffle(landscap_dataset)

# First 70% of the shuffled landmark rows -> train, remainder -> test.
train_size = 0.7
data_size = len(landscap_dataset)
train_dataset, test_dataset = np.split(
    landscap_dataset, [int(data_size * train_size)]
)

In [7]:
def create_tfrecord(tfrecord_filename, landscap_data,bbox_data,att_data):
    """Write one serialized ``tf.train.Example`` per landmark row to a TFRecord file.

    Args:
        tfrecord_filename: Path of the TFRecord file to create.
        landscap_data: Rows whose first element is the image file name (for
            example ``'000001.jpg'``) and whose remaining elements are the
            landmark values.
        bbox_data: Bounding-box rows looked up by image number; the first
            element of each row is skipped.
        att_data: Attribute rows, looked up the same way as ``bbox_data``.

    Raises:
        IndexError: If a file name does not map to a row of the label tables.

    Image files are located through the notebook-level ``dir_path`` and
    ``i_path`` variables.
    """
    with tf.io.TFRecordWriter(tfrecord_filename) as writer:
        for data_point in tqdm(landscap_data, desc="Writing TFRecord", unit="examples"):
            file_name = data_point[0]
            img_path = dir_path + i_path + file_name

            # The numeric file name is 1-based while array rows are 0-based, so
            # '000001.jpg' must read row 0. Indexing with the raw number picked
            # the *next* image's labels and overflowed on the last image.
            # Assumes numbering starts at 000001.jpg and the label rows are in
            # file-name order — verify if the CSVs are ever filtered or re-sorted.
            row = int(os.path.splitext(file_name)[0]) - 1
            if row < 0:
                # A negative index would silently wrap around to the last row.
                raise IndexError(f"{file_name!r} does not map to a valid label row")

            landscaps = data_point[1:]
            bboxs = bbox_data[row][1:]
            atts = att_data[row][1:]

            tf_example = create_tfrecord_example(img_path,landscaps,bboxs,atts)
            writer.write(tf_example.SerializeToString())

def create_tfrecord_example(img_path,landscaps,bboxs,attributes):
    """Build a ``tf.train.Example`` holding one image and its labels.

    Args:
        img_path: Path of the JPEG image file to embed.
        landscaps: Landmark values, stored under the ``'landscapes'`` key.
        bboxs: Bounding-box values, stored under the ``'bboxs'`` key.
        attributes: Attribute values, stored under the ``'attributes'`` key.

    Returns:
        A ``tf.train.Example`` with the encoded image bytes under ``'image'``
        and the three label vectors as float lists.
    """
    # The file on disk is already JPEG-encoded, so its bytes are stored as-is.
    # Decoding and re-encoding would recompress the image (lossy) and add
    # per-image work. Readers should decode with channels=3 to get RGB.
    encoded_image = tf.io.read_file(img_path).numpy()

    def _float_feature(values):
        # Wrap a sequence of numbers as a float-list feature.
        return tf.train.Feature(float_list=tf.train.FloatList(value=values))

    feature = {
        'image': tf.train.Feature(bytes_list=tf.train.BytesList(value=[encoded_image])),
        'landscapes': _float_feature(landscaps),
        'bboxs' : _float_feature(bboxs),
        'attributes' : _float_feature(attributes),
    }

    return tf.train.Example(features=tf.train.Features(feature=feature))




In [8]:
# Serialize the test split (the rows after the first 70%) into `outfile`.
# This is slow: the recorded run below took about 1h21m for 60780 examples.
create_tfrecord(outfile,test_dataset,bbox_dataset,att_dataset)

Writing TFRecord: 100%|██████████| 60780/60780 [1:21:17<00:00, 12.46examples/s]  


In [None]:
def parse(example_proto):
    """Turn one serialized example into a dict of tensors.

    Args:
        example_proto: A serialized ``tf.train.Example`` carrying the keys
            ``'image'``, ``'landscapes'``, ``'bboxs'`` and ``'attributes'``.

    Returns:
        Dict with ``'img'`` (image reshaped to 218x178x3), ``'landscaps'``
        (10 floats), ``'bbox'`` (4 floats) and ``'attributes'`` (40 floats).
    """
    schema = {
        'image': tf.io.FixedLenFeature([], tf.string),
        'landscapes' : tf.io.FixedLenFeature([10],tf.float32),
        'bboxs' : tf.io.FixedLenFeature([4],tf.float32),
        'attributes' : tf.io.FixedLenFeature([40],tf.float32),
    }
    parsed = tf.io.parse_single_example(example_proto, schema)

    # Decode the stored bytes as a 3-channel image and pin the tensor to the
    # fixed 218x178x3 layout.
    pixels = tf.reshape(
        tf.io.decode_image(parsed['image'], channels=3),
        (218,178,3),
    )

    # Output keys differ from the stored feature names on purpose: downstream
    # cells read 'img', 'landscaps', 'bbox' and 'attributes'.
    return {
        'img' : pixels,
        'landscaps' : parsed['landscapes'],
        'bbox' : parsed['bboxs'],
        'attributes' : parsed['attributes'],
    }


In [None]:
# Build the tf.data input pipeline over the TFRecord file at `outfile`.
# NOTE(review): `outfile` is the file the test split was written to, even though
# the names here say "train" — confirm this is the intended file.
train_file = outfile
batch_size=1

dataset = tf.data.TFRecordDataset(train_file)
dataset = dataset.map(parse)
# Shuffle individual examples *before* batching. Batching first only shuffles
# whole batches, so the same examples always stay grouped together once
# batch_size is raised above 1.
dataset = dataset.shuffle(buffer_size=10000)
dataset = dataset.batch(batch_size)
dataset = dataset.prefetch(buffer_size=tf.data.experimental.AUTOTUNE)

# Iterator that later cells pull batches from with next().
train_dataset = iter(dataset)

In [None]:
# Pull a single batch (a dict of tensors produced by parse) from the iterator.
batch = next(train_dataset)

In [None]:
# Show which entries the batch dict contains.
batch.keys()

In [None]:
# Draw the first image of the batch with its landmark values overlaid.
first_image = batch['img'][0]
first_points = batch['landscaps'][0]
plt.imshow(first_image)
# Even positions are used as x coordinates, odd positions as y coordinates.
plt.scatter(first_points[::2], first_points[1::2])