In [10]:
from celebA import CelebA
from dataset import load_celeba

In [4]:
# Build the dataset wrapper, excluding three attributes from the label set.
celeba = CelebA(drop_features=['Attractive', 'Pale_Skin', 'Blurry'])

In [5]:
# Materialise the training and validation partitions.
# NOTE(review): at this point `CelebA` is the class imported from the local
# `celebA` module, whose `split` implementation is not visible here — confirm
# that `drop_zero=False` means "keep every row" there as well.
train_split = celeba.split('training'  , drop_zero=False)
valid_split = celeba.split('validation', drop_zero=False)

In [12]:
# NOTE(review): `load_celeba` comes from the local `dataset` module; what it
# returns is not visible here, and `dataset` is not used by any visible cell.
dataset = load_celeba("../CelebA")

In [26]:
# Eyeglasses labels of the training rows whose Eyeglasses value is 0.
train_split.loc[train_split['Eyeglasses'] == 0, 'Eyeglasses']

image_id
000001.jpg    0
000002.jpg    0
000003.jpg    0
000004.jpg    0
000005.jpg    0
             ..
162766.jpg    0
162767.jpg    0
162768.jpg    0
162769.jpg    0
162770.jpg    0
Name: Eyeglasses, Length: 152249, dtype: int64

In [8]:
# Inspect the validation partition (the rendered table is truncated by pandas).
valid_split

Unnamed: 0_level_0,5_o_Clock_Shadow,Arched_Eyebrows,Bags_Under_Eyes,Bald,Bangs,Big_Lips,Big_Nose,Black_Hair,Blond_Hair,Brown_Hair,...,Smiling,Straight_Hair,Wavy_Hair,Wearing_Earrings,Wearing_Hat,Wearing_Lipstick,Wearing_Necklace,Wearing_Necktie,Young,image_id
image_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
162771.jpg,0,0,0,0,1,1,0,0,0,0,...,1,0,1,0,0,1,0,0,1,162771.jpg
162772.jpg,0,0,0,0,0,0,1,1,0,0,...,1,0,1,0,0,0,0,1,0,162772.jpg
162773.jpg,0,1,0,0,0,1,0,0,0,1,...,0,0,0,0,0,1,0,0,1,162773.jpg
162774.jpg,1,0,0,0,0,0,0,0,0,1,...,1,1,0,0,0,0,0,1,1,162774.jpg
162775.jpg,0,0,0,0,0,0,0,1,0,0,...,0,0,0,0,0,1,0,0,1,162775.jpg
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
182633.jpg,0,0,0,0,0,0,0,0,0,1,...,0,0,1,0,0,1,0,0,1,182633.jpg
182634.jpg,0,0,0,0,0,1,1,0,0,0,...,0,0,0,0,0,0,0,0,1,182634.jpg
182635.jpg,0,0,0,0,0,0,0,0,0,1,...,1,1,1,0,0,1,0,0,1,182635.jpg
182636.jpg,0,0,0,0,0,0,0,0,1,0,...,0,0,0,1,0,0,0,0,1,182636.jpg


In [51]:
import os
import pandas as pd


class CelebA():
    '''Wraps the CelebA dataset, allowing an easy way to:
         - select the features of interest,
         - split the dataset into the 'training', 'validation' or 'test' partition.

    Attributes set by the constructor:
         - attributes:    DataFrame of the attribute CSV, restricted to the chosen
                          features, with every -1 replaced by 0.
         - partition:     DataFrame of the partition CSV (`image_id`, `partition`).
         - features_name: names of the attribute columns (never `image_id`).
         - num_features:  len(features_name).
    '''

    # Values of the `partition` column that identify each split.
    _PARTITION_CODES = {'training': 0, 'validation': 1, 'test': 2}
    # Column holding the image file name; it is an identifier, not a feature.
    _ID_COLUMN = 'image_id'

    def __init__(self, main_folder='../CelebA/', selected_features=None, drop_features=None):
        '''
        main_folder:       folder containing the two CSV files and the images folder.
        selected_features: list of attribute names to keep (None keeps all of them).
        drop_features:     attribute names to remove; names that are not present
                           are ignored (None means "drop nothing").
        '''
        self.main_folder = main_folder
        self.images_folder = os.path.join(main_folder, 'img_align_celeba/img_align_celeba/')
        self.attributes_path = os.path.join(main_folder, 'list_attr_celeba.csv')
        self.partition_path = os.path.join(main_folder, 'list_eval_partition.csv')
        self.selected_features = selected_features
        self.features_name = []
        self.__prepare([] if drop_features is None else drop_features)

    def __prepare(self, drop_features):
        '''do some preprocessing before using the data: e.g. feature selection'''
        # attributes:
        attributes = pd.read_csv(self.attributes_path)
        if self.selected_features is not None:
            # work on a copy so the caller's list is never modified
            self.selected_features = list(self.selected_features)
            if self._ID_COLUMN not in self.selected_features:
                self.selected_features.append(self._ID_COLUMN)
            attributes = attributes[self.selected_features]

        # drop only the requested columns that exist (duplicates are harmless)
        present = [col for col in dict.fromkeys(drop_features) if col in attributes.columns]
        attributes = attributes.drop(columns=present)

        self.attributes = attributes.replace(to_replace=-1, value=0)

        # Feature columns are identified by name rather than by position: when
        # no selection is given the CSV column order is kept as-is, so slicing
        # off the last column would discard a real attribute (and keep
        # `image_id`) whenever `image_id` is not the last column.
        self.features_name = [col for col in self.attributes.columns if col != self._ID_COLUMN]
        self.num_features = len(self.features_name)

        # load ideal partitioning:
        self.partition = pd.read_csv(self.partition_path)

    def split(self, name='training', drop_zero=False, random_state=None):
        '''Returns the ['training', 'validation', 'test'] split of the dataset.

        name:         which partition to return.
        drop_zero:    - False (default): keep every row;
                      - True: keep only rows having at least one feature equal to 1;
                      - number in [0, 1]: fraction of the all-zero rows to remove,
                        chosen at random;
                      - any other number: ignored, every row is kept.
        random_state: forwarded to `DataFrame.sample` to make the fractional
                      drop reproducible (None keeps it non-deterministic).

        Raises ValueError when `name` is not a known partition.
        '''
        # select partition split (compared by value, not by identity):
        try:
            code = self._PARTITION_CODES[name]
        except (KeyError, TypeError):
            raise ValueError('CelebA.split() => `name` must be one of [training, validation, test]')

        partition = self.partition[self.partition['partition'] == code]
        # join attributes with selected partition (row-index join; the partition's
        # own `image_id` column is kept under the name `image_idleft`):
        joint = partition.join(self.attributes, how='inner', lsuffix='left').drop('partition', axis=1)

        if drop_zero is True:
            # keep only the rows with at least one active feature
            return joint.loc[(joint[self.features_name] == 1).any(axis=1)]
        if drop_zero is False:
            return joint
        if 0 <= drop_zero <= 1:
            # remove a random fraction of the rows whose features are all zero
            zero = joint.loc[(joint[self.features_name] == 0).all(axis=1)]
            zero = zero.sample(frac=drop_zero, random_state=random_state)
            return joint.drop(index=zero.index)

        return joint

In [52]:
# Re-create the wrapper, excluding three attributes from the label set.
celeba = CelebA(drop_features=['Attractive', 'Pale_Skin', 'Blurry'])

In [53]:
# Rebuild the training split from the re-created `celeba`; with
# drop_zero=False no rows are removed from the partition.
train_split = celeba.split('training'  , drop_zero=False)

          image_id  partition
0       000001.jpg          0
1       000002.jpg          0
2       000003.jpg          0
3       000004.jpg          0
4       000005.jpg          0
...            ...        ...
162765  162766.jpg          0
162766  162767.jpg          0
162767  162768.jpg          0
162768  162769.jpg          0
162769  162770.jpg          0

[162770 rows x 2 columns]


In [55]:
# Row labels of the training split (integer positions, not image file names).
train_split.index

Int64Index([     0,      1,      2,      3,      4,      5,      6,      7,
                 8,      9,
            ...
            162760, 162761, 162762, 162763, 162764, 162765, 162766, 162767,
            162768, 162769],
           dtype='int64', length=162770)

In [38]:
# Read the attribute table directly, i.e. without the feature selection and the
# -1 -> 0 replacement that the CelebA wrapper applies.
# NOTE(review): `attr` is not used by any visible cell.
attr = pd.read_csv('../CelebA/list_attr_celeba.csv')

In [57]:
# Use the Eyeglasses column of the training split as the label series.
train_label = train_split['Eyeglasses']

In [111]:
# Eyeglasses labels as a NumPy array.
test = train_label.to_numpy()
# NOTE(review): both branches of the conditional yield 1, so `tmp` is a list of
# ones whatever the labels are. One branch was presumably meant to differ —
# confirm the intended mapping before relying on `tmp`.
tmp = [1 if j==0 else 1 for j in test]

In [120]:
import glob
import os
import time

import imageio
import matplotlib.pyplot as plt
import numpy as np
import PIL
import tensorflow as tf
from tensorflow.keras import layers

def make_generator_model(latent_dim=100):
    '''Build the generator network.

    The model maps a latent vector of size `latent_dim` to an image tensor of
    shape (32, 32, 3) whose values come from a tanh activation.

    latent_dim: length of the input noise vector (default 100).

    Returns the (uncompiled) `tf.keras.Sequential` model. The `assert`
    statements check the output shape after each upsampling stage.
    '''
    model = tf.keras.Sequential()

    # Project the latent vector and reshape it into an 8x8 map with 256 channels.
    model.add(layers.Dense(8*8*256, use_bias=False, input_shape=(latent_dim,)))
    model.add(layers.BatchNormalization())
    model.add(layers.LeakyReLU())

    model.add(layers.Reshape((8, 8, 256)))

    # Stride 1: spatial size stays 8x8, channels go 256 -> 128.
    model.add(layers.Conv2DTranspose(128, (3, 3), strides=(1,1), padding='same', use_bias=False))
    assert model.output_shape == (None, 8, 8, 128)
    model.add(layers.BatchNormalization())
    model.add(layers.LeakyReLU())

    # Stride 2: upsample 8x8 -> 16x16, channels go 128 -> 64.
    model.add(layers.Conv2DTranspose(64, (3, 3), strides=(2, 2), padding='same', use_bias=False))
    assert model.output_shape == (None, 16, 16, 64)
    model.add(layers.BatchNormalization())
    model.add(layers.LeakyReLU())

    # Stride 2: upsample 16x16 -> 32x32 and produce the 3 output channels.
    model.add(layers.Conv2DTranspose(3, (3, 3), strides=(2, 2), padding='same', use_bias=False, activation='tanh'))
    assert model.output_shape == (None, 32, 32, 3)

    return model

In [121]:
# Build the generator once; the shape asserts inside the function run here.
# NOTE(review): the name `test` is also bound to the label array in another
# cell and to a plain list further down — consider a dedicated name.
test = make_generator_model()

In [66]:
import tensorflow as tf
# tf.random.set_seed() returns None, so assigning its result left `seed` bound
# to None. Keep the seed value itself in `seed` and seed TensorFlow from it.
seed = 1234
tf.random.set_seed(seed)

In [87]:

# Draw twice with the same op-level seed; as the printed output shows, the two
# consecutive draws are not equal to each other.
for _draw in range(2):
    print(tf.random.uniform([1], seed=2))

tf.Tensor([0.5276288], shape=(1,), dtype=float32)
tf.Tensor([0.38491035], shape=(1,), dtype=float32)


In [107]:
# Toy data for the shuffle experiment: element k of `label` belongs to
# element k of `test`.
test = [
    1, 1,
    2, 2,
    3, 3,
]
label = [
    1, 1,
    0, 0,
    4, 4,
]

In [108]:
# Two independent datasets, shuffled with the same buffer size and the same
# seed, to check whether values and labels stay aligned.
train_dataset = tf.data.Dataset.from_tensor_slices(test)
train_dataset = train_dataset.shuffle(4, seed=2)

# Despite its name, `test_dataset` holds the labels.
test_dataset = tf.data.Dataset.from_tensor_slices(label)
test_dataset = test_dataset.shuffle(4, seed=2)

In [109]:
# Walk both shuffled datasets in lock-step and print each value with the label
# that comes out at the same position.
shuffled_pairs = zip(train_dataset, test_dataset)
for value, target in shuffled_pairs:
    print("{} : {}".format(value, target))

1 : 1
2 : 0
3 : 4
2 : 0
1 : 1
3 : 4
