In [42]:
import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib.image as mpimg
import tensorflow as tf
from tensorflow import keras
import shutil
import datetime
from sklearn.metrics import accuracy_score

In [2]:
data_dir = os.path.join('..', '..', 'bigyan', 'artgen.bigyan', 'data', 'cartoonset100k')
os.listdir(data_dir)

['0', '1', '2', '3', '4', '5', '6', '7', '8', '9']

In [216]:
np.random.seed(42)
tf.random.set_seed(42)

In [217]:
cache_directory= os.path.join('C:', 'Users', 'paudy' , 'tmp_images', 'deep_learning_tmp', 'cartoonset')

In [280]:
def build_and_compile_model():
    model = keras.Sequential([
    keras.layers.Conv2D(32, 3, (2, 2), activation='relu', input_shape=(256, 256, 3)), # no parameters
    keras.layers.MaxPooling2D((2, 2)),
    keras.layers.BatchNormalization(),
    keras.layers.Dropout(0.3), 
    keras.layers.Conv2D(64, 3, (2, 2), activation='relu'), # no parameters
    keras.layers.MaxPooling2D((2, 2)),
    keras.layers.BatchNormalization(),
    keras.layers.Dropout(0.3),
    keras.layers.Conv2D(128, 5, (2, 2), activation='relu'), # no parameters
    keras.layers.BatchNormalization(),
    keras.layers.Dropout(0.3),
    keras.layers.Conv2D(256, 5, (2, 2), activation='relu'), # no parameters
    keras.layers.BatchNormalization(),
    keras.layers.Dropout(0.3),
    keras.layers.Flatten(),
    keras.layers.Dense(10), # no activation! 
    keras.layers.Softmax()
    ])
    model.compile(optimizer='adam',
     loss=keras.losses.SparseCategoricalCrossentropy(from_logits=True), 
     metrics=['accuracy'] #the metric that is reported
     )
    return model

In [281]:
def copy_images_to_labels_folder(img_files, label_files, feature_name, cache_directory, label_idx_dict, reset_images=False, reuse_images=False):
    """
    copies images to the feature name subfolder in the cache directory
    images are copied into sub directories according the the lablel 
    """
    if not os.path.exists(cache_directory):
        raise Exception(f'{cache_directory} does not exist')
    feature_dir = os.path.join(cache_directory, feature_name)
    
    if os.path.exists(feature_dir):
        if reset_images:
            print(f'{feature_dir} does not exist, creating ... ')
            shutil.rmtree(feature_dir)
            os.mkdir(feature_dir)
        else:
            if reuse_images:
                #nothing to do 
                return feature_dir
            else:
                raise Exception(f'Invalid Arguments, {feature_dir} already exists, set reset_images to True to reset for {feature_name}')
    else:
        print(f'{feature_dir} does not exist, creating ... ')
        os.mkdir(feature_dir)
     
    feature_idx = label_idx_dict.get(feature_name, None)
        
    for img_file, label_file in list(zip(img_files, label_files)):
        label = pd.read_csv(label_file, header=None).loc[feature_idx][1]
        label_dir = os.path.join(feature_dir, str(label))
        if not os.path.exists(label_dir):
            print(f'{label_dir} does not exist, creating ... ')
            os.mkdir(label_dir)
        try:
            shutil.copy(img_file, label_dir)
        except:
            pass
    return feature_dir

In [282]:
def get_feature_idx_dicts(label_file):
    idx_label_dict = pd.read_csv(label_file, header=None)[0].to_dict()
    label_idx_dict = {v:k for k, v in idx_label_dict.items()}
    return idx_label_dict, label_idx_dict

In [283]:
for root, dirs, fnames_ in os.walk(data_dir):
    all_file_paths = [os.path.join(root, fname) for fname in fnames_]
img_files = [x for x in all_file_paths if x.endswith('.png')]
label_files = [x for x in all_file_paths if x.endswith('.csv')]

In [284]:
idx_label_dict, label_idx_dict = get_feature_idx_dicts(label_files[0])

In [285]:
label_idx_dict

{'eye_angle': 0,
 'eye_lashes': 1,
 'eye_lid': 2,
 'chin_length': 3,
 'eyebrow_weight': 4,
 'eyebrow_shape': 5,
 'eyebrow_thickness': 6,
 'face_shape': 7,
 'facial_hair': 8,
 'hair': 9,
 'eye_color': 10,
 'face_color': 11,
 'hair_color': 12,
 'glasses': 13,
 'glasses_color': 14,
 'eye_slant': 15,
 'eyebrow_width': 16,
 'eye_eyebrow_distance': 17}

In [286]:
# features_to_consider = ['hair_color', 'face_color', 'face_shape']
features_to_consider = ['eyebrow_thickness']

In [287]:
base_logdir = os.path.join('logs')
base_modelsdir = os.path.join('models')
n_epochs = 20
feature_counter = 2
validation_metrics = {}
for feature_name in features_to_consider:
    print(feature_name)
    training_dir = copy_images_to_labels_folder(img_files, label_files, feature_name, cache_directory, label_idx_dict, reuse_images=True)
    print(f'Created training dir: {training_dir}')
    train_dataset = tf.keras.preprocessing.image_dataset_from_directory(
        training_dir,
        labels="inferred",
        label_mode="int",
        class_names=None,
        color_mode="rgb",
        batch_size=32,
        image_size=(256, 256),
        shuffle=True,
        seed=42,
        validation_split=.2,
        subset="training",
        interpolation="bilinear",
        follow_links=False,
        )
    validation_dataset = tf.keras.preprocessing.image_dataset_from_directory(
    training_dir,
    labels="inferred",
    label_mode="int",
    class_names=None,
    color_mode="rgb",
    batch_size=32,
    image_size=(256, 256),
    shuffle=True,
    seed=42,
    validation_split=.2,
    subset="validation",
    interpolation="bilinear",
    follow_links=False,
    )
    model = build_and_compile_model()
    this_logdir = os.path.join(base_logdir, feature_name)
    if not os.path.exists(this_logdir):
        os.mkdir(this_logdir)
    logdir = os.path.join(this_logdir, datetime.datetime.now().strftime("%Y%m%d-%H%M%S"))
    tensorboard_callback = tf.keras.callbacks.TensorBoard(logdir, histogram_freq=1)

    model.fit(train_dataset,  epochs=n_epochs, validation_data=validation_dataset, callbacks=[tensorboard_callback])
    print(model.evaluate(validation_dataset))  
    this_modeldir = os.path.join(base_modelsdir, feature_name)
    if not os.path.exists(this_modeldir):
        os.mkdir(this_modeldir)
    modelpath = os.path.join(this_modeldir, datetime.datetime.now().strftime("%Y%m%d-%H%M%S")+f'epochs-{str(n_epochs)}.h5')
    model.save(modelpath, overwrite=True, include_optimizer=True)
    loaded_model = keras.models.load_model(modelpath)
    acc_score_sum = 0
    iterations = 0
    for x, y in validation_dataset:
        acc_score_sum += accuracy_score(y, np.argmax(loaded_model.predict(x), axis=1))
        iterations+= 1
    validation_metrics[feature_name] = acc_score_sum/iterations
    print(validation_metrics)
    feature_counter += 1

eyebrow_thickness
Created training dir: C:Users\paudy\tmp_images\deep_learning_tmp\cartoonset\eyebrow_thickness
Found 10000 files belonging to 4 classes.
Using 8000 files for training.
Found 10000 files belonging to 4 classes.
Using 2000 files for validation.
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20
[1.9399964809417725, 0.5164999961853027]
{'eyebrow_thickness': 0.5163690476190477}


In [None]:
model.fit(train_dataset,  epochs=n_epochs, validation_data=validation_dataset, callbacks=[tensorboard_callback])
print(model.evaluate(validation_dataset))  
this_modeldir = os.path.join(base_modelsdir, feature_name)
if not os.path.exists(this_modeldir):
    os.mkdir(this_modeldir)
modelpath = os.path.join(this_modeldir, datetime.datetime.now().strftime("%Y%m%d-%H%M%S")+f'epochs-{str(n_epochs)}.h5')
model.save(modelpath, overwrite=True, include_optimizer=True)
loaded_model = keras.models.load_model(modelpath)
acc_score_sum = 0
iterations = 0
for x, y in validation_dataset:
    acc_score_sum += accuracy_score(y, np.argmax(loaded_model.predict(x), axis=1))
    iterations+= 1
validation_metrics[feature_name] = acc_score_sum/iterations
print(validation_metrics)
feature_counter += 1

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20