In [1]:
import pandas as pd
import numpy as np
from sklearn.utils import class_weight
import os

import tensorflow as tf
import tensorflow.keras.layers as klay
from tensorflow import keras
from tensorflow.keras import layers
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from keras import metrics
from keras import backend as K
from keras.models import Model


#https://machinelearningmastery.com/how-to-use-transfer-learning-when-developing-convolutional-neural-network-models/
from keras.applications.inception_v3 import InceptionV3
from keras.applications.vgg16 import VGG16
from keras.applications.densenet import DenseNet169

#from wcs.google import google_drive_share
import urllib.request
from urllib.parse import urlparse

import src.helper.helper as hlp
import src.helper.const as const

import warnings
warnings.simplefilter(action='ignore')
from PIL import ImageFile
ImageFile.LOAD_TRUNCATED_IMAGES = True

Using TensorFlow backend.


In [2]:
# Config
DIR = './'
DATA_DIR_POSTER = DIR + '../data/raw/posters/'
DATA_DIR_INTERIM = DIR + "../data/interim/"
DATA_DIR_RAW = DIR + "../data/raw/"
MODEL_DIR = DIR + "../models/"
BASE_DIR = DIR
IMAGES_DIR = DATA_DIR_POSTER
SEED = const.SEED

In [3]:
# Check GPUs
num_gpu = len(tf.config.list_physical_devices('GPU'))
print("Num GPUs Available: ", num_gpu)

gpus = tf.config.list_physical_devices('GPU')
print(f"{f'Physical GPU Device: {gpus}' if gpus else 'No GPU available'}")

if gpus:
    # Restrict TensorFlow to only allocate 6GB of memory on the first GPU
    try:
        """
        for gpu in gpus:
            tf.config.experimental.set_memory_growth(gpu, True)
        print(f"Set memory growth for {len(gpus)} physical GPU(s)")
        """
        mem_lim = 10*1024  # 6GB
        tf.config.experimental.set_virtual_device_configuration(
            gpus[0],
            [tf.config.experimental.VirtualDeviceConfiguration(memory_limit=mem_lim)])        
        logical_gpus = tf.config.experimental.list_logical_devices('GPU')
        print(f"Set memory usage to {mem_lim/1000} GB for {len(gpus)} physical GPU(s) -> {len(logical_gpus)} logical GPU(s)")
        
    except RuntimeError as e:
        # Virtual devices must be set before GPUs have been initialized
        print(e)
    print("GPU(s) will be automatically choosen for model calculations below.")
else:
    print("CPUs will be automatically choosen for model calculations below.")


Num GPUs Available:  0
No GPU available
CPUs will be automatically choosen for model calculations below.


In [5]:
# Preprocessing
df = pd.read_parquet(DATA_DIR_INTERIM + "df_train_balanced_v3.gzip")

# Setuup data generators
datagen = ImageDataGenerator(rescale=1 / 255., validation_split=0.1)
BATCH_SIZE = 64*8
train_generator = datagen.flow_from_dataframe(
    dataframe=df,
    directory=IMAGES_DIR,
    x_col="filename",
    y_col="genre_id",
    batch_size=BATCH_SIZE,
    seed=SEED,
    shuffle=True,
    class_mode="categorical",
    target_size=(299, 299),
    subset='training',
    validate_filenames=True
)

valid_generator = datagen.flow_from_dataframe(
    dataframe=df,
    directory=IMAGES_DIR,
    x_col="filename",
    y_col="genre_id",
    batch_size=BATCH_SIZE,
    seed=SEED,
    shuffle=True,
    class_mode="categorical",
    target_size=(299, 299),
    subset='validation',
    validate_filenames=True
)

# Setup class weights
class_weights = class_weight.compute_class_weight('balanced',
                                                  np.array(list(train_generator.class_indices.keys()),dtype="int"), 
                                                  np.array(df.genre_id.explode(),dtype="int"))
                                                 #np.unique(y_train),
                                                 #y_train)
class_weights_genre_id = dict(zip(list(train_generator.class_indices), class_weights))
display(class_weights_genre_id)
class_weights = dict(zip(list(range(len(class_weights))), class_weights))

Found 0 validated image filenames belonging to 0 classes.
Found 0 validated image filenames belonging to 0 classes.


ValueError: classes should include all valid labels that can be in y

In [None]:
%%time
# Build model
tf.debugging.set_log_device_placement(True)

# Use different multi GPU strategies
#strategy = tf.distribute.MirroredStrategy()
#strategy = tf.distribute.MirroredStrategy(cross_device_ops=tf.distribute.NcclAllReduce())
strategy = tf.distribute.experimental.CentralStorageStrategy()
#strategy = tf.distribute.experimental.MultiWorkerMirroredStrategy()
runtime_context = strategy.scope()

# Use one GPU
#runtime_context = tf.device("/GPU:0")

# Usa all CPUs
#runtime_context = tf.device("/CPU:0")

with runtime_context:    
    # Build model
    inputs = klay.Input(shape=(299, 299, 3), name="Input")
    x = klay.Conv2D(
        filters=32,  # channels
        kernel_size=(3, 3),  # convolutional matrix
        name='Conv_1')(inputs)

    x = klay.MaxPool2D(
        pool_size=(2,2),
        name='MaxPool_1')(x)

    x = klay.Conv2D(
        filters=64,
        kernel_size=(3, 3),
        name='Conv_2')(x)

    x = klay.MaxPool2D(
        pool_size=(2,2),
        name='MaxPool_2')(x)

    x = klay.Conv2D(
        filters=128,
        kernel_size=(3, 3),
        name='Conv_3')(x)

    x = klay.Flatten(
        name='Flatten')(x)

    x = klay.Dense(
        64,
        activation='relu',
        name='Dense')(x)

    outputs = klay.Dense(
        len(train_generator.class_indices),
        activation='sigmoid',
        name='Output')(x)

    model = keras.Model(inputs=inputs, outputs=outputs, name="CNN")
   
    print(model.summary())
    
    # Compile model
    model.compile(optimizer='adam', loss="binary_crossentropy", 
          metrics=[metrics.categorical_accuracy,
                   tf.keras.metrics.AUC(),
                   tf.keras.metrics.Precision(), 
                   tf.keras.metrics.Recall(),
                   hlp.f1_m,
                  ])

    # Train model
    history = model.fit(
        train_generator,
        validation_data=valid_generator,
        epochs=1,
        batch_size=BATCH_SIZE,
        #steps_per_epoch=1430,
        class_weight = class_weights
    )       