In [1]:
import numpy as np
import pandas as pd
import tensorflow as tf
from keras_preprocessing.image import ImageDataGenerator

In [6]:
# Config 1: Intercept-ResNet-V2
# Path config
#INPUT_PATH = "../data/raw/"
#IMAGE_PATH = INPUT_PATH + 'reality_check/'
#OUTPUT_IMAGE_PATH = "../data/interim/aida-image-captioning_inceptresnetv2_reality-check_v2/Images/"
#MODEL1 = tf.keras.applications.inception_resnet_v2
# MODEL2 = tf.keras.applications.InceptionResNetV2(
#TARGET_SIZE = (299, 299)

# Config 2: InterceptV3
INPUT_PATH = "../data/raw/"
IMAGE_PATH = INPUT_PATH + 'reality_check/'
OUTPUT_PATH = "../data/interim/aida-image-captioning_inceptionv3_reality-check_v2/"
OUTPUT_IMAGE_PATH = OUTPUT_PATH + "Images/"
MODEL1 = tf.keras.applications.inception_v3
MODEL2 = tf.keras.applications.InceptionV3
TARGET_SIZE = (299, 299)

# GPU/CPU config
MEMORY_OF_GPU = 6000  # MBytes

In [7]:
# To get access to a GPU instance you can use the `change runtime type` and set the option to `GPU` from the `Runtime` tab  in the notebook
# Checking the GPU availability for the notebook
#tf.test.gpu_device_name()

gpus = tf.config.list_physical_devices('GPU')
if gpus:
    # Create virtual GPUs
    try:
        tf.config.experimental.set_virtual_device_configuration(
            #OK, but solwer: 
            #gpus[0], [tf.config.experimental.VirtualDeviceConfiguration(memory_limit=2.5*1024),
            #      tf.config.experimental.VirtualDeviceConfiguration(memory_limit=2.5*1024),
            #      tf.config.experimental.VirtualDeviceConfiguration(memory_limit=2.5*1024),
            #      tf.config.experimental.VirtualDeviceConfiguration(memory_limit=2.5*1024)],
            #OK
            gpus[0], [tf.config.experimental.VirtualDeviceConfiguration(memory_limit=MEMORY_OF_GPU//2),
                      tf.config.experimental.VirtualDeviceConfiguration(memory_limit=MEMORY_OF_GPU//2)],
            #Error using NCCL automatically on mirrored strategy: gpus[0], [tf.config.experimental.VirtualDeviceConfiguration(memory_limit=10*1024)],
        )

        tf.config.experimental.set_virtual_device_configuration(
            #OK, but solwer: 
            #gpus[1], [tf.config.experimental.VirtualDeviceConfiguration(memory_limit=2.5*1024),
            #      tf.config.experimental.VirtualDeviceConfiguration(memory_limit=2.5*1024),
            #      tf.config.experimental.VirtualDeviceConfiguration(memory_limit=2.5*1024),
            #      tf.config.experimental.VirtualDeviceConfiguration(memory_limit=2.5*1024)],
            #OK 
            gpus[1], [tf.config.experimental.VirtualDeviceConfiguration(memory_limit=MEMORY_OF_GPU//2),
                      tf.config.experimental.VirtualDeviceConfiguration(memory_limit=MEMORY_OF_GPU//2)],
            #Error using NCCL automatically on mirrored strategy: gpus[1], [tf.config.experimental.VirtualDeviceConfiguration(memory_limit=10*1024)],            
        )
    except:
        # Virtual devices must be set before GPUs have been initialized
        print("Warning: During GPU handling.")
        pass
    finally:
        logical_gpus = tf.config.experimental.list_logical_devices('GPU')
        print(len(gpus), "Physical GPU,", len(logical_gpus), "Logical GPUs\n")

# Set runtime context and batch size
l_rtc_names = [
    "multi-GPU_MirroredStrategy",
    "multi-GPU_CentralStorageStrategy",
    "1-GPU",
    "CPUs",
    "multi-GPU_MirroredStrategy_NCCL-All-Reduced",
]
l_rtc = [
    tf.distribute.MirroredStrategy().scope(),
    tf.distribute.experimental.CentralStorageStrategy().scope(),
    tf.device("/GPU:0"),
    tf.device("/CPU:0"),
    tf.distribute.MirroredStrategy(cross_device_ops=tf.distribute.NcclAllReduce()).scope(),
]
if len(gpus) == 0:
    rtc_idx = 3
    batch_size = 64

elif len(gpus) == 1:
    rtc_idx = 2
    batch_size = 4*256

elif len(gpus) > 1:
    rtc_idx = 0
    batch_size = 8*256

runtime_context = l_rtc[rtc_idx]

print(f"\nRuntime Context: {l_rtc_names[rtc_idx]}")
print(f"Recommended Batch Size: {batch_size} datasets")

1 Physical GPU, 2 Logical GPUs

INFO:tensorflow:Using MirroredStrategy with devices ('/job:localhost/replica:0/task:0/device:GPU:0', '/job:localhost/replica:0/task:0/device:GPU:1')
INFO:tensorflow:ParameterServerStrategy (CentralStorageStrategy if you are using a single machine) with compute_devices = ['/job:localhost/replica:0/task:0/device:GPU:0', '/job:localhost/replica:0/task:0/device:GPU:1'], variable_device = '/device:CPU:0'
INFO:tensorflow:Using MirroredStrategy with devices ('/job:localhost/replica:0/task:0/device:GPU:0', '/job:localhost/replica:0/task:0/device:GPU:1')

Runtime Context: 1-GPU
Recommended Batch Size: 1024 datasets


In [8]:
# Set display option
pd.set_option('display.max_colwidth',-1) # Set the max column width to see the complete caption

# Set image to predict
df = pd.DataFrame(data=["historical.jpg", "rae.jpg", "ths.jpg", "2662262499_3cdf49cedd.jpg"], columns=["image"])

  


In [9]:
#datagen = ImageDataGenerator(rescale=1./255)  # not necessary anymore
datagen = ImageDataGenerator()  # not necessary anymore
pred_generator = datagen.flow_from_dataframe(dataframe=df,
                                            directory=IMAGE_PATH, 
                                            x_col="image", y_col="image", 
                                            class_mode="raw", 
                                            target_size=TARGET_SIZE,
                                            shuffle=False
                                           )

Found 4 validated image filenames.


In [12]:
image_model = MODEL2(
    include_top=False,
    weights='imagenet'
)

In [13]:
new_input = image_model.input
hidden_layer = image_model.layers[-1].output
image_features_extract_model = tf.keras.Model(new_input, hidden_layer)
#image_features_extract_model.summary()

In [14]:
!mkdir $OUTPUT_PATH
!mkdir $OUTPUT_IMAGE_PATH

mkdir: das Verzeichnis »../data/interim/aida-image-captioning_inceptionv3_reality-check_v2/“ kann nicht angelegt werden: Die Datei existiert bereits
mkdir: das Verzeichnis »../data/interim/aida-image-captioning_inceptionv3_reality-check_v2/Images/“ kann nicht angelegt werden: Die Datei existiert bereits


In [15]:
rtx_idx = 3
runtime_context = l_rtc[rtc_idx]
with runtime_context:
    i=0
    for X, _ in pred_generator:
        img = MODEL1.preprocess_input(X)
        batch_features = image_features_extract_model(img)
        batch_features = tf.reshape(batch_features,
                                    (batch_features.shape[0], -1, batch_features.shape[3]))
        if i == 0:
            print(f"X.shape: {X.shape}")
            print(f"img.shape: {img.shape}")
            print(f"batch_features.shape: {batch_features.shape}")
            print(f"reshaped batch_features.shape: {batch_features.shape}")

        for bf in batch_features:
            filename = OUTPUT_IMAGE_PATH + df.iloc[i].image.replace(".jpg",".npy")
            print(f"filename: {filename}")
            np.save(filename, bf.numpy())
            i=i+1

X.shape: (4, 299, 299, 3)
img.shape: (4, 299, 299, 3)
batch_features.shape: (4, 64, 2048)
reshaped batch_features.shape: (4, 64, 2048)
filename: ../data/interim/aida-image-captioning_inceptionv3_reality-check_v2/Images/historical.npy
filename: ../data/interim/aida-image-captioning_inceptionv3_reality-check_v2/Images/rae.npy
filename: ../data/interim/aida-image-captioning_inceptionv3_reality-check_v2/Images/ths.npy
filename: ../data/interim/aida-image-captioning_inceptionv3_reality-check_v2/Images/2662262499_3cdf49cedd.npy


IndexError: single positional indexer is out-of-bounds