In [1]:
import tensorflow as tf
import keras
import numpy as np

In [2]:
def load_data_artifact(
    dir_path,
    image_size=(200, 200),
    batch_size=256,
    validation_split=0.2,
    seed=42,
):
    """Build the training and validation image datasets for a directory.

    Thin wrapper around ``tf.keras.preprocessing.image_dataset_from_directory``
    that asks for both subsets in a single call (``subset='both'``), with
    binary labels, shuffling enabled and ``'area'`` interpolation for the
    resize. Calling it with only ``dir_path`` reproduces the notebook's
    original fixed configuration.

    Args:
        dir_path: Directory handed straight to ``image_dataset_from_directory``.
        image_size: ``(height, width)`` each image is resized to.
        batch_size: Number of images per dataset element.
        validation_split: Fraction of the files reserved for validation.
        seed: Seed forwarded to the loader; the same seed is used for both
            subsets because they are produced by one call.

    Returns:
        The ``(train, valid)`` pair of datasets produced by the loader.
    """
    train, valid = tf.keras.preprocessing.image_dataset_from_directory(
        dir_path,
        label_mode='binary',
        # Forwarded as a list so the default call matches the original
        # ``image_size=[200, 200]`` argument exactly.
        image_size=list(image_size),
        validation_split=validation_split,
        subset='both',
        shuffle=True,
        seed=seed,
        interpolation='area',
        batch_size=batch_size
    )

    return train, valid

In [3]:
# Load both subsets from the artifact dataset directory (path is relative to
# the notebook's working directory).
train, valid = load_data_artifact(dir_path="../../datasets/artifact")

Found 1934329 files belonging to 2 classes.
Using 1547464 files for training.
Using 386865 files for validation.


In [4]:
# Work on a small slice of the data: the datasets are batched with 256 images
# per element, so this keeps at most 32 * 256 = 8192 training images and
# 16 * 256 = 4096 validation images.
train_batch, valid_batch = train.take(count=32), valid.take(count=16)

In [5]:
def _vgg19_preprocess(images, labels):
    """Apply VGG19's input preprocessing to the images; labels pass through."""
    return tf.keras.applications.vgg19.preprocess_input(images), labels


# Both subsets go through the same preprocessing before feature extraction.
train_batch = train_batch.map(_vgg19_preprocess)
valid_batch = valid_batch.map(_vgg19_preprocess)

In [6]:
# VGG19 convolutional base only (no classifier head) with ImageNet weights,
# sized for the 200x200 RGB images produced by the loader.
model = keras.applications.VGG19(
    include_top=False,
    weights='imagenet',
    input_shape=(200, 200, 3),
)
# Freeze the weights: the network is only used as a fixed feature extractor.
model.trainable = False

In [7]:
# Print the layer-by-layer architecture with output shapes and parameter counts.
model.summary()

Model: "vgg19"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_1 (InputLayer)        [(None, 200, 200, 3)]     0         
                                                                 
 block1_conv1 (Conv2D)       (None, 200, 200, 64)      1792      
                                                                 
 block1_conv2 (Conv2D)       (None, 200, 200, 64)      36928     
                                                                 
 block1_pool (MaxPooling2D)  (None, 100, 100, 64)      0         
                                                                 
 block2_conv1 (Conv2D)       (None, 100, 100, 128)     73856     
                                                                 
 block2_conv2 (Conv2D)       (None, 100, 100, 128)     147584    
                                                                 
 block2_pool (MaxPooling2D)  (None, 50, 50, 128)       0     

In [8]:
def extract_features(dataset, model, verbose="auto"):
    """Run ``model`` over every batch of ``dataset`` and stack the results.

    Features and labels are collected in the same pass over ``dataset``, so
    row ``i`` of the returned features corresponds to row ``i`` of the
    returned labels even if the dataset yields batches in a different order
    on another iteration.

    Args:
        dataset: Iterable yielding ``(images, labels)`` batches. ``labels``
            may be a tensor exposing ``.numpy()`` or anything accepted by
            ``np.asarray``.
        model: Object with a Keras-style ``predict(images, verbose=...)``
            method returning an array per batch.
        verbose: Forwarded to ``model.predict``. The default ``"auto"`` is
            Keras' own default; pass ``0`` to silence the per-batch progress
            bar that otherwise floods the notebook output.

    Returns:
        ``(features, labels)``: the per-batch predictions and the per-batch
        labels, each concatenated along axis 0.

    Raises:
        ValueError: If ``dataset`` yields no batches.
    """
    all_features = []
    all_labels = []
    for images, labels in dataset:
        all_features.append(model.predict(images, verbose=verbose))
        # Eager tensors expose .numpy(); plain arrays/lists are converted directly.
        if hasattr(labels, "numpy"):
            all_labels.append(labels.numpy())
        else:
            all_labels.append(np.asarray(labels))

    if not all_features:
        # np.concatenate would fail here with an unhelpful
        # "need at least one array to concatenate" message.
        raise ValueError("extract_features received an empty dataset: no batches to stack")

    features = np.concatenate(all_features, axis=0)
    labels = np.concatenate(all_labels, axis=0)
    return features, labels

In [9]:
# Feature maps and labels for the training subset, from the frozen VGG19 base.
train_x, train_y = extract_features(dataset=train_batch, model=model)



In [10]:
# Same extraction for the validation subset.
valid_x, valid_y = extract_features(dataset=valid_batch, model=model)



In [11]:
# Persist the extracted features and labels as .npy files in the current
# working directory so later work can load them without re-running VGG19.
for npy_path, array in (
    ('train_x.npy', train_x),
    ('train_y.npy', train_y),
    ('valid_x.npy', valid_x),
    ('valid_y.npy', valid_y),
):
    np.save(npy_path, array)