In [1]:
import tensorflow as tf
# Display the installed TensorFlow version so the run environment is recorded in the notebook.
tf.__version__

2023-01-25 19:09:44.017389: I tensorflow/core/util/util.cc:169] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.


'2.9.1'

## Training and Saving the Selected Model

In [2]:
from tensorflow import keras

from tensorflow.keras.applications.mobilenet import MobileNet
from tensorflow.keras.applications.mobilenet import preprocess_input
from tensorflow.keras.applications.mobilenet import decode_predictions

from tensorflow.keras.preprocessing.image import ImageDataGenerator

In [3]:
# Dataset locations, one directory per split (relative paths).
train_dir, val_dir, test_dir = (
    "data/train",
    "data/validation",
    "data/test",
)

In [4]:
# Function to create model

def make_model(learning_rate=0.01, input_shape=(224, 224, 3), num_classes=3):
    """Build and compile a MobileNet-based transfer-learning classifier.

    An ImageNet-pretrained MobileNet without its classification top is used
    as a frozen feature extractor. A global-average-pooling layer followed by
    a single dense layer producing raw logits is stacked on top of it.

    Args:
        learning_rate: Learning rate passed to the Adam optimizer.
        input_shape: Shape of one input image as (height, width, channels).
            Defaults to (224, 224, 3).
        num_classes: Number of output logits, one per class. Defaults to 3.

    Returns:
        A compiled ``keras.Model`` that uses categorical cross-entropy on
        logits as its loss and reports accuracy as a metric.
    """
    base_model = MobileNet(
        weights='imagenet',
        include_top=False,
        input_shape=input_shape,
    )

    # Freeze the pretrained base so that only the new head is trained.
    base_model.trainable = False

    #########################################

    inputs = keras.Input(shape=input_shape)
    # Call the frozen base with training=False so it runs in inference mode.
    base = base_model(inputs, training=False)
    vectors = keras.layers.GlobalAveragePooling2D()(base)
    # No activation here: the layer emits logits, matching from_logits=True below.
    outputs = keras.layers.Dense(num_classes)(vectors)
    model = keras.Model(inputs, outputs)

    #########################################

    optimizer = keras.optimizers.Adam(learning_rate=learning_rate)
    loss = keras.losses.CategoricalCrossentropy(from_logits=True)

    # Compile the model
    model.compile(
        optimizer=optimizer,
        loss=loss,
        metrics=['accuracy'],
    )

    return model

In [5]:
# Training with Data Augmentation

# Image size and batch size shared by all three data pipelines below.
IMAGE_SIZE = (224, 224)
BATCH_SIZE = 32

# Augmentation is applied to the training split only.
train_gen = ImageDataGenerator(
    preprocessing_function=preprocess_input,
    rotation_range=30,
    width_shift_range=10.0,
    height_shift_range=10.0,
    zoom_range=0.1,
    vertical_flip=True,
)

train_ds = train_gen.flow_from_directory(
    train_dir,
    target_size=IMAGE_SIZE,
    batch_size=BATCH_SIZE,
)

# Validation images are only preprocessed (no augmentation) and are read
# without shuffling.
val_gen = ImageDataGenerator(preprocessing_function=preprocess_input)

val_ds = val_gen.flow_from_directory(
    val_dir,
    target_size=IMAGE_SIZE,
    batch_size=BATCH_SIZE,
    shuffle=False,
)

# Test images get the same treatment as validation images.
test_gen = ImageDataGenerator(preprocessing_function=preprocess_input)

# Build the test iterator from test_gen (previously val_gen was used here,
# leaving test_gen unused).
test_ds = test_gen.flow_from_directory(
    test_dir,
    target_size=IMAGE_SIZE,
    batch_size=BATCH_SIZE,
    shuffle=False,
)

Found 928 images belonging to 3 classes.
Found 115 images belonging to 3 classes.
Found 115 images belonging to 3 classes.


In [6]:
# Write a checkpoint only when validation accuracy reaches a new maximum;
# the file name records the epoch number and the val_accuracy value.
checkpoint = keras.callbacks.ModelCheckpoint(
    filepath='mobilenet_v1_{epoch:02d}_{val_accuracy:.3f}.h5',
    monitor='val_accuracy',
    mode='max',
    save_best_only=True,
)

In [7]:
# Build a fresh model and train it for 10 epochs, validating on val_ds after
# each epoch; the checkpoint callback is invoked during training.
learning_rate = 0.01
model = make_model(learning_rate=learning_rate)

history = model.fit(
    train_ds,
    validation_data=val_ds,
    epochs=10,
    callbacks=[checkpoint],
)

2023-01-25 19:09:45.835481: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:975] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2023-01-25 19:09:45.842201: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:975] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2023-01-25 19:09:45.842541: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:975] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2023-01-25 19:09:45.843047: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 AVX512F AVX512_VNNI FMA
To enable them in other operations, rebuild TensorFlow with the approp

Epoch 1/10


2023-01-25 19:09:59.306193: W tensorflow/core/common_runtime/bfc_allocator.cc:479] Allocator (GPU_0_bfc) ran out of memory trying to allocate 18.38MiB (rounded to 19267584)requested by op model/mobilenet_1.00_224/conv1/Conv2D-0-TransposeNHWCToNCHW-LayoutOptimizer
If the cause is memory fragmentation maybe the environment variable 'TF_GPU_ALLOCATOR=cuda_malloc_async' will improve the situation. 
Current allocation summary follows.
Current allocation summary follows.
2023-01-25 19:09:59.306253: I tensorflow/core/common_runtime/bfc_allocator.cc:1027] BFCAllocator dump for GPU_0_bfc
2023-01-25 19:09:59.306265: I tensorflow/core/common_runtime/bfc_allocator.cc:1034] Bin (256): 	Total Chunks: 78, Chunks in use: 76. 19.5KiB allocated for chunks. 19.0KiB in use in bin. 3.5KiB client-requested in use in bin.
2023-01-25 19:09:59.306273: I tensorflow/core/common_runtime/bfc_allocator.cc:1034] Bin (512): 	Total Chunks: 17, Chunks in use: 17. 8.8KiB allocated for chunks. 8.8KiB in use in bin. 8.4Ki

ResourceExhaustedError: Graph execution error:

OOM when allocating tensor with shape[32,3,224,224] and type float on /job:localhost/replica:0/task:0/device:GPU:0 by allocator GPU_0_bfc
	 [[{{node model/mobilenet_1.00_224/conv1/Conv2D-0-TransposeNHWCToNCHW-LayoutOptimizer}}]]
Hint: If you want to see a list of allocated tensors when OOM happens, add report_tensor_allocations_upon_oom to RunOptions for current allocation info. This isn't available when running in Eager mode.
 [Op:__inference_train_function_4664]