In [3]:
import pandas as pd 
import numpy as np
import os
from PIL import Image
import tensorflow as tf
from tensorflow.keras import layers, Model, regularizers
from tensorflow.keras.applications import VGG16
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.preprocessing.image import ImageDataGenerator


2024-03-31 01:21:08.099265: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 AVX512F FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [2]:
pip install pandas numpy Pillow tensorflow tensorflow-addons

Collecting pandas
  Using cached pandas-2.2.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (19 kB)
Collecting Pillow
  Using cached pillow-10.2.0-cp311-cp311-manylinux_2_28_x86_64.whl.metadata (9.7 kB)
Collecting tensorflow-addons
  Using cached tensorflow_addons-0.23.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (1.8 kB)
Collecting tzdata>=2022.7 (from pandas)
  Using cached tzdata-2024.1-py2.py3-none-any.whl.metadata (1.4 kB)
Collecting typeguard<3.0.0,>=2.7 (from tensorflow-addons)
  Using cached typeguard-2.13.3-py3-none-any.whl.metadata (3.6 kB)
Using cached pandas-2.2.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (13.0 MB)
Using cached pillow-10.2.0-cp311-cp311-manylinux_2_28_x86_64.whl (4.5 MB)
Using cached tensorflow_addons-0.23.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (611 kB)
Using cached typeguard-2.13.3-py3-none-any.whl (17 kB)
Using cached tzdata-2024.1-py2.py3-none-any.whl (345 kB)
Installin

In [4]:
train_image_dir = '/home/jovyan/shared/sampled_data/train_data'
val_image_dir = '/home/jovyan/shared/sampled_data/val_data'
test_image_dir = '/home/jovyan/shared/sampled_data/test_data'


train_metadata = pd.read_csv('/home/jovyan/shared/sampled_data/train_data.csv', usecols=['filename', 'age'])
val_metadata = pd.read_csv('/home/jovyan/shared/sampled_data/val_data.csv', usecols=['filename', 'age'])
test_metadata = pd.read_csv('/home/jovyan/shared/sampled_data/test_data.csv', usecols=['filename', 'age'])

In [5]:
# Load pre-trained VGG16 model without top (fully connected layers)
base_model = VGG16(weights='imagenet', include_top=False, input_shape=(224, 224, 3))

# Freeze the pre-trained layers
for layer in base_model.layers:
    layer.trainable = False

2024-03-31 01:21:14.599725: I tensorflow/compiler/xla/stream_executor/cuda/cuda_gpu_executor.cc:996] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero. See more at https://github.com/torvalds/linux/blob/v6.0/Documentation/ABI/testing/sysfs-bus-pci#L344-L355
2024-03-31 01:21:14.704506: I tensorflow/compiler/xla/stream_executor/cuda/cuda_gpu_executor.cc:996] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero. See more at https://github.com/torvalds/linux/blob/v6.0/Documentation/ABI/testing/sysfs-bus-pci#L344-L355
2024-03-31 01:21:14.705937: I tensorflow/compiler/xla/stream_executor/cuda/cuda_gpu_executor.cc:996] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero. See more at https://github.com/torvalds/linux/blob/v6.0/Documentation/ABI/testing/sysf

In [6]:
x = base_model.output
x = layers.Flatten()(x)
x = layers.Dense(512, activation='relu', kernel_regularizer=regularizers.l2(1e-4))(x)
x = layers.Dropout(0.5)(x)
age_output = layers.Dense(1, activation='linear', name='age_output')(x)

model = Model(inputs=base_model.input, outputs=age_output)

model.compile(optimizer=Adam(), loss='mean_squared_error', metrics=['mae'])

In [7]:
# Define data generators with augmentation
def preprocess_image(image):
    image = np.array(image) / 255.0
    return image

def generate_data_with_augmentation(metadata, image_dir, batch_size=32):
    datagen = ImageDataGenerator(
        rotation_range=10,
        width_shift_range=0.1,
        height_shift_range=0.1,
        shear_range=0.1,
        zoom_range=0.1,
        horizontal_flip=True,
        fill_mode='nearest',
        preprocessing_function=preprocess_image
    )

    num_samples = len(metadata)
    while True:
        batches = datagen.flow_from_dataframe(
            dataframe=metadata,
            directory=image_dir,
            x_col='filename',
            y_col='age',
            target_size=(224, 224),
            batch_size=batch_size,
            class_mode='raw'
        )
        for batch_images, batch_ages in batches:
            batch_images = np.array([preprocess_image(img) for img in batch_images])
            yield batch_images, batch_ages

In [8]:
batch_size = 32
train_data_generator = generate_data_with_augmentation(train_metadata, train_image_dir, batch_size=batch_size)
validation_data_generator = generate_data_with_augmentation(val_metadata, val_image_dir, batch_size=batch_size)

In [10]:
# Train the model with early stopping
callbacks = [tf.keras.callbacks.EarlyStopping(monitor='val_loss', patience=5, restore_best_weights=True)]
model.fit(train_data_generator,
          epochs=50,
          steps_per_epoch=len(train_metadata) // batch_size,
          validation_data=validation_data_generator,
          validation_steps=len(val_metadata) // batch_size,
          callbacks=callbacks)

# Evaluate the model on test data
test_data_generator = generate_data_with_augmentation(test_metadata, test_image_dir, batch_size=batch_size)
evaluation_results = model.evaluate(test_data_generator, steps=len(test_metadata) // batch_size)

print("Evaluation Results:")
print("Loss:", evaluation_results[0])
print("MAE:", evaluation_results[1])


Epoch 1/50
  1/934 [..............................] - ETA: 1:13 - loss: 775.6577 - mae: 24.5794

2024-03-31 03:41:40.666934: I tensorflow/core/common_runtime/executor.cc:1197] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You must feed a value for placeholder tensor 'Placeholder/_0' with dtype int32
	 [[{{node Placeholder/_0}}]]




2024-03-31 03:46:56.980241: I tensorflow/core/common_runtime/executor.cc:1197] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You must feed a value for placeholder tensor 'Placeholder/_0' with dtype int32
	 [[{{node Placeholder/_0}}]]


Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Found 3739 validated image filenames.
  1/116 [..............................] - ETA: 10s - loss: 552.9177 - mae: 19.9610

2024-03-31 04:52:27.052687: I tensorflow/core/common_runtime/executor.cc:1197] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You must feed a value for placeholder tensor 'Placeholder/_0' with dtype int32
	 [[{{node Placeholder/_0}}]]


Evaluation Results:
Loss: 477.8277282714844
MAE: 18.39571762084961
