In [1]:
import numpy as np
import pandas as pd
import tensorflow as tf
import datetime, os
import math
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.layers import GlobalMaxPooling2D, Dense, Flatten, Dropout
from tensorflow.keras.callbacks import TensorBoard, ModelCheckpoint, EarlyStopping, ReduceLROnPlateau
from tensorflow.keras import Sequential
from keras.metrics import mean_absolute_error

#loading dataframes
train_df = pd.read_csv('/kaggle/input/rsna-bone-age/boneage-training-dataset.csv')
test_df = pd.read_csv('/kaggle/input/rsna-bone-age/boneage-test-dataset.csv')

#appending file extension to id column for both training and testing dataframes
train_df['id'] = train_df['id'].apply(lambda x: str(x)+'.png')
test_df['Case ID'] = test_df['Case ID'].apply(lambda x: str(x)+'.png') 

#models perform better when features are normalised to have zero mean and unity standard deviation
#using z score for the training
mean_bone_age = train_df['boneage'].mean()
std_bone_age = train_df['boneage'].std()
train_df['bone_age_z'] = (train_df['boneage'] - mean_bone_age)/(std_bone_age)

# Define the age ranges
ages = train_df['boneage'].values
image_paths = train_df['id'].values
max_age = max(ages)
age_ranges = [(i, i+1) for i in range(0, max_age+1)]

for lower_bound, upper_bound in age_ranges:
    # Calculate the localized mean for the current age range
    localized_mean = np.mean([age for age in ages if lower_bound <= age < upper_bound])

    for value in range(lower_bound, upper_bound):
        count = np.count_nonzero(ages == value)

        if count < 75:
            # Calculate the augmentation factor
            augmentation_factor = int(np.ceil(75 / count)) if count > 0 else 75

            # Get the indices of the images corresponding to the current age
            indices = [i for i, age in enumerate(ages) if age == value]

            # Augment the data by replicating images
            augmented_paths = np.repeat(np.array(image_paths)[indices], augmentation_factor)
            augmented_ages = np.full(len(augmented_paths), localized_mean)

            # Update the dataset
            image_paths = np.concatenate([image_paths, augmented_paths])
            ages = np.concatenate([ages, augmented_ages])

# Split the augmented dataset into training and testing sets
X_train_paths, X_test_paths, y_train, y_test = train_test_split(
    image_paths, ages, test_size=0.2, random_state=42
)

2024-07-12 01:47:27.518002: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:9261] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2024-07-12 01:47:27.518103: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:607] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2024-07-12 01:47:27.680893: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1515] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


In [2]:
img_size = 256
data_augmentation = dict(rotation_range=0.2, zoom_range=0.1, horizontal_flip=True,
                                width_shift_range=0.05,
                                height_shift_range=0.05,
                                shear_range=0.05, fill_mode='nearest')
# Define the data generators
train_data_generator = ImageDataGenerator(preprocessing_function = tf.keras.applications.xception.preprocess_input,  **data_augmentation)
val_data_generator = ImageDataGenerator(preprocessing_function = tf.keras.applications.xception.preprocess_input)

# Define the generators
train_generator = train_data_generator.flow_from_dataframe(
    dataframe = train_df[train_df['id'].isin(X_train_paths)],
    directory = '/kaggle/input/rsna-bone-age/boneage-training-dataset/boneage-training-dataset',
    x_col= 'id',
    y_col= 'bone_age_z',
    batch_size = 32,
    seed = 42,
    shuffle = True,
    class_mode= 'other',
    flip_vertical = True,
    color_mode = 'rgb',
    target_size = (img_size, img_size))

val_generator = val_data_generator.flow_from_dataframe(
    dataframe = train_df[train_df['id'].isin(X_test_paths)],
    directory = '/kaggle/input/rsna-bone-age/boneage-training-dataset/boneage-training-dataset',
    x_col = 'id',
    y_col = 'bone_age_z',
    batch_size = 32,
    seed = 42,
    shuffle = True,
    class_mode = 'other',
    flip_vertical = True,
    color_mode = 'rgb',
    target_size = (img_size, img_size))


Found 10331 validated image filenames.
Found 3111 validated image filenames.


In [3]:
# Define the custom metric
def mae_in_months(x_p, y_p):
    return mean_absolute_error((std_bone_age*x_p + mean_bone_age), (std_bone_age*y_p + mean_bone_age))

# Define the data generators
train_data_generator = ImageDataGenerator(preprocessing_function = tf.keras.applications.xception.preprocess_input,  **data_augmentation)
val_data_generator = ImageDataGenerator(preprocessing_function = tf.keras.applications.xception.preprocess_input)

# Define the model
model = tf.keras.applications.DenseNet201(input_shape = (img_size, img_size, 3),
                                           include_top = False,
                                           weights = 'imagenet')
model.trainable = True
model = Sequential([model,
                    GlobalMaxPooling2D(),
                    Flatten(),
                    Dense(64, activation = 'relu'),
                    Dense(32, activation = 'relu'),
                    Dense(1, activation = 'linear')])

# Compile the model
model.compile(loss ='mse', optimizer= tf.keras.optimizers.Adamax(learning_rate=0.001), metrics = [mae_in_months])

# Define the callbacks
early_stopping = EarlyStopping(monitor='val_loss', min_delta=0, patience= 12, verbose=0, mode='auto')
mc = ModelCheckpoint('best_model.keras', monitor='val_loss', mode='min', save_best_only=True, save_weights_only = False)
red_lr_plat = ReduceLROnPlateau(monitor='val_loss', factor=0.1, patience=8, verbose=0, mode='auto', min_delta=0.0001, cooldown=0, min_lr=0)
callbacks = [early_stopping, mc, red_lr_plat]

# Fit the model
history = model.fit(train_generator,
                            steps_per_epoch = 315,
                            validation_data = val_generator,
                            validation_steps = 1,
                            epochs = 60,
                            callbacks= callbacks)


Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/densenet/densenet201_weights_tf_dim_ordering_tf_kernels_notop.h5
[1m74836368/74836368[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 0us/step
Epoch 1/60


  self._warn_if_super_not_called()
I0000 00:00:1720749239.247986      70 device_compiler.h:186] Compiled cluster using XLA!  This line is logged at most once for the lifetime of the process.
W0000 00:00:1720749239.532581      70 graph_launch.cc:671] Fallback to op-by-op mode because memset node breaks graph update


[1m 30/315[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m7:32[0m 2s/step - loss: 5.2959 - mae_in_months: 67.4005

W0000 00:00:1720749499.578318      71 graph_launch.cc:671] Fallback to op-by-op mode because memset node breaks graph update


[1m315/315[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2s/step - loss: 1.4125 - mae_in_months: 31.6425

W0000 00:00:1720749984.231426      73 graph_launch.cc:671] Fallback to op-by-op mode because memset node breaks graph update


[1m315/315[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1094s[0m 2s/step - loss: 1.4099 - mae_in_months: 31.6116 - val_loss: 0.2730 - val_mae_in_months: 18.0259 - learning_rate: 0.0010
Epoch 2/60
[1m315/315[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 13ms/step - loss: 0.2492 - mae_in_months: 16.6489 - val_loss: 0.3432 - val_mae_in_months: 19.1523 - learning_rate: 0.0010
Epoch 3/60


  self.gen.throw(typ, value, traceback)


[1m315/315[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m433s[0m 1s/step - loss: 0.2110 - mae_in_months: 14.8501 - val_loss: 0.1588 - val_mae_in_months: 12.5494 - learning_rate: 0.0010
Epoch 4/60
[1m315/315[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 12ms/step - loss: 0.2085 - mae_in_months: 14.8831 - val_loss: 0.2650 - val_mae_in_months: 16.2217 - learning_rate: 0.0010
Epoch 5/60
[1m315/315[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m426s[0m 1s/step - loss: 0.1689 - mae_in_months: 13.3827 - val_loss: 0.1693 - val_mae_in_months: 13.0656 - learning_rate: 0.0010
Epoch 6/60
[1m315/315[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 12ms/step - loss: 0.1580 - mae_in_months: 13.2547 - val_loss: 0.1708 - val_mae_in_months: 13.4857 - learning_rate: 0.0010
Epoch 7/60
[1m315/315[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m429s[0m 1s/step - loss: 0.1536 - mae_in_months: 12.7899 - val_loss: 0.1356 - val_mae_in_months: 13.0091 - learning_rate: 0.0010
Epoch 8/60
[1m3

In [4]:
# Build the model
model.build((None, img_size, img_size, 3))

In [5]:
import cv2
from tensorflow.keras.applications import DenseNet201
from tensorflow.keras.applications.densenet import preprocess_input
from tensorflow.keras.models import load_model


image_paths = [os.path.join('/kaggle/input/hand-x-rays/Hand xrays', f"{file_name}") for file_name in os.listdir('/kaggle/input/hand-x-rays/Hand xrays')]

def load_and_preprocess_images(image_paths, target_size=(256, 256)):
    images = []
    for path in image_paths:
        img = cv2.imread(path)
        img = cv2.resize(img, target_size)  # Ensure output shape is (224, 224, 3)
        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        img_array = np.expand_dims(img, axis=0)
        img_array = preprocess_input(img_array)  # Use DenseNet201's preprocess_input
        images.append(img_array)
    return np.vstack(images)

new_images = load_and_preprocess_images(image_paths)

predictions = model.predict(new_images)

# Print the predicted ages
for i, prediction in enumerate(predictions):
    print(f"Image {i+1} predicted age: {prediction[0]:.2f} years")

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m33s[0m 33s/step
Image 1 predicted age: -2.02 years
Image 2 predicted age: 0.16 years
Image 3 predicted age: 0.70 years
Image 4 predicted age: -0.83 years
Image 5 predicted age: 0.85 years
Image 6 predicted age: 0.72 years
Image 7 predicted age: -1.18 years
Image 8 predicted age: 0.64 years
Image 9 predicted age: 0.40 years
Image 10 predicted age: -1.22 years
Image 11 predicted age: -0.56 years
Image 12 predicted age: -0.98 years
Image 13 predicted age: -0.40 years
Image 14 predicted age: 0.33 years
Image 15 predicted age: -1.75 years


W0000 00:00:1720759559.888469      73 graph_launch.cc:671] Fallback to op-by-op mode because memset node breaks graph update
