In [1]:
import os
import numpy as np
from tqdm import tqdm
import pandas as pd
from tensorflow.keras.applications import VGG16
from tensorflow.keras.preprocessing.image import load_img, img_to_array, ImageDataGenerator
from tensorflow.keras.applications.vgg16 import preprocess_input
from sklearn.model_selection import train_test_split
from tensorflow import keras

2024-05-27 07:08:19.177775: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:9261] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2024-05-27 07:08:19.177886: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:607] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2024-05-27 07:08:19.351450: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1515] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered


In [2]:
# Load the CSV file
csv_file_path = '/kaggle/input/rsna-bone-age/boneage-training-dataset.csv'
dataset_path = '/kaggle/input/rsna-bone-age/boneage-training-dataset/boneage-training-dataset'
df = pd.read_csv(csv_file_path)

# Filter the dataset based on conditions
ra = []
for i in range(187):
    if i % 12 == 5 or i % 12 == 6 or i % 12 == 7:
        ra.append(i)

id_to_bone_age = {row['id']: row['boneage'] for _, row in df.iterrows()}

selected_ids = [int(file_name.split('.')[0]) for file_name in os.listdir(dataset_path) if file_name.split('.')[0].isdigit()]
filtered_ids = [id for id in selected_ids if id_to_bone_age[id] in ra]

image_paths = [os.path.join(dataset_path, f"{id}.png") for id in filtered_ids]
ages = [id_to_bone_age[id] // 12 for id in filtered_ids]

# Split the dataset into training and testing sets
X_train_paths, X_test_paths, y_train, y_test = train_test_split(
    image_paths, ages, test_size=0.2, random_state=42
)

In [3]:
id_to_bone_age = {row['id']: row['boneage'] for _, row in df.iterrows()}
image_paths = []
ages = []

for file_name in os.listdir(dataset_path):
    file_path = os.path.join(dataset_path, file_name)
    idf=file_name.split('.')[0]
    if(idf.isdigit()):
      if(id_to_bone_age[int(idf)] in ra ):

        image_paths.append(file_path)
        ages.append(id_to_bone_age[int(file_name.split('.')[0])]//12)

In [4]:
image_paths = np.array(image_paths)
ages = np.array(ages)
print(image_paths)
print(ages)
print(len(image_paths),len(ages))

['/kaggle/input/rsna-bone-age/boneage-training-dataset/boneage-training-dataset/9273.png'
 '/kaggle/input/rsna-bone-age/boneage-training-dataset/boneage-training-dataset/14127.png'
 '/kaggle/input/rsna-bone-age/boneage-training-dataset/boneage-training-dataset/11396.png'
 ...
 '/kaggle/input/rsna-bone-age/boneage-training-dataset/boneage-training-dataset/6995.png'
 '/kaggle/input/rsna-bone-age/boneage-training-dataset/boneage-training-dataset/14760.png'
 '/kaggle/input/rsna-bone-age/boneage-training-dataset/boneage-training-dataset/4225.png']
[12 14  4 ... 11 13 12]
2921 2921


In [5]:
unique_values, counts = np.unique(ages, return_counts=True)

# Display the value counts
for value, count in zip(unique_values, counts):
    print(f"{value}: {count} occurrences")

0: 2 occurrences
1: 29 occurrences
2: 39 occurrences
3: 90 occurrences
4: 101 occurrences
5: 44 occurrences
6: 56 occurrences
7: 51 occurrences
8: 51 occurrences
9: 116 occurrences
10: 204 occurrences
11: 536 occurrences
12: 681 occurrences
13: 685 occurrences
14: 98 occurrences
15: 138 occurrences


In [6]:
for value, count in zip(unique_values, counts):
    if count < 75:
        # Augment the data to reach 75 occurrences
        augmentation_factor = int(np.ceil(75 / count))
        indices = np.where(ages == value)[0]

        # Augment the data by replicating images
        augmented_paths = np.repeat(np.array(image_paths)[indices], augmentation_factor)
        augmented_ages = np.repeat(value, len(augmented_paths))

        # Update the dataset
        image_paths = np.concatenate([image_paths, augmented_paths])
        ages = np.concatenate([ages, augmented_ages])

# Split the augmented dataset into training and testing sets
X_train_paths, X_test_paths, y_train, y_test = train_test_split(
    image_paths, ages, test_size=0.2, random_state=42
)

In [7]:
unique_values, counts = np.unique(ages, return_counts=True)

# Display the value counts
for value, count in zip(unique_values, counts):
    print(f"{value}: {count} occurrences")

0: 78 occurrences
1: 116 occurrences
2: 117 occurrences
3: 90 occurrences
4: 101 occurrences
5: 132 occurrences
6: 168 occurrences
7: 153 occurrences
8: 153 occurrences
9: 116 occurrences
10: 204 occurrences
11: 536 occurrences
12: 681 occurrences
13: 685 occurrences
14: 98 occurrences
15: 138 occurrences


In [8]:
# Load and preprocess images
import cv2
from tensorflow.keras.preprocessing import image
def preprocess_images(image_paths, target_size=(224, 224)):
    images = []
    for path in tqdm(image_paths, desc='Loading and Preprocessing Images'):
        img = image.load_img(path, target_size=target_size)
        img = image.img_to_array(img)  # Convert PIL Image to NumPy array
        img = cv2.cvtColor(img.astype('uint8'), cv2.COLOR_BGR2GRAY)  # Convert to grayscale
        img = cv2.equalizeHist(img.astype('uint8'))
        img_array = np.expand_dims(img, axis=0)
        img_array = preprocess_input(img_array)
        images.append(img_array)
    return np.vstack(images)

X_train = preprocess_images(X_train_paths)
X_test = preprocess_images(X_test_paths)


Loading and Preprocessing Images: 100%|██████████| 2852/2852 [02:01<00:00, 23.45it/s]
Loading and Preprocessing Images: 100%|██████████| 714/714 [00:28<00:00, 25.18it/s]


In [9]:
from tensorflow.keras.applications import Xception
from tensorflow.keras.layers import Dense, GlobalAveragePooling2D
from tensorflow.keras.models import Model
from tensorflow.keras.preprocessing.image import load_img, img_to_array
import cv2

# Load the pre-trained Xception model (excluding the top layers)
base_model = Xception(weights='imagenet', include_top=False, input_shape=(224, 224, 3))

# Add custom layers on top
x = base_model.output
x = GlobalAveragePooling2D()(x)
x = Dense(512, activation='relu')(x)
predictions = Dense(1, activation='linear')(x)  # Regression output

# Create the final model
model = Model(inputs=base_model.input, outputs=predictions)

# Compile the model
model.compile(optimizer='adam', loss='mean_squared_error', metrics=['mae'])

# Load and preprocess images (convert to RGB)
def preprocess_images(image_paths, target_size=(224, 224)):
    images = []
    for path in image_paths:
        img = load_img(path, target_size=target_size)
        img = img_to_array(img)
        img = cv2.cvtColor(img.astype('uint8'), cv2.COLOR_BGR2RGB)  # Convert to RGB
        img_array = np.expand_dims(img, axis=0)
        img_array = preprocess_input(img_array)
        images.append(img_array)
    return np.vstack(images)

X_train_rgb = preprocess_images(X_train_paths)
X_test_rgb = preprocess_images(X_test_paths)

# Train the model
history = model.fit(X_train_rgb, y_train, validation_data=(X_test_rgb, y_test), epochs=10, batch_size=32)

# Evaluate the model
test_loss, test_mae = model.evaluate(X_test_rgb, y_test)
print(f"Test MAE: {test_mae}")


Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/xception/xception_weights_tf_dim_ordering_tf_kernels_notop.h5
[1m83683744/83683744[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 0us/step
Epoch 1/10


I0000 00:00:1716794058.341719      74 device_compiler.h:186] Compiled cluster using XLA!  This line is logged at most once for the lifetime of the process.
W0000 00:00:1716794058.414362      74 graph_launch.cc:671] Fallback to op-by-op mode because memset node breaks graph update


[1m90/90[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 663ms/step - loss: 17.7350 - mae: 2.8969

W0000 00:00:1716794117.746858      74 graph_launch.cc:671] Fallback to op-by-op mode because memset node breaks graph update
W0000 00:00:1716794121.810619      71 graph_launch.cc:671] Fallback to op-by-op mode because memset node breaks graph update


[1m90/90[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m148s[0m 805ms/step - loss: 17.6179 - mae: 2.8851 - val_loss: 75.8173 - val_mae: 8.1081
Epoch 2/10


W0000 00:00:1716794130.437867      74 graph_launch.cc:671] Fallback to op-by-op mode because memset node breaks graph update


[1m90/90[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m39s[0m 433ms/step - loss: 1.6329 - mae: 1.0071 - val_loss: 12.4382 - val_mae: 3.0892
Epoch 3/10
[1m90/90[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m39s[0m 429ms/step - loss: 1.2830 - mae: 0.9018 - val_loss: 2.3920 - val_mae: 1.2380
Epoch 4/10
[1m90/90[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m39s[0m 430ms/step - loss: 0.7577 - mae: 0.6834 - val_loss: 2.7782 - val_mae: 1.3386
Epoch 5/10
[1m90/90[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m39s[0m 430ms/step - loss: 0.6386 - mae: 0.6153 - val_loss: 1.1450 - val_mae: 0.7838
Epoch 6/10
[1m90/90[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m39s[0m 430ms/step - loss: 0.6244 - mae: 0.6291 - val_loss: 1.3987 - val_mae: 0.9107
Epoch 7/10
[1m90/90[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m39s[0m 430ms/step - loss: 0.6789 - mae: 0.6581 - val_loss: 4.1287 - val_mae: 1.7488
Epoch 8/10
[1m90/90[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m39s[0m 430ms/step - los

In [10]:
# Plot training & validation loss values
plt.figure(figsize=(12, 4))

plt.subplot(1, 2, 1)
plt.plot(history.history['loss'], label='Training Loss')
plt.plot(history.history['val_loss'], label='Validation Loss')
plt.title('Model Loss')
plt.xlabel('Epoch')
plt.ylabel('Loss')
plt.legend(loc='upper right')

# Plot training & validation accuracy values (if available)
if 'accuracy' in history.history:
    plt.subplot(1, 2, 2)
    plt.plot(history.history['accuracy'], label='Training Accuracy')
    plt.plot(history.history['val_accuracy'], label='Validation Accuracy')
    plt.title('Model Accuracy')
    plt.xlabel('Epoch')
    plt.ylabel('Accuracy')
    plt.legend(loc='upper left')

plt.tight_layout()
plt.show()

NameError: name 'plt' is not defined