In [1]:
# Import Necessary Libraries
import tensorflow as tf
from tensorflow.keras.applications import Xception
from tensorflow.keras.layers import Dense, GlobalAveragePooling2D, Dropout
from tensorflow.keras.models import Model
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint, ReduceLROnPlateau
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
import numpy as np
import pandas as pd
import os

In [2]:
# Suppress TensorFlow logging
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
tf.get_logger().setLevel('ERROR')

In [3]:
# Load training and test data
train_df = pd.read_csv('/kaggle/input/bttai-ajl-2025/train.csv')
test_df = pd.read_csv('/kaggle/input/bttai-ajl-2025/test.csv')

# Add .jpg extension to md5hash column
train_df['md5hash'] = train_df['md5hash'].astype(str) + '.jpg'
test_df['md5hash'] = test_df['md5hash'].astype(str) + '.jpg'

# Combine label and md5hash to form the correct path
train_df['file_path'] = train_df['label'] + '/' + train_df['md5hash']

In [4]:
# Check the first few rows to understand the structure
print(train_df.head())

                                md5hash  fitzpatrick_scale  \
0  fd06d13de341cc75ad679916c5d7e6a6.jpg                  4   
1  a4bb4e5206c4e89a303f470576fc5253.jpg                  1   
2  c94ce27e389f96bda998e7c3fa5c4a2e.jpg                  5   
3  ebcf2b50dd943c700d4e2b586fcd4425.jpg                  3   
4  c77d6c895f05fea73a8f3704307036c0.jpg                  1   

   fitzpatrick_centaur                             label nine_partition_label  \
0                    4                 prurigo-nodularis     benign-epidermal   
1                    1  basal-cell-carcinoma-morpheiform  malignant-epidermal   
2                    5                            keloid         inflammatory   
3                    3              basal-cell-carcinoma  malignant-epidermal   
4                    1                 prurigo-nodularis     benign-epidermal   

  three_partition_label            qc  ddi_scale  \
0                benign           NaN         34   
1             malignant           Na

In [5]:
# Encode the labels
label_encoder = LabelEncoder()
train_df['encoded_label'] = label_encoder.fit_transform(train_df['label'])

# Split the data into training and validation sets
train_data, val_data = train_test_split(train_df, test_size=0.2, random_state=42)

# Define directories
train_dir = '/kaggle/input/bttai-ajl-2025/train/train/'
test_dir = '/kaggle/input/bttai-ajl-2025/test/test/'

# Function to load and preprocess images
def load_and_preprocess_image(file_path, label):
    image = tf.io.read_file(file_path)
    image = tf.image.decode_jpeg(image, channels=3)
    image = tf.image.resize(image, (224, 224))
    image = image / 255.0  # Normalize to [0, 1]
    return image, label

In [6]:
# Create training and validation datasets
def create_dataset(df, directory, batch_size=32, shuffle=True):
    file_paths = df['file_path'].apply(lambda x: os.path.join(directory, x)).values
    labels = df['encoded_label'].values

    dataset = tf.data.Dataset.from_tensor_slices((file_paths, labels))
    dataset = dataset.map(load_and_preprocess_image, num_parallel_calls=tf.data.AUTOTUNE)

    if shuffle:
        dataset = dataset.shuffle(buffer_size=len(df))

    dataset = dataset.batch(batch_size)
    dataset = dataset.repeat()  # Repeat indefinitely
    dataset = dataset.prefetch(tf.data.AUTOTUNE)

    return dataset

# Create training and validation datasets
train_dataset = create_dataset(train_data, train_dir, shuffle=True)
val_dataset = create_dataset(val_data, train_dir, shuffle=False)

# Calculate steps_per_epoch and validation_steps
batch_size = 32
steps_per_epoch = len(train_data) // batch_size
validation_steps = len(val_data) // batch_size

In [7]:
# Build the Xception model with fine-tuning
def build_xception_model(fine_tune=True):
    # Load pre-trained Xception model (without top layers)
    base_model = Xception(
        weights='imagenet',
        include_top=False,
        input_shape=(224, 224, 3)
    )

    # Freeze all layers in the base model if not fine-tuning
    for layer in base_model.layers:
        layer.trainable = fine_tune

    # Add custom classification layers
    x = base_model.output
    x = GlobalAveragePooling2D()(x)
    x = Dense(1024, activation='relu', kernel_regularizer=tf.keras.regularizers.l2(0.01))(x)
    x = Dropout(0.5)(x)
    num_classes = len(label_encoder.classes_)
    predictions = Dense(num_classes, activation='softmax')(x)

    # Create the final model
    model = Model(inputs=base_model.input, outputs=predictions)

    # Compile the model
    model.compile(
        optimizer=Adam(learning_rate=1e-4),
        loss='sparse_categorical_crossentropy',
        metrics=['accuracy']
    )
    return model

# Build the fine-tuned model
fine_tuned_model = build_xception_model(fine_tune=True)

# Add early stopping and learning rate scheduler
early_stopping = EarlyStopping(
    monitor='val_loss',
    patience=5,
    restore_best_weights=True
)

reduce_lr = ReduceLROnPlateau(
    monitor='val_loss',
    factor=0.2,
    patience=3,
    min_lr=1e-6
)

Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/xception/xception_weights_tf_dim_ordering_tf_kernels_notop.h5
[1m83683744/83683744[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 0us/step


In [8]:
# Train the fine-tuned Xception model
print("Training Fine-Tuned Xception Model")
fine_tuned_model.fit(
    train_dataset,
    steps_per_epoch=steps_per_epoch,
    validation_data=val_dataset,
    validation_steps=validation_steps,
    epochs=20,
    callbacks=[early_stopping, reduce_lr]
)

Training Fine-Tuned Xception Model
Epoch 1/20
[1m71/71[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m95s[0m 487ms/step - accuracy: 0.1332 - loss: 15.8607 - val_accuracy: 0.2776 - val_loss: 13.6058 - learning_rate: 1.0000e-04
Epoch 2/20
[1m71/71[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m70s[0m 612ms/step - accuracy: 0.4313 - loss: 12.5626 - val_accuracy: 0.4407 - val_loss: 11.1651 - learning_rate: 1.0000e-04
Epoch 3/20
[1m71/71[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m37s[0m 516ms/step - accuracy: 0.6486 - loss: 10.0528 - val_accuracy: 0.5130 - val_loss: 9.4066 - learning_rate: 1.0000e-04
Epoch 4/20
[1m71/71[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m37s[0m 519ms/step - accuracy: 0.8097 - loss: 8.0991 - val_accuracy: 0.5204 - val_loss: 8.1135 - learning_rate: 1.0000e-04
Epoch 5/20
[1m71/71[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m36s[0m 504ms/step - accuracy: 0.9376 - loss: 6.5031 - val_accuracy: 0.5463 - val_loss: 6.9631 - learning_rate: 1.0000e-04
Epoch

<keras.src.callbacks.history.History at 0x7add4d231ed0>

In [9]:
# Create a dataset for the test data
test_file_paths = test_df['md5hash'].apply(lambda x: os.path.join(test_dir, x)).values
test_labels = np.zeros(len(test_file_paths))  # Dummy labels
test_dataset = tf.data.Dataset.from_tensor_slices((test_file_paths, test_labels))
test_dataset = test_dataset.map(load_and_preprocess_image, num_parallel_calls=tf.data.AUTOTUNE)
test_dataset = test_dataset.batch(batch_size)
test_dataset = test_dataset.prefetch(tf.data.AUTOTUNE)

In [10]:
# Generate predictions using the fine-tuned model
predictions = fine_tuned_model.predict(test_dataset)
predicted_classes = np.argmax(predictions, axis=1)
predicted_labels = label_encoder.inverse_transform(predicted_classes)

[1m39/39[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m13s[0m 271ms/step


In [11]:
# Create the submission DataFrame
submission_df = pd.DataFrame({
    'md5hash': test_df['md5hash'],
    'label': predicted_labels
})

In [12]:
# Remove .jpg from md5hash
submission_df['md5hash'] = submission_df['md5hash'].str.replace('.jpg', '')

# Save the submission file
submission_df.to_csv('submission.csv', index=False)

# Verify the first few rows
print(submission_df.head())

                            md5hash                 label
0  0844ae634f0e6e7ef1f73c2aeecbae0e     prurigo-nodularis
1  3b290d262098f761d719aa07cf36c040  basal-cell-carcinoma
2  cf561d08ac46d0fda678bff6621005ee  basal-cell-carcinoma
3  e6371069be05c6b0a95b4b3f1bacc9a5         acne-vulgaris
4  f76cddb37265f97508f159078dcc7e7c          folliculitis
