## 1. Import Necessary Libraries

In [None]:
# Import Necessary Libraries
import pandas as pd
import numpy as np
import os
import tensorflow as tf
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras import layers, models, callbacks
from tensorflow.keras.callbacks import EarlyStopping
from sklearn.metrics import f1_score
from sklearn.utils.class_weight import compute_class_weight
from tensorflow.keras import metrics
from tensorflow.keras.applications import ResNet50
from tensorflow.keras.applications import EfficientNetB0
from tensorflow.keras.layers import Dense, Flatten, Dropout, GlobalAveragePooling2D

## 2. Load Data

In [None]:
# Load Data
# Root of the competition dataset; every path below is derived from it.
DATA_DIR = '/kaggle/input/bttai-ajl-2025'

train_df = pd.read_csv(f"{DATA_DIR}/train.csv")
test_df = pd.read_csv(f"{DATA_DIR}/test.csv")

# Generate file paths: training images sit in one sub-folder per label,
# test images sit in a single flat folder. A comprehension over the columns
# produces the same strings as a row-wise DataFrame.apply, without the
# per-row Series construction.
train_df['file_path'] = [
    f"{DATA_DIR}/train/train/{label}/{md5hash}.jpg"
    for label, md5hash in zip(train_df['label'], train_df['md5hash'])
]
test_df['file_path'] = [
    f"{DATA_DIR}/test/test/{md5hash}.jpg" for md5hash in test_df['md5hash']
]

# Remove invalid rows: keep only training rows with a positive
# fitzpatrick_scale and a non-missing label, then drop any row (train or test)
# whose image file is not on disk.
has_valid_metadata = (train_df['fitzpatrick_scale'] > 0) & train_df['label'].notna()
train_df = train_df[has_valid_metadata]
# .copy() detaches the filtered frames from their parents so later cells can
# add columns to them without writing into a slice.
train_df = train_df[train_df['file_path'].apply(os.path.exists)].copy()
test_df = test_df[test_df['file_path'].apply(os.path.exists)].copy()

# Report how many usable images remain after filtering.
print(f"Valid training images: {len(train_df)}")
print(f"Valid testing images: {len(test_df)}")

Valid training images: 2752
Valid testing images: 1227


## 3. Data Preprocessing

In [None]:
# Data Preprocessing

# Fit the encoder on the string labels, then map every label to its integer
# class id. The fitted encoder is kept so predictions can be decoded later.
label_encoder = LabelEncoder()
label_encoder.fit(train_df['label'])
train_df['encoded_label'] = label_encoder.transform(train_df['label'])

# Hold out 20% of the rows for validation, stratified on the encoded label so
# both parts keep the same class proportions.
# Note: `train_df` is rebound here to the remaining 80% training portion.
train_df, val_df = train_test_split(
    train_df,
    stratify=train_df['encoded_label'],
    test_size=0.2,
    random_state=42,
)



# Define image data generators for training, validation and testing.

# Shared settings so the three generators cannot drift apart.
IMAGE_SIZE = (224, 224)
BATCH_SIZE = 128

# The ImageNet-pretrained DenseNet backbone used for training expects inputs
# normalised by its own preprocess_input; feeding raw 0-255 pixels does not
# match what the pretrained weights were trained on. ImageDataGenerator applies
# this function after resizing and augmentation.
densenet_preprocess = tf.keras.applications.densenet.preprocess_input

# Training data: light geometric and brightness augmentation, then DenseNet
# normalisation.
train_datagen = ImageDataGenerator(
    preprocessing_function=densenet_preprocess,
    rotation_range=20,
    width_shift_range=0.1,
    height_shift_range=0.1,
    brightness_range=[0.9, 1.1],
    shear_range=0.1,
    zoom_range=0.1,
    horizontal_flip=True,
    fill_mode='nearest'
)

# class_mode='raw' passes the integer class ids through unchanged, which is
# what a sparse categorical loss consumes.
train_generator = train_datagen.flow_from_dataframe(
    train_df,
    x_col='file_path',
    y_col='encoded_label',
    target_size=IMAGE_SIZE,
    batch_size=BATCH_SIZE,
    class_mode='raw',
    shuffle=True,
    seed=42  # reproducible shuffling and augmentation
)

# Validation data: no augmentation, same normalisation. shuffle=False keeps
# the batches in dataframe order so predictions line up with the labels.
val_datagen = ImageDataGenerator(preprocessing_function=densenet_preprocess)
val_generator = val_datagen.flow_from_dataframe(
    val_df,
    x_col='file_path',
    y_col='encoded_label',  # Provide labels
    target_size=IMAGE_SIZE,
    batch_size=BATCH_SIZE,
    class_mode='raw',
    shuffle=False
)

# Test data: no labels (class_mode=None), no augmentation, fixed order.
test_datagen = ImageDataGenerator(preprocessing_function=densenet_preprocess)
test_generator = test_datagen.flow_from_dataframe(
    test_df,
    x_col='file_path',
    target_size=IMAGE_SIZE,
    batch_size=BATCH_SIZE,
    class_mode=None,
    shuffle=False
)

# Compute Class Weights
# "balanced" gives rarer classes proportionally larger weights so they are
# not drowned out by frequent classes during training.
present_classes = np.unique(train_df['encoded_label'])
class_weights = compute_class_weight(
    class_weight="balanced",
    classes=present_classes,
    y=train_df['encoded_label']
)
# Key each weight by its actual class id rather than by its position in the
# array, so a class id missing from the training split cannot shift every
# later weight onto the wrong class. Plain int/float keep the dict free of
# NumPy scalar types.
class_weights_dict = {
    int(class_id): float(weight)
    for class_id, weight in zip(present_classes, class_weights)
}

Found 2201 validated image filenames.
Found 551 validated image filenames.
Found 1227 validated image filenames.


## 4. Model Training

In [None]:

# Seed Python, NumPy and TensorFlow so weight initialisation and dropout are
# repeatable across Restart & Run All.
tf.keras.utils.set_random_seed(42)

# One output unit per class known to the fitted label encoder, instead of a
# hard-coded count that can silently disagree with the data.
num_classes = len(label_encoder.classes_)

# ImageNet-pretrained DenseNet121 backbone without its classification head.
base_model = tf.keras.applications.DenseNet121(weights='imagenet', include_top=False, input_shape=(224, 224, 3))
base_model.trainable = True
for layer in base_model.layers[:100]:  # Freeze the first 100 layers (adjust if needed)
    layer.trainable = False

# Classification head: global pooling -> small dense layer -> dropout -> softmax.
densenet_model = models.Sequential([
    base_model,
    layers.GlobalAveragePooling2D(),
    layers.Dense(128, activation='relu'),
    layers.Dropout(0.25),
    layers.Dense(num_classes, activation='softmax')
])

# Sparse categorical cross-entropy matches the integer class ids yielded by
# the generators.
densenet_model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=1e-3),
              loss='sparse_categorical_crossentropy',
              metrics=['accuracy'])

# Define callbacks for early stopping, learning rate reduction, and model checkpointing.
# All three watch val_loss: stop after 5 stagnant epochs (restoring the best
# weights), halve the learning rate after 3, and save only the best model.
early_stop = callbacks.EarlyStopping(monitor='val_loss', patience=5, restore_best_weights=True)
reduce_lr = callbacks.ReduceLROnPlateau(monitor='val_loss', factor=0.5, patience=3, min_lr=1e-6)
model_ckpt = callbacks.ModelCheckpoint('best_densenet.h5.keras', monitor='val_loss', save_best_only=True)

# Train the model; keep the History object so the per-epoch metrics can be
# inspected afterwards.
history = densenet_model.fit(
    train_generator,
    epochs=15,
    validation_data=val_generator,
    class_weight=class_weights_dict,
    callbacks=[early_stop, reduce_lr, model_ckpt]
)

Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/densenet/densenet121_weights_tf_dim_ordering_tf_kernels_notop.h5
[1m29084464/29084464[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 0us/step


  self._warn_if_super_not_called()


Epoch 1/15
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m164s[0m 8s/step - accuracy: 0.1453 - loss: 3.0503
Epoch 2/15
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m147s[0m 8s/step - accuracy: 0.3280 - loss: 2.2694
Epoch 3/15
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m147s[0m 8s/step - accuracy: 0.4213 - loss: 1.8969
Epoch 4/15
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m152s[0m 8s/step - accuracy: 0.4757 - loss: 1.7146
Epoch 5/15
[1m 3/18[0m [32m━━━[0m[37m━━━━━━━━━━━━━━━━━[0m [1m1:59[0m 8s/step - accuracy: 0.5109 - loss: 1.5672

In [None]:
# Evaluate on the validation split and write the test-set submission.
# Assumes densenet_model has already been trained or had weights loaded.

# Generate predictions on the validation set: argmax over the softmax output
# gives the predicted class id per image.
val_probabilities = densenet_model.predict(val_generator)
y_pred_dense = np.argmax(val_probabilities, axis=1)
# Take the ground truth from the generator itself: its labels cover exactly
# the rows it yields, in the same order, even if filename validation dropped
# some rows from val_df.
y_true = np.asarray(val_generator.labels)

# Calculate F1 Score (weighted by class support)
f1 = f1_score(y_true, y_pred_dense, average='weighted')
print("DenseNet121 F1 Score:", f1)

# Generate predictions on the test set
test_probabilities = densenet_model.predict(test_generator)
y_pred_test = np.argmax(test_probabilities, axis=1)
if len(y_pred_test) != len(test_df):
    # The generator yielded a different number of images than test_df has
    # rows, so predictions cannot be matched to md5hash values by position.
    raise ValueError(
        f"Got {len(y_pred_test)} test predictions for {len(test_df)} test rows"
    )
# Decode class ids back to label names; assign() returns a new frame rather
# than writing into a filtered slice.
test_df = test_df.assign(label=label_encoder.inverse_transform(y_pred_test))

# Save submission
test_df[['md5hash', 'label']].to_csv('/kaggle/working/submission.csv', index=False)

[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 518ms/step
Efficient Net b0 F1 Score: 0.5449766222577818


  self._warn_if_super_not_called()


[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m19s[0m 2s/step
