In [6]:
import os
import pandas as pd
import numpy as np
from sklearn.preprocessing import LabelEncoder
from sklearn.utils.class_weight import compute_class_weight
import tensorflow as tf
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.applications import EfficientNetB0
from tensorflow.keras.layers import Dense, Dropout, GlobalAveragePooling2D
from tensorflow.keras.models import Model
from tensorflow.keras.optimizers import Adam

# Load Data
# Root of the competition dataset; every CSV and image path below is derived from it.
DATA_DIR = '/kaggle/input/bttai-ajl-2025'
train_df = pd.read_csv(f'{DATA_DIR}/train.csv')
test_df = pd.read_csv(f'{DATA_DIR}/test.csv')

# Drop invalid fitzpatrick_scale values and missing labels.
# The `> 0` comparison is False for NaN, so rows with a missing scale are dropped too.
# .copy() makes train_df an independent frame, so the column assignments below
# write to it directly instead of to a filtered slice of the raw frame (the
# pattern pandas flags with SettingWithCopyWarning).
valid_rows = (train_df['fitzpatrick_scale'] > 0) & train_df['label'].notna()
train_df = train_df[valid_rows].copy()

# Add file path locations (vectorized string concatenation instead of a
# row-by-row apply).
# Training image path layout: <DATA_DIR>/train/train/<label>/<md5hash>.jpg
train_df['file_path'] = (
    DATA_DIR + '/train/train/'
    + train_df['label'].astype(str) + '/'
    + train_df['md5hash'].astype(str) + '.jpg'
)
# Test image path layout: <DATA_DIR>/test/test/<md5hash>.jpg
test_df['file_path'] = DATA_DIR + '/test/test/' + test_df['md5hash'].astype(str) + '.jpg'

# Encode labels
# The string labels are mapped to integer ids; label_encoder is kept so the
# predicted ids can be turned back into label strings later.
label_encoder = LabelEncoder()
train_df['encoded_label'] = label_encoder.fit_transform(train_df['label'])

# Define data augmentation
# Random geometric and brightness perturbations applied to training images only.
datagen = ImageDataGenerator(
    rotation_range=20,
    width_shift_range=0.2,
    height_shift_range=0.2,
    brightness_range=[0.8, 1.2],
    shear_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True,
    fill_mode='nearest'
)


In [8]:
# Load EfficientNetB0
# ImageNet-pretrained backbone without its classification head; its weights are
# frozen so only the layers added below are trained.
base_model = EfficientNetB0(
    weights='imagenet',
    include_top=False,
    input_shape=(224, 224, 3),
)
base_model.trainable = False

# Build model
# Head: global average pooling -> dropout -> softmax with one unit per encoded label.
num_classes = len(label_encoder.classes_)
x = GlobalAveragePooling2D()(base_model.output)
x = Dropout(0.5)(x)
output = Dense(num_classes, activation='softmax')(x)

model = Model(inputs=base_model.input, outputs=output)

# Compile model
# The sparse loss is used because the targets are integer class ids, not one-hot vectors.
model.compile(
    loss='sparse_categorical_crossentropy',
    optimizer=Adam(learning_rate=0.0001),
    metrics=['accuracy'],
)


In [9]:
# Compute class weights
# 'balanced' gives under-represented classes a proportionally larger weight.
encoded_labels = train_df['encoded_label']
class_weights = compute_class_weight(
    class_weight='balanced',
    classes=np.unique(encoded_labels),
    y=encoded_labels,
)
# Map each weight to its position in the sorted unique-label array, giving the
# {class_index: weight} dict that is later passed to model.fit.
class_weights_dict = {class_index: weight for class_index, weight in enumerate(class_weights)}


In [10]:
# Create train generator
# Streams augmented batches from disk. class_mode='raw' passes the integer
# 'encoded_label' values through unchanged as the targets.
train_flow_options = {
    'x_col': 'file_path',
    'y_col': 'encoded_label',
    'target_size': (224, 224),
    'batch_size': 32,
    'class_mode': 'raw',
}
train_generator = datagen.flow_from_dataframe(train_df, **train_flow_options)

# Train model
# class_weight rescales each sample's loss contribution by its class's weight.
history = model.fit(
    train_generator,
    class_weight=class_weights_dict,
    epochs=10,
)


Found 2752 validated image filenames.
Epoch 1/10


  self._warn_if_super_not_called()


[1m86/86[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m68s[0m 521ms/step - accuracy: 0.0455 - loss: 3.2706
Epoch 2/10
[1m86/86[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m41s[0m 435ms/step - accuracy: 0.0690 - loss: 3.1685
Epoch 3/10
[1m86/86[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m41s[0m 434ms/step - accuracy: 0.0917 - loss: 2.9919
Epoch 4/10
[1m86/86[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m40s[0m 432ms/step - accuracy: 0.1121 - loss: 2.9813
Epoch 5/10
[1m86/86[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m41s[0m 432ms/step - accuracy: 0.1478 - loss: 2.8496
Epoch 6/10
[1m86/86[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m40s[0m 429ms/step - accuracy: 0.1491 - loss: 2.7821
Epoch 7/10
[1m86/86[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m40s[0m 426ms/step - accuracy: 0.1820 - loss: 2.7587
Epoch 8/10
[1m86/86[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m40s[0m 431ms/step - accuracy: 0.1758 - loss: 2.7736
Epoch 9/10
[1m86/86[0m [32m━━━━━━━━━━━━━

In [11]:
# Prepare test generator
# No augmentation at inference time. shuffle=False keeps batches in test_df row
# order so predictions can be matched back to rows by position.
test_datagen = ImageDataGenerator()
test_generator = test_datagen.flow_from_dataframe(
    test_df,
    x_col='file_path',
    target_size=(224, 224),
    batch_size=32,
    class_mode=None,
    shuffle=False
)

# flow_from_dataframe validates filenames by default and silently ignores rows
# whose image is missing or invalid. If that happens, the predictions no longer
# line up one-to-one with test_df, so fail fast with a clear message instead of
# hitting a length-mismatch error only after the full prediction pass.
if test_generator.samples != len(test_df):
    raise ValueError(
        f"Test generator found {test_generator.samples} valid images but test_df has "
        f"{len(test_df)} rows; fix the missing or invalid files before predicting."
    )

# Make predictions
# One row of class probabilities per test image; argmax picks the most likely class id.
predictions = model.predict(test_generator)
predicted_classes = np.argmax(predictions, axis=1)

# Convert back to labels
test_df['label'] = label_encoder.inverse_transform(predicted_classes)

# Create submission file
submission = test_df[['md5hash', 'label']]
submission.to_csv('/kaggle/working/sample_submission.csv', index=False)


Found 1227 validated image filenames.
[1m39/39[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m23s[0m 476ms/step
