In [None]:
import tensorflow as tf
import pathlib
import os
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import keras_cv
import h5py
from sklearn.model_selection import train_test_split 


# Training metadata table; later cells read its `isic_id` and `target` columns.
train_file = "isic-2024-challenge/train-metadata.csv"
df = pd.read_csv(filepath_or_buffer=train_file)

In [None]:
# Tunables for building the rebalanced frame.
IMAGE_DIR = 'isic-2024-challenge/train-image/image/'  # folder the JPEG paths point into
BENIGN_PER_MALIGNANT = 20   # benign rows kept per malignant row
MALIGNANT_COPIES = 13       # how many times each malignant row is repeated
SAMPLE_SEED = 2             # fixed seed so the benign subsample is reproducible

# Turn ids into file paths. Rows that already carry the prefix are left alone,
# so re-running this cell does not produce double-prefixed paths.
has_path = df['isic_id'].str.startswith(IMAGE_DIR)
df['isic_id'] = df['isic_id'].where(has_path, IMAGE_DIR + df['isic_id'] + '.jpg')
df_m = df[df['target'] == 1]
df_b = df[df['target'] == 0]

print(len(df_m.index))
# Never ask for more benign rows than exist (sample() would raise otherwise).
n_benign = min(len(df_m.index) * BENIGN_PER_MALIGNANT, len(df_b.index))
df_b = df_b.sample(n_benign, random_state=SAMPLE_SEED)
# Oversample the minority class by plain repetition, then append the benign subsample.
df_balanced = pd.concat([df_m] * MALIGNANT_COPIES + [df_b], axis=0)

df_balanced['target'].value_counts().plot.pie()
print(len(df_balanced.index))

In [None]:
# df_balanced contains repeated copies of the same rows. Splitting it row-wise
# would put copies of one image on both sides of the split and inflate the
# validation score, so the split is done on unique image ids instead.
unique_rows = df_balanced.drop_duplicates(subset='isic_id')
train_ids, test_ids = train_test_split(
    unique_rows['isic_id'],
    random_state=2,
    test_size=0.2,
    stratify=unique_rows['target'],  # keep the class ratio equal on both sides
)

# Every copy of an id follows that id to its side, so the class balance created
# by the repetition is preserved. Rows are shuffled because df_balanced is
# ordered by class.
shuffled_rows = df_balanced.sample(frac=1, random_state=2)
train_df = shuffled_rows[shuffled_rows['isic_id'].isin(train_ids)]
test_df = shuffled_rows[shuffled_rows['isic_id'].isin(test_ids)]
assert not set(train_df['isic_id']) & set(test_df['isic_id']), "train/test share images"

train_df['target'].value_counts().plot.pie()
print(len(train_df))

In [None]:
# Class distribution of the held-out split, followed by its row count.
test_class_counts = test_df['target'].value_counts()
test_class_counts.plot.pie()
print(len(test_df))

In [None]:

def process_data(dft):
    img_filenames = dft['isic_id']
    labels = dft['target']

    ds = tf.data.Dataset.from_tensor_slices((img_filenames, labels))

    def parse_image(filename, label):
        print(filename)   
        image = tf.io.read_file(filename)
        image = tf.io.decode_jpeg(image)
        img_shape = image.shape
        print("Shape is: ", img_shape)
        image = tf.image.convert_image_dtype(image, tf.float32)
        image = tf.image.resize(image,
                                [256,256],
                                preserve_aspect_ratio=False,
                                antialias=False,
                                name=None
                                )
        image = tf.image.random_flip_left_right(image)
        image = tf.image.random_flip_up_down(image)
        image = tf.image.random_jpeg_quality(image, 75, 95)
        image = tf.image.random_brightness(image, 0.22)
        image = tf.image.random_contrast(image, 0.5, 2.0)
        image = tf.image.random_saturation(image, 0.75, 1.25)
        image = tf.image.random_hue(image, 0.15)
        return image, label
    
    ds = ds.map(parse_image)
    
    return ds


In [None]:
ds_train = process_data(train_df)
ds_val = process_data(test_df)

del(train_df)
del(test_df)

image, label  = next(iter(ds_train))

ds_train = ds_train.batch(8)
ds_val = ds_val.batch(8)

In [None]:
# Pretrained EfficientNetV2-B2 feature extractor.
# NOTE(review): the input pipeline in this notebook feeds float images scaled to
# [0, 1]; confirm this matches what the preset backbone expects (it may apply
# its own rescaling).
backbone = keras_cv.models.EfficientNetV2Backbone.from_preset(
        "efficientnetv2_b2_imagenet"
)
# Single-unit head for binary classification. The head activation must be a
# sigmoid: BinaryCrossentropy() below is used with its default from_logits=False
# and therefore expects probabilities in [0, 1]. "swish" is unbounded and would
# make both the loss and the predicted scores meaningless.
model = keras_cv.models.ImageClassifier(
    backbone=backbone,
    num_classes=1,
    activation="sigmoid",
)
model.compile(
    loss=tf.keras.losses.BinaryCrossentropy(),
    optimizer=tf.keras.optimizers.Adam(learning_rate=1e-5),
    metrics=['accuracy']
)
model.summary()

# Stop once val_loss has not improved for 5 epochs (checked from epoch 20 on)
# and roll back to the best weights seen, so the model used for prediction is
# not the one from the last, worse epoch.
cb = tf.keras.callbacks.EarlyStopping(
    monitor='val_loss',
    min_delta=0,
    patience=5,
    start_from_epoch=20,
    restore_best_weights=True
)


model.fit(ds_train, validation_data=ds_val, epochs=50, callbacks=[cb])

In [None]:
# Drop the training pipelines if they are still around. Guarded so that
# re-running this cell after they are already gone does not raise NameError.
try:
    del ds_train, ds_val
except NameError:
    pass

hdf5_path = "isic-2024-challenge/test-image.hdf5"

class data_generator:
    """Callable yielding preprocessed images stored in an HDF5 file.

    Each entry is read with `hf[key][()]` and handed to the JPEG decoder, then
    converted to float32 and resized to 256x256 (no augmentation).

    Args:
        file: Path of the HDF5 file to read.
        keys: Optional iterable of entry names to yield, in that order. When
            omitted, every key of the file is yielded in the file's own order.
    """

    def __init__(self, file, keys=None):
        self.file = file
        self.keys = keys

    def __call__(self):
        with h5py.File(self.file, 'r') as hf:
            keys = self.keys if self.keys is not None else hf.keys()
            for im in keys:
                # channels=3 keeps every element consistent with the declared
                # (256, 256, 3) element shape of the dataset below.
                image = tf.io.decode_jpeg(hf[im][()], channels=3)
                image = tf.image.convert_image_dtype(image, tf.float32)
                image = tf.image.resize(image,
                                [256,256],
                                preserve_aspect_ratio=False,
                                antialias=False,
                                name=None
                                )
                yield image

with h5py.File(hdf5_path, 'r') as hf:
    isic_ids = list(hf.keys())

# Feed the generator the exact id list used for the submission so predictions
# and ids are aligned by construction.
ds = tf.data.Dataset.from_generator(
    data_generator(hdf5_path, keys=isic_ids),
    output_signature=tf.TensorSpec(shape=(256, 256, 3), dtype=tf.float32))

ds = ds.batch(32)

targets = model.predict(ds)

# One row per test image: its id and the model's score.
submission_df = pd.DataFrame({"isic_id": isic_ids, "target": targets.flatten()})
#submission_df.head()
submission_df.to_csv("submission.csv", index=False)