In [None]:
!pip install kaggle



In [None]:
!pip install tensorflow




In [None]:
import os
import zipfile

# Create the ~/.kaggle directory and copy kaggle.json into it.
# Expects kaggle.json to be present in the current working directory.
!mkdir -p ~/.kaggle
!cp kaggle.json ~/.kaggle/
# Restrict the credentials file to owner read/write only (mode 600).
!chmod 600 ~/.kaggle/kaggle.json  # Set permissions


In [None]:
# Download the resized ISIC 2019 dataset archive with the Kaggle CLI.
# The next cell expects the resulting isic-2019-jpg-224x224-resized.zip
# to be in the current working directory.
!kaggle datasets download -d nischaydnk/isic-2019-jpg-224x224-resized


Dataset URL: https://www.kaggle.com/datasets/nischaydnk/isic-2019-jpg-224x224-resized
License(s): CC0-1.0
Downloading isic-2019-jpg-224x224-resized.zip to /content
100% 353M/355M [00:00<00:00, 452MB/s]
100% 355M/355M [00:00<00:00, 501MB/s]


In [None]:
import zipfile

# Unpack the downloaded Kaggle archive into the isic_dataset folder.
with zipfile.ZipFile(file="isic-2019-jpg-224x224-resized.zip", mode='r') as zip_ref:
    zip_ref.extractall(path="isic_dataset")


In [None]:
import os

# Show the top-level entries of the extracted dataset folder
dataset_entries = os.listdir("isic_dataset")
print(dataset_entries)


['train-image', 'train-metadata.csv', '.ipynb_checkpoints']


In [None]:
import pandas as pd
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split

# Read the metadata table and derive the two columns used downstream:
#   image - file name on disk (isic_id plus the ".jpg" extension)
#   label - copy of the already-binary 'target' column
df = pd.read_csv("isic_dataset/train-metadata.csv")
df = df.assign(
    image=df['isic_id'] + '.jpg',
    label=df['target'],
)

# Hold out 20% of the rows for validation, stratified on the label and
# seeded so the split is reproducible.
train_df, val_df = train_test_split(
    df,
    test_size=0.2,
    random_state=42,
    stratify=df['label'],
)

# Sanity check on the split sizes
print(f"Train size: {len(train_df)}, Validation size: {len(val_df)}")


Train size: 20264, Validation size: 5067


In [None]:
import tensorflow as tf
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.applications import EfficientNetB0
from tensorflow.keras import layers, models

# Training configuration shared by the cells below.
# IMG_SIZE: side length (px) of the square model input
# BATCH_SIZE: images per generator batch
# EPOCHS: number of epochs passed to model.fit
IMG_SIZE, BATCH_SIZE, EPOCHS = 224, 48, 7

# Folder that holds the .jpg files referenced by the metadata table
image_dir = 'isic_dataset/train-image/image'



In [None]:
from tensorflow.keras.applications import EfficientNetB0
from tensorflow.keras.layers import GlobalAveragePooling2D, Dense, Dropout
from tensorflow.keras.models import Model
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.callbacks import ReduceLROnPlateau
from tensorflow.keras.callbacks import EarlyStopping

# --- Data generators ---------------------------------------------------------
# Pixels are deliberately left in the raw [0, 255] range: the Keras
# EfficientNet application models rescale/normalise their input internally and
# expect unscaled pixel values. Adding rescale=1./255 here would scale the
# images twice and feed the pretrained backbone inputs far outside the range
# its weights were trained on.
train_datagen = ImageDataGenerator(
    rotation_range=15,
    zoom_range=0.2,
    horizontal_flip=True
)
val_datagen = ImageDataGenerator()

# class_mode='categorical' requires string labels. Build new frames with
# .assign instead of writing into the split results in place; this avoids
# pandas' SettingWithCopyWarning and keeps the cell safe to re-run.
train_df = train_df.assign(label=train_df['label'].astype(str))
val_df = val_df.assign(label=val_df['label'].astype(str))

train_generator = train_datagen.flow_from_dataframe(
    dataframe=train_df,
    directory=image_dir,
    x_col='image',
    y_col='label',
    target_size=(IMG_SIZE, IMG_SIZE),
    batch_size=BATCH_SIZE,
    class_mode='categorical'  # one-hot labels, one column per class
)

val_generator = val_datagen.flow_from_dataframe(
    dataframe=val_df,
    directory=image_dir,
    x_col='image',
    y_col='label',
    target_size=(IMG_SIZE, IMG_SIZE),
    batch_size=BATCH_SIZE,
    class_mode='categorical',
    shuffle=False  # fixed order so predictions can be matched to file names
)

# --- Model -------------------------------------------------------------------
# EfficientNetB0 backbone with ImageNet weights and no classification head.
base_model = EfficientNetB0(weights='imagenet', include_top=False, input_shape=(IMG_SIZE, IMG_SIZE, 3))
base_model.trainable = False  # Freeze the base for transfer learning

# Size the output layer from the labels the generator actually found
# (2 for this dataset) instead of hard-coding it.
num_classes = len(train_generator.class_indices)

# Custom classification head on top of the frozen backbone
x = base_model.output
x = GlobalAveragePooling2D()(x)
x = Dense(256, activation='relu')(x)
x = Dropout(0.4)(x)
x = Dense(64, activation='relu')(x)
predictions = Dense(num_classes, activation='softmax')(x)

model = Model(inputs=base_model.input, outputs=predictions)

# --- Callbacks ---------------------------------------------------------------
# NOTE(review): early-stopping patience (7) equals the configured EPOCHS (7),
# so it cannot trigger within a single run; lower it or raise EPOCHS if early
# stopping is actually wanted.
reduce_lr = ReduceLROnPlateau(monitor='val_loss', factor=0.5, patience=3, min_lr=1e-7, verbose=1)
early_stop = EarlyStopping(monitor='val_loss', patience=7, restore_best_weights=True, verbose=1)

# --- Compile & train ---------------------------------------------------------
# NOTE(review): 1e-5 is a very small step size when only the new head is
# trainable; consider a larger rate if the loss plateaus early.
model.compile(
    optimizer=Adam(learning_rate=1e-5),
    loss='categorical_crossentropy',  # matches the one-hot labels + softmax output
    metrics=['accuracy']
)

history = model.fit(
    train_generator,
    epochs=EPOCHS,
    validation_data=val_generator,
    callbacks=[early_stop,reduce_lr]
)


Found 20264 validated image filenames belonging to 2 classes.
Found 5067 validated image filenames belonging to 2 classes.


  self._warn_if_super_not_called()


Epoch 1/7
[1m423/423[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m301s[0m 657ms/step - accuracy: 0.8187 - loss: 0.5020 - val_accuracy: 0.8214 - val_loss: 0.4695 - learning_rate: 1.0000e-05
Epoch 2/7
[1m423/423[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m253s[0m 597ms/step - accuracy: 0.8160 - loss: 0.4804 - val_accuracy: 0.8214 - val_loss: 0.4693 - learning_rate: 1.0000e-05
Epoch 3/7
[1m423/423[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m254s[0m 600ms/step - accuracy: 0.8190 - loss: 0.4761 - val_accuracy: 0.8214 - val_loss: 0.4693 - learning_rate: 1.0000e-05
Epoch 4/7
[1m423/423[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m257s[0m 607ms/step - accuracy: 0.8257 - loss: 0.4674 - val_accuracy: 0.8214 - val_loss: 0.4693 - learning_rate: 1.0000e-05
Epoch 5/7
[1m423/423[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 583ms/step - accuracy: 0.8231 - loss: 0.4685
Epoch 5: ReduceLROnPlateau reducing learning rate to 4.999999873689376e-06.
[1m423/423[0m [32m━━━━━━━━━━

In [None]:
# Show the file name stored for the first training row
first_image_name = train_df['image'].iloc[0]
print("Sample image filename:", first_image_name)



Sample image filename: ISIC_0069696.jpg


In [None]:
# Show the folder the generators read images from
print("Image directory:", image_dir, sep=" ")


Image directory: isic_dataset/train-image/image


In [None]:
# Build the on-disk path of the first training image and confirm it exists
sample_name = train_df['image'].iloc[0]
image_path = os.path.join(image_dir, sample_name)
path_found = os.path.exists(image_path)
print("Full path:", image_path)
print("Exists?", path_found)


Full path: isic_dataset/train-image/image/ISIC_0069696.jpg
Exists? True
