In [None]:
%pip install -q -U numpy pandas matplotlib scikit-learn tensorflow[and-cuda] tensorflow-datasets kaggle



In [None]:
import tensorflow as tf

# list_physical_devices returns a list of device handles, so take its length
# to report the count that the label promises.
print("Num GPUs Available: ", len(tf.config.list_physical_devices('GPU')))


In [10]:
import os
# Ask TensorFlow's native logger to stay quiet (level '3').
# NOTE(review): this variable is presumably read when TensorFlow initialises its
# logging, and `import tensorflow` already ran in an earlier cell — confirm it
# still takes effect here, or move this assignment ahead of the first import.
os.environ.update(TF_CPP_MIN_LOG_LEVEL='3')

In [None]:
# import kaggle
# from kaggle.api.kaggle_api_extended import KaggleApi
# import os
# import zipfile

# api = KaggleApi()
# api.authenticate()

# dataset_path = "ham10000_data"

# if not os.path.exists(dataset_path):
#     os.makedirs(dataset_path)

# api.dataset_download_files('kmader/skin-cancer-mnist-ham10000', path=dataset_path, unzip=True)

# zip_file_path = os.path.join(dataset_path, 'skin-cancer-mnist-ham10000.zip')
# if os.path.exists(zip_file_path):
#     with zipfile.ZipFile(zip_file_path, 'r') as zip_ref:
#         zip_ref.extractall(dataset_path)
#     print(f"Dataset extracted to: {dataset_path}")
# else:
#     print("Dataset already extracted or no zip file found.")


In [None]:
%ls ham10000_data/images/


In [None]:
import pandas as pd
import os
import tensorflow as tf

# Locations of the metadata table and of the image folder
csv_path = 'ham10000_data/HAM10000_metadata.csv'
image_dir = 'ham10000_data/images/'

# Read the metadata table into a DataFrame
df = pd.read_csv(csv_path)

# Assign each distinct diagnosis code in `dx` an integer class id, numbered in
# order of first appearance, and store the id per row in a new `label` column
label_mapping = dict(zip(df['dx'].unique(), range(df['dx'].nunique(dropna=False))))
df['label'] = df['dx'].map(label_mapping)

# Quick sanity check of the first rows
print(df.head())


In [7]:
def load_image(image_id, label):
    """Load one image from `image_dir` and prepare it as ResNet input.

    Args:
        image_id: File stem of the image (no extension); ".jpg" is appended.
            May be a Python string or a scalar string tensor.
        label: Label belonging to the image; returned unchanged.

    Returns:
        Tuple `(image, label)` where `image` is the decoded 3-channel image
        resized to 224x224 and passed through the Keras ResNet
        `preprocess_input`.
    """
    # `image_dir` may carry a trailing "/"; strip it so that joining with "/"
    # does not yield a doubled separator ("images//<id>.jpg").
    image_path = tf.strings.join(
        [str(image_dir).rstrip("/"), image_id + ".jpg"], separator="/"
    )

    # Read the raw bytes and decode them as a 3-channel JPEG
    image = tf.io.read_file(image_path)
    image = tf.image.decode_jpeg(image, channels=3)

    # Match the (224, 224, 3) input shape the model is built with
    image = tf.image.resize(image, (224, 224))

    # Apply the normalisation that the pre-trained Keras ResNet weights expect
    image = tf.keras.applications.resnet50.preprocess_input(image)

    return image, label


In [None]:
# Raw image ids and integer labels as NumPy arrays
image_ids = df['image_id'].values
labels = df['label'].values

# Batch size and 80/20 train/validation split point
batch_size = 32
train_size = int(0.8 * len(df))

# Shuffle the (id, label) pairs ONCE, before any image is decoded.
# - reshuffle_each_iteration=False is essential: with the default (True) the
#   order is redrawn on every pass, so take()/skip() would pick different
#   samples each epoch and validation images would leak into training.
# - Shuffling ids instead of decoded images keeps the shuffle buffer tiny.
# - A fixed seed makes the split reproducible across kernel restarts.
# NOTE(review): the split is per image; if several images can show the same
# lesion, consider a group-aware split — confirm against the metadata.
SPLIT_SEED = 42
id_dataset = tf.data.Dataset.from_tensor_slices((image_ids, labels)).shuffle(
    buffer_size=len(df), seed=SPLIT_SEED, reshuffle_each_iteration=False
)

# Full decoded dataset in split order (lazy; nothing is read until iterated)
dataset = id_dataset.map(load_image, num_parallel_calls=tf.data.AUTOTUNE)

# Training pipeline: reorder the training ids on every pass, decode in
# parallel, then batch -> repeat -> prefetch. model.fit is driven by
# steps_per_epoch, hence the repeat().
train_dataset = (
    id_dataset.take(train_size)
    .shuffle(buffer_size=train_size)
    .map(load_image, num_parallel_calls=tf.data.AUTOTUNE)
    .batch(batch_size)
    .repeat()
    .prefetch(tf.data.AUTOTUNE)
)

# Validation pipeline: same steps without per-epoch shuffling, so every
# validation run sees the samples in the same order.
val_dataset = (
    id_dataset.skip(train_size)
    .map(load_image, num_parallel_calls=tf.data.AUTOTUNE)
    .batch(batch_size)
    .repeat()
    .prefetch(tf.data.AUTOTUNE)
)


In [None]:
from tensorflow.keras.applications import ResNet152
from tensorflow.keras.layers import GlobalAveragePooling2D, Dense
from tensorflow.keras.models import Model

# Replicate the model across the GPUs visible to TensorFlow
strategy = tf.distribute.MirroredStrategy()

# Size the classifier head from the label mapping instead of hard-coding it,
# so the output layer always matches the integer labels fed to the loss.
num_classes = len(label_mapping)

with strategy.scope():
    # ResNet152 backbone with ImageNet weights and without its classifier head
    resnet_model = ResNet152(weights='imagenet', include_top=False, input_shape=(224, 224, 3))

    # Freeze the backbone so that only the new head is trained
    for layer in resnet_model.layers:
        layer.trainable = False

    # Classification head: global pooling -> hidden layer -> class probabilities
    x = GlobalAveragePooling2D()(resnet_model.output)
    x = Dense(512, activation='relu')(x)
    x = Dense(num_classes, activation='softmax')(x)

    # Assemble the full model
    model = Model(inputs=resnet_model.input, outputs=x)

    # Integer labels -> sparse categorical cross-entropy
    model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])

# Steps per pass, rounded UP so the final partial batch is included.
# Both datasets repeat indefinitely, so Keras needs explicit step counts; with
# floor division the last partial validation batch would never be evaluated
# and training epochs would drift relative to passes over the data.
val_size = len(df) - train_size
steps_per_epoch = (train_size + batch_size - 1) // batch_size
validation_steps = (val_size + batch_size - 1) // batch_size

# Train the head on top of the frozen backbone
model.fit(
    train_dataset,
    validation_data=val_dataset,
    epochs=10,
    steps_per_epoch=steps_per_epoch,
    validation_steps=validation_steps
)


In [None]:
# Smaller batches for the pipelines defined below
batch_size = 8

# Rebuild both pipelines from the raw ids rather than re-mapping the existing
# train_dataset/val_dataset: those are already decoded, batched and repeated,
# so mapping an image loader over them (and caching an endless dataset) cannot
# work. `load_image` is the loader defined earlier in this notebook.
# The one-off shuffle uses reshuffle_each_iteration=False so that take()/skip()
# always select the same samples and the two splits never overlap.
split_source = tf.data.Dataset.from_tensor_slices((image_ids, labels)).shuffle(
    buffer_size=len(image_ids), seed=42, reshuffle_each_iteration=False
)

# Order matters: decode -> cache -> shuffle -> batch -> prefetch.
# cache() sits directly after decoding so images are read from disk only once,
# and shuffle() comes after it so each epoch still gets a fresh order.
# NOTE(review): the cache holds every decoded 224x224x3 float image in memory
# (~0.6 MB each); confirm this fits in RAM or pass a filename to cache().
train_dataset = (
    split_source.take(train_size)
    .map(load_image, num_parallel_calls=tf.data.AUTOTUNE)
    .cache()
    .shuffle(buffer_size=1000)
    .batch(batch_size)
    .prefetch(tf.data.AUTOTUNE)
)

# Validation needs no shuffling; otherwise the same steps as above
val_dataset = (
    split_source.skip(train_size)
    .map(load_image, num_parallel_calls=tf.data.AUTOTUNE)
    .cache()
    .batch(batch_size)
    .prefetch(tf.data.AUTOTUNE)
)


In [None]:
# Random image augmentations, applied in sequence.
# Referenced through `tf.keras` because only `tf` is imported in this notebook;
# a bare `keras` name would raise NameError.
# NOTE(review): RandomContrast is documented to clip its output to a pixel
# value range — if so, apply this block to raw pixels before preprocess_input
# rather than to the mean-centred tensors; confirm where it is used.
data_augmentation = tf.keras.Sequential([
    tf.keras.layers.RandomFlip('horizontal'),
    tf.keras.layers.RandomRotation(0.1),
    tf.keras.layers.RandomZoom(0.1),
    tf.keras.layers.RandomContrast(0.1),
    tf.keras.layers.RandomTranslation(0.1, 0.1),
])
