In [None]:
# STEP 1: Run on the CPU only
import os

# "-1" hides every CUDA device. This is set here, ahead of the TensorFlow
# import later in the notebook, so the GPU is disabled completely.
os.environ.update({"CUDA_VISIBLE_DEVICES": "-1"})

In [None]:
# STEP 2: Install required packages
!pip install -q kaggle timm albumentations
!pip install -q seaborn scikit-learn

In [None]:
# STEP 3: Import libraries
import os
import pandas as pd
import numpy as np
import seaborn as sns
from tensorflow.keras.models import Model
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint
import matplotlib.pyplot as plt
import shutil
import zipfile
import tensorflow as tf
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.applications import MobileNetV2
from tensorflow.keras.layers import GlobalAveragePooling2D, Dense, Dropout

In [None]:
# STEP 4: Upload kaggle.json to access dataset
# Colab-specific helper (presumably unavailable outside a Colab runtime).
from google.colab import files
files.upload()  # Upload your kaggle.json

# Save to correct location
# NOTE: the `mv` below assumes the uploaded file is named exactly
# `kaggle.json` and sits in the current working directory.
!mkdir -p ~/.kaggle
!mv kaggle.json ~/.kaggle/
# Restrict the credentials file to owner read/write only.
!chmod 600 ~/.kaggle/kaggle.json

In [None]:
# Download dataset
!kaggle datasets download -d kmader/skin-cancer-mnist-ham10000

# Unzip
!unzip -q skin-cancer-mnist-ham10000.zip -d ham10000_data

In [None]:
# Load Metadata
df = pd.read_csv("ham10000_data/HAM10000_metadata.csv")

# Fix image paths: the images are spread over two folders, so index every
# .jpg in both by its image id (the file name without extension).
image_dir1 = "ham10000_data/HAM10000_images_part_1"
image_dir2 = "ham10000_data/HAM10000_images_part_2"

all_image_paths = {}
for image_dir in (image_dir1, image_dir2):
    all_image_paths.update({
        os.path.splitext(f)[0]: os.path.join(image_dir, f)
        for f in os.listdir(image_dir) if f.endswith(".jpg")
    })

# Derive the new columns in one chain that returns a fresh frame. Assigning
# columns onto the result of dropna() can trigger chained-assignment
# warnings; .assign() avoids that.
df = (
    df.assign(path=df["image_id"].map(all_image_paths))
      .dropna(subset=["path"])                  # drop rows whose image file was not found
      .assign(label=lambda frame: frame["dx"])  # diagnosis code is the class label
)

# Fail here with a clear message rather than later inside the stratified split.
if df.empty:
    raise FileNotFoundError(
        f"No metadata row matched a .jpg file in {image_dir1!r} or {image_dir2!r}"
    )

# STEP 5: Train/Validation Split (stratified on the label, fixed seed)
train_df, val_df = train_test_split(df, test_size=0.2, stratify=df["label"], random_state=42)
# NOTE(review): this splits individual rows. If the metadata holds several
# images of the same lesion, related images can land in both splits and
# inflate validation accuracy. Confirm, and consider a grouped split.

# STEP 6: Preprocessing and Data Augmentation
# The ImageNet MobileNetV2 weights used below expect pixels scaled to
# [-1, 1]; mobilenet_v2.preprocess_input applies exactly that scaling.
# It replaces the former rescale=1./255, which fed the pretrained backbone
# inputs in [0, 1]. The generator applies preprocessing_function after the
# resize and the random augmentations.
mobilenet_preprocess = tf.keras.applications.mobilenet_v2.preprocess_input
train_gen = ImageDataGenerator(
    preprocessing_function=mobilenet_preprocess,
    horizontal_flip=True,
    zoom_range=0.2,
)
val_gen = ImageDataGenerator(preprocessing_function=mobilenet_preprocess)

# Settings shared by the training and validation iterators.
flow_kwargs = dict(
    x_col="path", y_col="label",
    target_size=(224, 224),
    class_mode="categorical",
    batch_size=16,
)

# Training batches are shuffled; validation order is kept fixed.
train_generator = train_gen.flow_from_dataframe(train_df, shuffle=True, **flow_kwargs)
val_generator = val_gen.flow_from_dataframe(val_df, shuffle=False, **flow_kwargs)

# Number of output classes comes from the label -> index mapping built by the generator.
num_classes = len(train_generator.class_indices)

# STEP 7: Build the Model
# ImageNet-pretrained MobileNetV2 backbone without its classification top.
base_model = MobileNetV2(
    input_shape=(224, 224, 3),
    include_top=False,
    weights='imagenet',
)

# Classification head: global average pool -> dropout -> 128-unit ReLU ->
# softmax over num_classes.
pooled = GlobalAveragePooling2D()(base_model.output)
regularized = Dropout(0.3)(pooled)
hidden = Dense(128, activation='relu')(regularized)
predictions = Dense(num_classes, activation='softmax')(hidden)
model = Model(inputs=base_model.input, outputs=predictions)

# Freeze every backbone layer so that only the new head is trained.
for backbone_layer in base_model.layers:
    backbone_layer.trainable = False

# Compile after freezing so the trainable flags are in place.
model.compile(
    loss='categorical_crossentropy',
    optimizer=Adam(learning_rate=0.0001),
    metrics=['accuracy'],
)

# STEP 8: Callbacks
# Stop when val_loss has not improved for 5 epochs and restore the best weights.
early_stop = EarlyStopping(
    monitor='val_loss',
    patience=5,
    restore_best_weights=True,
)
# Save the model to disk only when val_accuracy reaches a new best.
# NOTE(review): the two callbacks track different metrics (val_loss vs
# val_accuracy); confirm that is intended.
checkpoint = ModelCheckpoint(
    "mobilenetv2_best.h5",
    monitor="val_accuracy",
    save_best_only=True,
)

# STEP 9: Train the Model (up to 20 epochs, subject to early stopping)
training_callbacks = [early_stop, checkpoint]
history = model.fit(
    train_generator,
    epochs=20,
    validation_data=val_generator,
    callbacks=training_callbacks,
)

In [None]:
import os

# Index every .jpg in both image folders by its image id (file name without
# extension). This repeats the path mapping from the data-loading cell.
image_dir1 = "ham10000_data/HAM10000_images_part_1"
image_dir2 = "ham10000_data/HAM10000_images_part_2"

all_image_paths = {}
for folder in (image_dir1, image_dir2):
    for file_name in os.listdir(folder):
        if not file_name.endswith(".jpg"):
            continue
        image_id = os.path.splitext(file_name)[0]
        # Entries from the second folder overwrite same-named ones from the first.
        all_image_paths[image_id] = os.path.join(folder, file_name)

# Attach the resolved path to each metadata row.
df["path"] = df["image_id"].map(all_image_paths)

# Rows without a matching image file have a NaN path; drop them (just in case).
df = df.dropna(subset=["path"])

In [None]:
# STEP 7: Build MobileNetV2 Model
# NOTE(review): this rebinds `base_model` and `model`, so a model trained in
# an earlier cell is no longer reachable through `model` after this cell.
base_model = MobileNetV2(
    weights="imagenet",
    include_top=False,
    input_shape=(224, 224, 3),
)
base_model.trainable = False  # Freeze backbone

# Custom head: global average pool -> 128-unit ReLU -> dropout -> softmax.
pooled = GlobalAveragePooling2D()(base_model.output)
hidden = Dense(128, activation='relu')(pooled)
x = Dropout(0.3)(hidden)
output = Dense(num_classes, activation='softmax')(x)

model = Model(inputs=base_model.input, outputs=output)
model.compile(
    loss="categorical_crossentropy",
    optimizer="adam",
    metrics=["accuracy"],
)

# Print the layer-by-layer summary.
model.summary()

In [None]:
# STEP 8: Train the Model
# Short run for now (3–5 epochs) just to verify the pipeline works.
fit_options = dict(
    validation_data=val_generator,
    epochs=5,
    verbose=1,
)
history = model.fit(train_generator, **fit_options)

In [None]:
# STEP 9: Plot Training Results
# (history key, legend label) for each per-epoch accuracy curve.
accuracy_curves = (
    ("accuracy", "Train Accuracy"),
    ("val_accuracy", "Validation Accuracy"),
)
for history_key, curve_label in accuracy_curves:
    plt.plot(history.history[history_key], label=curve_label)

plt.title("Model Accuracy")
plt.xlabel("Epoch")
plt.ylabel("Accuracy")
plt.legend()
plt.grid()
plt.show()