In [None]:
# Notebook 5 — ResNet50 Fine-Tuned (7-Class Skin Lesion Classification)
# Colab-ready

# 0) Imports
import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

import tensorflow as tf
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.applications.resnet50 import ResNet50, preprocess_input as resnet_preprocess
from tensorflow.keras.layers import GlobalAveragePooling2D, Dense, Dropout, Input
from tensorflow.keras.models import Model, load_model
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau, ModelCheckpoint
from sklearn.metrics import classification_report, confusion_matrix

print("TensorFlow:", tf.__version__)

# Query the visible GPUs once and reuse the list for both the report and the setup.
gpus = tf.config.list_physical_devices('GPU')
print("GPU available:", gpus)

# GPU memory growth: ask TensorFlow to allocate GPU memory on demand.
if gpus:
    try:
        for gpu in gpus:
            tf.config.experimental.set_memory_growth(gpu, True)
        print("Enabled GPU memory growth.")
    except (RuntimeError, ValueError) as err:
        # Per the TensorFlow API docs, set_memory_growth raises RuntimeError when
        # the runtime is already initialized and ValueError for an invalid device.
        # This stays best-effort (training can still proceed), but the reason is
        # reported instead of being silently discarded.
        print("Could not enable GPU memory growth:", err)




In [None]:
# 1) Paths & Config
# Training hyperparameters shared by the cells below.
NUM_CLASSES = 7
EPOCHS = 10
BATCH_SIZE = 32
IMG_SIZE = 224

# Locations under /content/drive/MyDrive (paths assume Google Drive is mounted there).
OUTPUT_DIR = "/content/drive/MyDrive/models"              # trained models are written here
SPLITS_DIR = "/content/drive/MyDrive/splits"              # Notebook 1 output: df_train.csv, df_val.csv, df_test.csv
IMAGE_FOLDER = "/content/drive/MyDrive/HAM10000_images"   # image files (expected to be prepared by Notebook 1)

# Create the output directory up front; a directory that already exists is fine.
os.makedirs(OUTPUT_DIR, exist_ok=True)




In [None]:
# 2) Load CSV splits
df_train = pd.read_csv(f"{SPLITS_DIR}/df_train.csv")
df_val   = pd.read_csv(f"{SPLITS_DIR}/df_val.csv")
df_test  = pd.read_csv(f"{SPLITS_DIR}/df_test.csv")

print("Train:", len(df_train), "Val:", len(df_val), "Test:", len(df_test))

# Make sure every split carries a 'filepath' column. Splits that already have
# one are left untouched; otherwise it is derived from 'image_id' by appending
# ".jpg" and joining with IMAGE_FOLDER. Only the column is guaranteed here —
# nothing checks that the files are present on disk.
for split_df in (df_train, df_val, df_test):
    if 'filepath' in split_df.columns:
        continue
    split_df['filename'] = split_df['image_id'].astype(str) + ".jpg"
    split_df['filepath'] = split_df['filename'].map(
        lambda name: os.path.join(IMAGE_FOLDER, name)
    )




In [None]:
# 3) Generators — MUST use ResNet preprocess
# Training images are augmented on the fly; validation/test images only get the
# ResNet preprocessing so that evaluation is done on unmodified inputs.
train_gen = ImageDataGenerator(
    preprocessing_function=resnet_preprocess,
    rotation_range=20,
    width_shift_range=0.10,
    height_shift_range=0.10,
    zoom_range=0.10,
    horizontal_flip=True,
    fill_mode="nearest"
)

val_gen = ImageDataGenerator(preprocessing_function=resnet_preprocess)
test_gen = ImageDataGenerator(preprocessing_function=resnet_preprocess)

# Keyword arguments common to all three flows, kept in one place so the
# train/val/test pipelines cannot drift apart.
flow_kwargs = dict(
    x_col="filepath", y_col="dx",
    target_size=(IMG_SIZE, IMG_SIZE), batch_size=BATCH_SIZE,
    class_mode="categorical",
)

train_flow = train_gen.flow_from_dataframe(df_train, shuffle=True, **flow_kwargs)

# Class names in the index order chosen for the training flow. Passing this
# list to the other flows pins their label→index mapping (and one-hot width)
# to the training one even if a split happens to lack a rare class; without
# it each flow derives its own mapping from the labels it sees.
class_names = list(train_flow.class_indices)
assert len(class_names) == NUM_CLASSES, (
    f"Expected {NUM_CLASSES} classes in the training split, found {len(class_names)}: {class_names}"
)

# shuffle=False keeps the sample order fixed, which the evaluation cell relies
# on when it compares predictions against test_flow.classes.
val_flow = val_gen.flow_from_dataframe(
    df_val, classes=class_names, shuffle=False, **flow_kwargs
)

test_flow = test_gen.flow_from_dataframe(
    df_test, classes=class_names, shuffle=False, **flow_kwargs
)

print("Classes:", train_flow.class_indices)




In [None]:
# 4) Build Model (ResNet50)
# ImageNet-pretrained backbone without its classification head.
base_res = ResNet50(
    include_top=False,
    weights='imagenet',
    input_shape=(IMG_SIZE, IMG_SIZE, 3),
)

# Fine-tune only the final 50 backbone layers; everything before them is frozen.
# NOTE(review): the unfrozen tail presumably contains BatchNormalization layers;
# Keras' transfer-learning guide recommends keeping those in inference mode
# while fine-tuning — confirm that training them is intended.
first_trainable = len(base_res.layers) - 50
for position, layer in enumerate(base_res.layers):
    layer.trainable = position >= first_trainable

# Classification head: global pooling → 256-unit ReLU layer → dropout → softmax.
pooled = GlobalAveragePooling2D()(base_res.output)
hidden = Dense(256, activation='relu')(pooled)
hidden = Dropout(0.5)(hidden)
preds = Dense(NUM_CLASSES, activation='softmax')(hidden)

resnet_model = Model(inputs=base_res.input, outputs=preds)

# Adam with learning rate 1e-4; loss matches the one-hot labels produced by the
# generators (class_mode="categorical").
resnet_model.compile(
    loss='categorical_crossentropy',
    optimizer=Adam(1e-4),
    metrics=['accuracy'],
)

resnet_model.summary()




In [None]:
# 5) Callbacks
# Checkpoint file for the epoch with the lowest validation loss.
save_path = f"{OUTPUT_DIR}/resnet50_best.h5"

# All three callbacks watch val_loss:
#   - stop after 6 epochs without improvement and restore the best weights,
#   - multiply the learning rate by 0.3 after 3 epochs without improvement,
#   - write the model to save_path whenever val_loss improves.
stop_early = EarlyStopping(monitor='val_loss', patience=6, restore_best_weights=True, verbose=1)
lower_lr = ReduceLROnPlateau(monitor='val_loss', factor=0.3, patience=3, verbose=1)
keep_best = ModelCheckpoint(save_path, monitor='val_loss', save_best_only=True, verbose=1)

callbacks = [stop_early, lower_lr, keep_best]




In [None]:
# 6) Train
# Fit on the augmented training flow, validating on val_flow after each epoch.
history = resnet_model.fit(
    train_flow,
    epochs=EPOCHS,
    validation_data=val_flow,
    callbacks=callbacks,
)

# Save the model in whatever state fit() left it, separately from the
# best-val_loss checkpoint written by ModelCheckpoint.
resnet_model.save(f"{OUTPUT_DIR}/resnet50_last.h5")
print("Model saved!")




In [None]:
# 7) Plot curves
# Side-by-side panels: accuracy on the left, loss on the right, each showing
# the training curve followed by the validation curve.
fig, (acc_ax, loss_ax) = plt.subplots(1, 2, figsize=(12, 4))

panels = (
    (acc_ax, 'accuracy', 'val_accuracy', 'Accuracy'),
    (loss_ax, 'loss', 'val_loss', 'Loss'),
)
for ax, train_key, val_key, title in panels:
    ax.plot(history.history[train_key])
    ax.plot(history.history[val_key])
    ax.set_title(title)
    ax.legend(['train', 'val'])

plt.show()




In [None]:
# 8) Evaluation — Load Best Model
# Reload the checkpoint written by ModelCheckpoint (lowest val_loss).
best = load_model(save_path)

loss, acc = best.evaluate(test_flow, verbose=1)
print(f"\nTest Accuracy: {acc*100:.2f}%")

# Predictions: rewind the iterator so predictions line up with
# test_flow.classes (the flow was created with shuffle=False).
test_flow.reset()
y_prob = best.predict(test_flow, verbose=1)
y_pred = np.argmax(y_prob, axis=1)
y_true = test_flow.classes

# Mapping: class index → class name, taken from the training flow.
inv_class_map = {index: name for name, index in train_flow.class_indices.items()}
ordered_labels = list(inv_class_map.values())

y_pred_lab = [inv_class_map[class_idx] for class_idx in y_pred]
y_true_lab = [inv_class_map[class_idx] for class_idx in y_true]

print("\nClassification Report:")
print(classification_report(y_true_lab, y_pred_lab))

# Confusion Matrix: rows are true labels, columns are predicted labels, both
# in the order of ordered_labels.
cm = confusion_matrix(y_true_lab, y_pred_lab, labels=ordered_labels)

fig, ax = plt.subplots(figsize=(9, 7))
sns.heatmap(
    cm,
    annot=True,
    fmt='d',
    xticklabels=ordered_labels,
    yticklabels=ordered_labels,
    ax=ax,
)
ax.set_title("Confusion Matrix — ResNet50")
ax.set_xlabel("Predicted")
ax.set_ylabel("True")
plt.show()



In [None]:

# 9) Inference — Upload image and predict
from google.colab import files
from tensorflow.keras.preprocessing.image import load_img, img_to_array

def predict_resnet(model, img_path, class_map=None):
    """Classify a single image file with a trained model.

    The image is loaded at IMG_SIZE x IMG_SIZE, turned into a batch of one and
    passed through the same ResNet preprocessing used by the data generators.

    Args:
        model: Object exposing ``predict``; called on a batch of one image.
        img_path: Path of the image file to classify.
        class_map: Optional mapping from class index to class name. When
            omitted, the notebook-level ``inv_class_map`` (built in the
            evaluation cell) is used, which is the original behaviour.

    Returns:
        Tuple ``(label, probability)`` for the highest-scoring class.
    """
    if class_map is None:
        # Fall back to the mapping created by the evaluation cell.
        class_map = inv_class_map

    img = load_img(img_path, target_size=(IMG_SIZE, IMG_SIZE))
    arr = img_to_array(img)
    arr = np.expand_dims(arr, axis=0)   # add the batch dimension
    arr = resnet_preprocess(arr)
    prob = model.predict(arr)[0]
    # Plain int so the lookup also works with mappings keyed by Python ints only.
    idx = int(np.argmax(prob))
    return class_map[idx], prob[idx]

print("Upload images to classify:")
# files.upload() opens Colab's upload widget; the result is iterated by
# uploaded file name (presumably a dict keyed by name — confirm against the
# google.colab docs) and each name is classified with the best checkpoint.
uploaded = files.upload()
for fn in uploaded:
    label, confidence = predict_resnet(best, fn)
    print(f"{fn} → {label} ({confidence*100:.2f}%)")
