ðŸ”¹ STEP 0: Mount Google Drive & Imports

In [1]:
from google.colab import drive
drive.mount('/content/drive')

import os
import cv2
import numpy as np
import pandas as pd
from tqdm import tqdm
import matplotlib.pyplot as plt

from sklearn.metrics import confusion_matrix, classification_report, accuracy_score, f1_score
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier


Mounted at /content/drive


ðŸ”¹ STEP 1: Define Dataset Paths

In [2]:
BASE_PATH = "/content/drive/MyDrive/OEL/PH2Dataset/PH2 Dataset images"
MASK_SAVE_PATH = "/content/drive/MyDrive/OEL/PH2Dataset/generated_masks"
LABEL_TXT = "/content/drive/MyDrive/OEL/PH2Dataset/PH2_dataset.txt"

os.makedirs(MASK_SAVE_PATH, exist_ok=True)

ðŸ”¹ STEP 2: Image Pre-Processing Functions
âœ… 1. Image Enhancement (CLAHE)

In [3]:
def enhance_image(img):
    lab = cv2.cvtColor(img, cv2.COLOR_BGR2LAB)
    l, a, b = cv2.split(lab)
    clahe = cv2.createCLAHE(clipLimit=2.0, tileGridSize=(8,8))
    l = clahe.apply(l)
    merged = cv2.merge((l, a, b))
    return cv2.cvtColor(merged, cv2.COLOR_LAB2BGR)


âœ… 2. Hair Removal (Black-Hat + Inpainting)

In [4]:
def remove_hairs(img):
    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (17,17))
    blackhat = cv2.morphologyEx(gray, cv2.MORPH_BLACKHAT, kernel)
    _, mask = cv2.threshold(blackhat, 10, 255, cv2.THRESH_BINARY)
    return cv2.inpaint(img, mask, 1, cv2.INPAINT_TELEA)


ðŸ”¹ STEP 3: Lesion Segmentation

In [5]:
def segment_lesion(img):
    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    blur = cv2.GaussianBlur(gray, (5,5), 0)

    _, binary = cv2.threshold(
        blur, 0, 255, cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU
    )

    kernel = np.ones((5,5), np.uint8)

    # Morphological Operations
    binary = cv2.morphologyEx(binary, cv2.MORPH_CLOSE, kernel)
    binary = cv2.morphologyEx(binary, cv2.MORPH_OPEN, kernel)
    binary = cv2.morphologyEx(binary, cv2.MORPH_ERODE, kernel, iterations=1)

    return binary


ðŸ”¹ STEP 4: Generate Masks + Segmentation Evaluation

In [6]:
y_true_all, y_pred_all = [], []

for folder in tqdm(os.listdir(BASE_PATH)):
    folder_path = os.path.join(BASE_PATH, folder)
    if not os.path.isdir(folder_path):
        continue

    img_path = os.path.join(folder_path, f"{folder}_Dermoscopic_Image")
    gt_path  = os.path.join(folder_path, f"{folder}_lesion")

    if not os.path.exists(img_path) or not os.path.exists(gt_path):
        continue

    img = cv2.imread(os.path.join(img_path, os.listdir(img_path)[0]))
    gt  = cv2.imread(os.path.join(gt_path, os.listdir(gt_path)[0]), 0)

    if img is None or gt is None:
        continue

    img = enhance_image(img)
    img = remove_hairs(img)
    pred_mask = segment_lesion(img)

    cv2.imwrite(
        os.path.join(MASK_SAVE_PATH, f"{folder}_mask.png"),
        pred_mask
    )

    y_true_all.extend((gt > 0).flatten())
    y_pred_all.extend((pred_mask > 0).flatten())


100%|â–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆ| 200/200 [07:24<00:00,  2.22s/it]


ðŸ”¹ Segmentation Metrics

In [7]:
cm = confusion_matrix(y_true_all, y_pred_all)
TN, FP, FN, TP = cm.ravel()

accuracy = (TP + TN) / (TP + TN + FP + FN)
sensitivity = TP / (TP + FN)
specificity = TN / (TN + FP)

print("Confusion Matrix:\n", cm)
print(f"Accuracy     : {accuracy:.4f}")
print(f"Sensitivity  : {sensitivity:.4f}")
print(f"Specificity  : {specificity:.4f}")


Confusion Matrix:
 [[51276641  8451041]
 [13095887 15339754]]
Accuracy     : 0.7556
Sensitivity  : 0.5395
Specificity  : 0.8585


STEP 5: Load Labels from .txt File

In [8]:
label_dict = {}

with open(LABEL_TXT, "r") as f:
    for line in f:
        if line.strip() == "" or line.startswith("Name") or line.startswith("Legends for Clinical Diagnosis:"):
            continue

        parts = [p.strip() for p in line.split("||") if p.strip()]

        # Ensure there are enough parts to extract image ID and clinical diagnosis
        if len(parts) < 2:
            continue

        img_id = parts[0]
        clinical_diagnosis = parts[1]

        if "Melanoma" in clinical_diagnosis:
            label_dict[img_id] = 1  # Melanoma
        else:
            # Treat all other valid diagnoses as Common Nevus (or benign) if not Melanoma
            label_dict[img_id] = 0  # Common Nevus or other benign lesion
            # Note: This assumes that if it's not Melanoma, it's the 'other' class we want to detect against.
            # For a binary classification of "Melanoma" vs "Non-Melanoma" (including Common Nevus), this approach is suitable.

print("Total labels loaded:", len(label_dict))

Total labels loaded: 201


In [11]:
import os

project_path = '/content/drive/MyDrive/OEL'

if os.path.exists(project_path):
    print(f"Contents of {project_path}:")
    for root, dirs, files in os.walk(project_path):
        level = root.replace(project_path, '').count(os.sep)
        indent = ' ' * 4 * (level)
        print(f'{indent}{os.path.basename(root)}/')
        subindent = ' ' * 4 * (level + 1)
        for f in files:
            print(f'{subindent}{f}')
else:
    print(f"Path does not exist: {project_path}")

Contents of /content/drive/MyDrive/OEL:
OEL/
    PH2Dataset/
        PH2_dataset.txt
        Readme.txt
        PH2_dataset.xlsx
        PH2 Dataset images/
            IMD009/
                IMD009_Dermoscopic_Image/
                    IMD009.bmp
                IMD009_lesion/
                    IMD009_lesion.bmp
            IMD004/
                IMD004_lesion/
                    IMD004_lesion.bmp
                IMD004_Dermoscopic_Image/
                    IMD004.bmp
                IMD004_roi/
                    IMD004_R1_Label4.bmp
                    IMD004_R2_Label3.bmp
            IMD002/
                IMD002_Dermoscopic_Image/
                    IMD002.bmp
                IMD002_roi/
                    IMD002_R2_Label3.bmp
                    IMD002_R1_Label4.bmp
                IMD002_lesion/
                    IMD002_lesion.bmp
            IMD015/
                IMD015_Dermoscopic_Image/
                    IMD015.bmp
                IMD015_lesion/
             

ðŸ”¹ STEP 6: Feature Extraction (Masked Area)

In [12]:
def extract_features(img, mask):
    features = []
    _, mask_bin = cv2.threshold(mask, 127, 255, cv2.THRESH_BINARY)

    for i in range(3):  # BGR
        pixels = img[:,:,i][mask_bin > 0]
        if pixels.size == 0:
            features.extend([0,0])
        else:
            features.extend([pixels.mean(), pixels.std()])
    return features  # 6 features


ðŸ”¹ STEP 7: Build Dataset (X, y)

In [13]:
X, y = [], []

for folder in os.listdir(BASE_PATH):
    if folder not in label_dict:
        continue

    mask_path = os.path.join(MASK_SAVE_PATH, f"{folder}_mask.png")
    img_folder = os.path.join(BASE_PATH, folder, f"{folder}_Dermoscopic_Image")

    if not os.path.exists(mask_path) or not os.path.exists(img_folder):
        continue

    img = cv2.imread(os.path.join(img_folder, os.listdir(img_folder)[0]))
    mask = cv2.imread(mask_path, 0)

    if img is None or mask is None:
        continue

    X.append(extract_features(img, mask))
    y.append(label_dict[folder])

X = np.array(X)
y = np.array(y)

print("Feature matrix:", X.shape)
print("Labels:", y.shape)


Feature matrix: (200, 6)
Labels: (200,)


Train/Test Split Example

In [14]:
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.25, random_state=42, stratify=y
)

In [15]:
clf = RandomForestClassifier(
    n_estimators=80,
    max_depth=8,
    random_state=42
)

clf.fit(X_train, y_train)
y_pred = clf.predict(X_test)

Classifier:

In [16]:
acc = accuracy_score(y_test, y_pred)
f1  = f1_score(y_test, y_pred)

print("Accuracy:", acc)
print("F1 Score:", f1)
print("\nClassification Report:\n")
print(classification_report(y_test, y_pred))

Accuracy: 0.86
F1 Score: 0.2222222222222222

Classification Report:

              precision    recall  f1-score   support

           0       0.88      0.98      0.92        43
           1       0.50      0.14      0.22         7

    accuracy                           0.86        50
   macro avg       0.69      0.56      0.57        50
weighted avg       0.82      0.86      0.82        50

