In [None]:
# Note: the image-name column in the CSV files does not include the ".jpg" file extension.

In [None]:
import os
import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.applications import EfficientNetB7
from tensorflow.keras.models import Sequential, Model, load_model
from tensorflow.keras.layers import Dense, Flatten, Dropout, GlobalAveragePooling2D, Conv2D, MaxPooling2D, UpSampling2D, Input
from sklearn.utils import class_weight
import cv2
import matplotlib.pyplot as plt

# Load CSV files
train_csv = pd.read_csv("/Users/samrudhsalas/Downloads/SkinGuard/Skin_Cancer/ISIC_2020_Training_GroundTruth.csv")
test_csv = pd.read_csv("path_to_test_csv.csv")  # placeholder: point this at the real test CSV

# The CSV image-name columns hold bare image ids, but flow_from_dataframe matches
# on the full filename, so the ".jpg" extension has to be appended first.
train_csv['image_name'] = train_csv['image_name'].astype(str) + ".jpg"
test_csv['image'] = test_csv['image'].astype(str) + ".jpg"

# class_mode='binary' requires the label column to contain strings.
train_csv['target'] = train_csv['target'].astype(str)

# Set dataset paths
train_dir = "path_to_train_images/"  # placeholder
test_dir = "path_to_test_images/"  # placeholder

# Training-time augmentation. Note that augmentation alone does not rebalance
# the classes; it only adds variety to the images that are already there.
data_gen = ImageDataGenerator(
    rescale=1./255,
    rotation_range=30,
    width_shift_range=0.2,
    height_shift_range=0.2,
    shear_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True,
    validation_split=0.2
)

# Validation images are only rescaled, never augmented, so validation metrics
# describe undistorted inputs. The split fraction matches data_gen so the two
# generators carve the dataframe into complementary subsets.
val_data_gen = ImageDataGenerator(rescale=1./255, validation_split=0.2)

train_generator = data_gen.flow_from_dataframe(
    dataframe=train_csv,
    directory=train_dir,
    x_col='image_name',
    y_col='target',
    target_size=(224, 224),
    class_mode='binary',
    batch_size=32,
    subset='training'
)

val_generator = val_data_gen.flow_from_dataframe(
    dataframe=train_csv,
    directory=train_dir,
    x_col='image_name',
    y_col='target',
    target_size=(224, 224),
    class_mode='binary',
    batch_size=32,
    subset='validation'
)

# Apply Computer Vision Techniques (Edge Detection & Image Enhancement)
def preprocess_image(image_path):
    """Load an image and return its Canny edge map.

    The image is read with OpenCV, converted from BGR to RGB, resized to
    224x224, smoothed with a 5x5 Gaussian blur and passed through Canny edge
    detection with thresholds 100 and 200.

    Args:
        image_path: Path of the image file to read.

    Returns:
        The array produced by ``cv2.Canny`` for the blurred, resized image.

    Raises:
        FileNotFoundError: If OpenCV cannot read an image from ``image_path``.
    """
    image = cv2.imread(image_path)
    # cv2.imread signals failure by returning None instead of raising; without
    # this check the failure would surface as an opaque error inside cvtColor.
    if image is None:
        raise FileNotFoundError(f"Could not read image: {image_path}")

    image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
    image = cv2.resize(image, (224, 224))

    # Apply Gaussian Blur to suppress noise before edge detection
    blurred = cv2.GaussianBlur(image, (5, 5), 0)

    # Edge Detection using Canny
    edges = cv2.Canny(blurred, 100, 200)

    return edges

# Autoencoder for Unsupervised Feature Extraction
def build_autoencoder():
    """Build and compile a convolutional autoencoder for 224x224 RGB images.

    The encoder stacks three Conv2D + MaxPooling2D stages (32, 64, 128
    filters); the decoder mirrors them with Conv2D + UpSampling2D stages
    (128, 64, 32 filters) and ends in a 3-channel sigmoid convolution.

    Returns:
        The autoencoder model, compiled with the Adam optimizer and MSE loss.
    """
    relu_same = dict(activation='relu', padding='same')
    input_img = Input(shape=(224, 224, 3))

    # Encoder: each stage convolves, then halves the spatial resolution.
    x = input_img
    for n_filters in (32, 64, 128):
        x = Conv2D(n_filters, (3, 3), **relu_same)(x)
        x = MaxPooling2D((2, 2), padding='same')(x)
    encoded = x

    # Decoder: each stage convolves, then doubles the spatial resolution.
    x = encoded
    for n_filters in (128, 64, 32):
        x = Conv2D(n_filters, (3, 3), **relu_same)(x)
        x = UpSampling2D((2, 2))(x)

    # Sigmoid keeps the reconstruction in [0, 1].
    decoded = Conv2D(3, (3, 3), activation='sigmoid', padding='same')(x)

    autoencoder = Model(input_img, decoded)
    autoencoder.compile(optimizer='adam', loss='mse')
    return autoencoder

autoencoder = build_autoencoder()
autoencoder.summary()


# Train Autoencoder
def _reconstruction_batches(generator):
    """Yield (images, images) pairs so the autoencoder is trained to reproduce its input.

    The label generators yield (images, labels); feeding those to the
    autoencoder directly would use the class labels as reconstruction targets.
    """
    while True:
        images = next(generator)[0]
        yield images, images


# The wrapped generators never terminate, so the step counts tell Keras where
# each epoch and each validation pass ends.
autoencoder.fit(
    _reconstruction_batches(train_generator),
    epochs=10,
    validation_data=_reconstruction_batches(val_generator),
    steps_per_epoch=len(train_generator),
    validation_steps=len(val_generator),
)

# Load EfficientNetB7 with Transfer Learning
base_model = EfficientNetB7(weights='imagenet', include_top=False, input_shape=(224, 224, 3))

# Fine-tune only the last 20 backbone layers; every earlier layer stays frozen.
# The backbone is called as a nested layer below, so the backbone itself must
# stay trainable for the weights of its unfrozen sub-layers to be trained.
base_model.trainable = True
for layer in base_model.layers[:-20]:
    layer.trainable = False

# The generators (and predict_image) deliver pixels scaled to [0, 1], but Keras
# EfficientNet models rescale internally and expect raw [0, 255] pixels. Undo
# the scaling inside the model so the model's own input contract stays [0, 1].
classifier_input = Input(shape=(224, 224, 3))
raw_pixels = tf.keras.layers.Rescaling(255.0)(classifier_input)
backbone_features = base_model(raw_pixels)

# Add custom classification layers
global_avg_pooling = GlobalAveragePooling2D()(backbone_features)
dense1 = Dense(512, activation='relu')(global_avg_pooling)
dropout1 = Dropout(0.3)(dense1)
dense2 = Dense(256, activation='relu')(dropout1)
dropout2 = Dropout(0.3)(dense2)
# Two output units: the 'target' column only has the classes 0 and 1, so a third
# softmax unit could never receive a positive label.
output_layer = Dense(2, activation='softmax')(dropout2)

# Create Model with Transfer Learning
model = Model(inputs=classifier_input, outputs=output_layer)

# Compile Model with a lower learning rate for fine-tuning
model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=1e-5), loss='sparse_categorical_crossentropy', metrics=['accuracy'])

# Weight each class inversely to its frequency so the minority class is not
# ignored; augmentation by itself leaves the class ratio unchanged.
train_labels = np.asarray(train_generator.classes)
label_values = np.unique(train_labels)
balanced_weights = class_weight.compute_class_weight(class_weight='balanced', classes=label_values, y=train_labels)
class_weights = dict(zip(label_values.tolist(), balanced_weights.tolist()))

# Train Model
history = model.fit(train_generator, validation_data=val_generator, epochs=10, class_weight=class_weights)

# Save Model
model.save("efficientnetb7_skin_cancer_finetuned.h5")

# Function to manually test model with an image
def predict_image(image_path, model_path="efficientnetb7_skin_cancer_finetuned.h5"):
    """Classify a single image with a saved model and report the top class.

    The image is read with OpenCV, converted from BGR to RGB, resized to
    224x224, divided by 255 and passed to the model as a batch of one.

    Args:
        image_path: Path of the image file to classify.
        model_path: Path of the saved Keras model to load.

    Returns:
        A ``(predicted_class, confidence)`` tuple: the name of the
        highest-scoring class and that class's score.

    Raises:
        FileNotFoundError: If OpenCV cannot read an image from ``image_path``.
    """
    image = cv2.imread(image_path)
    # cv2.imread returns None on failure; check before the (expensive) model
    # load so a bad path fails fast with a clear message.
    if image is None:
        raise FileNotFoundError(f"Could not read image: {image_path}")

    model = load_model(model_path)

    image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
    image = cv2.resize(image, (224, 224))
    image = image / 255.0  # Normalize to [0, 1]
    image = np.expand_dims(image, axis=0)  # add the batch dimension

    prediction = model.predict(image)
    scores = prediction[0]  # scores for the single image in the batch
    # NOTE(review): the training data in this notebook has two classes (0/1);
    # "Other" is only reachable if the loaded model has a third output unit.
    class_names = ["Benign", "Malignant", "Other"]
    predicted_class = class_names[int(np.argmax(scores))]
    confidence = np.max(scores)

    print(f"Predicted Class: {predicted_class}, Confidence: {confidence:.2f}")
    return predicted_class, confidence

# Example usage
# predict_image("path_to_test_image.jpg")

In [None]:
import os
import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.applications import EfficientNetB7
from tensorflow.keras.models import Sequential, Model, load_model
from tensorflow.keras.layers import Dense, Flatten, Dropout, GlobalAveragePooling2D, Conv2D, MaxPooling2D, UpSampling2D, Input
from sklearn.utils import class_weight
import cv2
import matplotlib.pyplot as plt

# Load CSV files
# The dataset root can be redirected with the SKINGUARD_DATA_DIR environment
# variable; the fallback is the original location, so existing setups still work.
data_root = os.environ.get("SKINGUARD_DATA_DIR", "/Users/samrudhsalas/Downloads/SkinGuard/Skin_Cancer")
train_csv = pd.read_csv(os.path.join(data_root, "ISIC_2020_Training_GroundTruth.csv"))
test_csv = pd.read_csv(os.path.join(data_root, "ISIC_2020_Test_Metadata.csv"))

# Append .jpg extension to image names (the CSVs hold bare image ids, while
# flow_from_dataframe matches on the full filename)
train_csv['image_name'] = train_csv['image_name'].astype(str) + ".jpg"
test_csv['image'] = test_csv['image'].astype(str) + ".jpg"

# Convert target column to string (required by class_mode='binary')
train_csv['target'] = train_csv['target'].astype(str)

# Set dataset paths
train_dir = os.path.join(data_root, "train")
test_dir = os.path.join(data_root, "test")

# Training-time augmentation. Note that augmentation alone does not rebalance
# the classes; it only adds variety to the images that are already there.
data_gen = ImageDataGenerator(
    rescale=1./255,
    rotation_range=30,
    width_shift_range=0.2,
    height_shift_range=0.2,
    shear_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True,
    validation_split=0.2
)

# Validation images are only rescaled, never augmented, so validation metrics
# describe undistorted inputs. The split fraction matches data_gen so the two
# generators carve the dataframe into complementary subsets.
val_data_gen = ImageDataGenerator(rescale=1./255, validation_split=0.2)

train_generator = data_gen.flow_from_dataframe(
    dataframe=train_csv,
    directory=train_dir,
    x_col='image_name',
    y_col='target',
    target_size=(224, 224),
    class_mode='binary',
    batch_size=32,
    subset='training'
)

val_generator = val_data_gen.flow_from_dataframe(
    dataframe=train_csv,
    directory=train_dir,
    x_col='image_name',
    y_col='target',
    target_size=(224, 224),
    class_mode='binary',
    batch_size=32,
    subset='validation'
)

# Apply Computer Vision Techniques (Edge Detection & Image Enhancement)
def preprocess_image(image_path):
    """Load an image and return its Canny edge map.

    The image is read with OpenCV, converted from BGR to RGB, resized to
    224x224, smoothed with a 5x5 Gaussian blur and passed through Canny edge
    detection with thresholds 100 and 200.

    Args:
        image_path: Path of the image file to read.

    Returns:
        The array produced by ``cv2.Canny`` for the blurred, resized image.

    Raises:
        FileNotFoundError: If OpenCV cannot read an image from ``image_path``.
    """
    image = cv2.imread(image_path)
    # cv2.imread signals failure by returning None instead of raising; without
    # this check the failure would surface as an opaque error inside cvtColor.
    if image is None:
        raise FileNotFoundError(f"Could not read image: {image_path}")

    image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
    image = cv2.resize(image, (224, 224))

    # Apply Gaussian Blur to suppress noise before edge detection
    blurred = cv2.GaussianBlur(image, (5, 5), 0)

    # Edge Detection using Canny
    edges = cv2.Canny(blurred, 100, 200)

    return edges

# Autoencoder for Unsupervised Feature Extraction
def build_autoencoder():
    """Build and compile a convolutional autoencoder for 224x224 RGB images.

    The encoder stacks three Conv2D + MaxPooling2D stages (32, 64, 128
    filters); the decoder mirrors them with Conv2D + UpSampling2D stages
    (128, 64, 32 filters) and ends in a 3-channel sigmoid convolution.

    Returns:
        The autoencoder model, compiled with the Adam optimizer and MSE loss.
    """
    relu_same = dict(activation='relu', padding='same')
    input_img = Input(shape=(224, 224, 3))

    # Encoder: each stage convolves, then halves the spatial resolution.
    x = input_img
    for n_filters in (32, 64, 128):
        x = Conv2D(n_filters, (3, 3), **relu_same)(x)
        x = MaxPooling2D((2, 2), padding='same')(x)
    encoded = x

    # Decoder: each stage convolves, then doubles the spatial resolution.
    x = encoded
    for n_filters in (128, 64, 32):
        x = Conv2D(n_filters, (3, 3), **relu_same)(x)
        x = UpSampling2D((2, 2))(x)

    # Sigmoid keeps the reconstruction in [0, 1].
    decoded = Conv2D(3, (3, 3), activation='sigmoid', padding='same')(x)

    autoencoder = Model(input_img, decoded)
    autoencoder.compile(optimizer='adam', loss='mse')
    return autoencoder

# Build the convolutional autoencoder defined above and print its layer summary.
autoencoder = build_autoencoder()
autoencoder.summary()

# Train Autoencoder
def autoencoder_generator(generator):
    """Turn batches from ``generator`` into endless (images, images) pairs.

    Only the first element of each batch is used; it is yielded as both the
    model input and the reconstruction target, and any labels are dropped.
    """
    while True:
        images = next(generator)[0]
        yield images, images  # the autoencoder's target is its own input

# The wrapped generators never terminate, so the step counts tell Keras where
# each epoch and each validation pass ends.
# NOTE(review): in the cells visible here nothing after this call reads
# `autoencoder` — confirm the trained autoencoder is actually needed.
autoencoder.fit(
    autoencoder_generator(train_generator),
    epochs=10,
    validation_data=autoencoder_generator(val_generator),
    steps_per_epoch=len(train_generator),
    validation_steps=len(val_generator),
)

# Load EfficientNetB7 with Transfer Learning
base_model = EfficientNetB7(weights='imagenet', include_top=False, input_shape=(224, 224, 3))

# Fine-tune only the last 20 backbone layers; every earlier layer stays frozen.
# The backbone is called as a nested layer below, so the backbone itself must
# stay trainable for the weights of its unfrozen sub-layers to be trained.
base_model.trainable = True
for layer in base_model.layers[:-20]:
    layer.trainable = False

# The generators (and predict_image) deliver pixels scaled to [0, 1], but Keras
# EfficientNet models rescale internally and expect raw [0, 255] pixels. Undo
# the scaling inside the model so the model's own input contract stays [0, 1].
classifier_input = Input(shape=(224, 224, 3))
raw_pixels = tf.keras.layers.Rescaling(255.0)(classifier_input)
backbone_features = base_model(raw_pixels)

# Add custom classification layers
global_avg_pooling = GlobalAveragePooling2D()(backbone_features)
dense1 = Dense(512, activation='relu')(global_avg_pooling)
dropout1 = Dropout(0.3)(dense1)
dense2 = Dense(256, activation='relu')(dropout1)
dropout2 = Dropout(0.3)(dense2)
# Two output units: the generators report two classes, so a third softmax unit
# could never receive a positive label.
output_layer = Dense(2, activation='softmax')(dropout2)

# Create Model with Transfer Learning
model = Model(inputs=classifier_input, outputs=output_layer)

# Compile Model with a lower learning rate for fine-tuning
model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=1e-5), loss='sparse_categorical_crossentropy', metrics=['accuracy'])

# Weight each class inversely to its frequency so the minority class is not
# ignored; augmentation by itself leaves the class ratio unchanged.
train_labels = np.asarray(train_generator.classes)
label_values = np.unique(train_labels)
balanced_weights = class_weight.compute_class_weight(class_weight='balanced', classes=label_values, y=train_labels)
class_weights = dict(zip(label_values.tolist(), balanced_weights.tolist()))

# Train Model
history = model.fit(train_generator, validation_data=val_generator, epochs=10, class_weight=class_weights)

# Save Model
model.save("efficientnetb7_skin_cancer_finetuned.h5")

Found 26501 validated image filenames belonging to 2 classes.
Found 6625 validated image filenames belonging to 2 classes.


2025-02-01 10:30:29.565892: I metal_plugin/src/device/metal_device.cc:1154] Metal device set to: Apple M2
2025-02-01 10:30:29.566006: I metal_plugin/src/device/metal_device.cc:296] systemMemory: 8.00 GB
2025-02-01 10:30:29.566017: I metal_plugin/src/device/metal_device.cc:313] maxCacheSize: 2.67 GB
2025-02-01 10:30:29.567087: I tensorflow/core/common_runtime/pluggable_device/pluggable_device_factory.cc:305] Could not identify NUMA node of platform GPU ID 0, defaulting to 0. Your kernel may not have been built with NUMA support.
2025-02-01 10:30:29.567702: I tensorflow/core/common_runtime/pluggable_device/pluggable_device_factory.cc:271] Created TensorFlow device (/job:localhost/replica:0/task:0/device:GPU:0 with 0 MB memory) -> physical PluggableDevice (device: 0, name: METAL, pci bus id: <undefined>)


Epoch 1/10


2025-02-01 10:30:39.281042: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:117] Plugin optimizer for device_type GPU is enabled.


[1m829/829[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4172s[0m 5s/step - loss: 0.0068 - val_loss: 8.3870e-04
Epoch 2/10
[1m829/829[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4144s[0m 5s/step - loss: 8.0329e-04 - val_loss: 6.6195e-04
Epoch 3/10
[1m829/829[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4094s[0m 5s/step - loss: 7.2292e-04 - val_loss: 7.6178e-04
Epoch 4/10
[1m829/829[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4136s[0m 5s/step - loss: 6.3065e-04 - val_loss: 7.3954e-04
Epoch 5/10
[1m829/829[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4047s[0m 5s/step - loss: 6.0727e-04 - val_loss: 7.7382e-04
Epoch 6/10
[1m829/829[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4032s[0m 5s/step - loss: 5.6217e-04 - val_loss: 4.9471e-04
Epoch 7/10
[1m829/829[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3965s[0m 5s/step - loss: 4.9031e-04 - val_loss: 4.2356e-04
Epoch 8/10
[1m829/829[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4084s[0m 5s/step - loss: 4.7914e-0

  self._warn_if_super_not_called()


Epoch 1/10
[1m829/829[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11235s[0m 13s/step - accuracy: 0.9664 - loss: 1.0156 - val_accuracy: 0.9840 - val_loss: 1.0130
Epoch 2/10
[1m829/829[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9919s[0m 12s/step - accuracy: 0.9811 - loss: 0.9895 - val_accuracy: 0.9840 - val_loss: 0.9099
Epoch 3/10
[1m829/829[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6137s[0m 7s/step - accuracy: 0.9811 - loss: 0.8813 - val_accuracy: 0.9840 - val_loss: 0.7876
Epoch 4/10
[1m829/829[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4705s[0m 6s/step - accuracy: 0.9802 - loss: 0.7582 - val_accuracy: 0.9840 - val_loss: 0.6602
Epoch 5/10
[1m829/829[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4541s[0m 5s/step - accuracy: 0.9830 - loss: 0.6306 - val_accuracy: 0.9840 - val_loss: 0.5395
Epoch 6/10
[1m829/829[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5613s[0m 7s/step - accuracy: 0.9820 - loss: 0.5147 - val_accuracy: 0.9840 - val_loss: 0.4326
Epoch 7/10
[

In [None]:
# Function to manually test model with an image
def predict_image(image_path, model_path="efficientnetb7_skin_cancer_finetuned.h5"):
    """Classify a single image with a saved model and report the top class.

    The image is read with OpenCV, converted from BGR to RGB, resized to
    224x224, divided by 255 and passed to the model as a batch of one.

    Args:
        image_path: Path of the image file to classify.
        model_path: Path of the saved Keras model to load.

    Returns:
        A ``(predicted_class, confidence)`` tuple: the name of the
        highest-scoring class and that class's score.

    Raises:
        FileNotFoundError: If OpenCV cannot read an image from ``image_path``.
    """
    image = cv2.imread(image_path)
    # cv2.imread returns None on failure; check before the (expensive) model
    # load so a bad path fails fast with a clear message.
    if image is None:
        raise FileNotFoundError(f"Could not read image: {image_path}")

    model = load_model(model_path)

    image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
    image = cv2.resize(image, (224, 224))
    image = image / 255.0  # Normalize to [0, 1]
    image = np.expand_dims(image, axis=0)  # add the batch dimension

    prediction = model.predict(image)
    scores = prediction[0]  # scores for the single image in the batch
    # NOTE(review): the training data in this notebook has two classes (0/1);
    # "Other" is only reachable if the loaded model has a third output unit.
    class_names = ["Benign", "Malignant", "Other"]
    predicted_class = class_names[int(np.argmax(scores))]
    confidence = np.max(scores)

    print(f"Predicted Class: {predicted_class}, Confidence: {confidence:.2f}")
    return predicted_class, confidence

# Example usage
# predict_image("path_to_test_image.jpg")