In [None]:
import os
from sklearn.model_selection import train_test_split
from shutil import copyfile

# Set the path to your original dataset
original_dataset_path = 'D:/capstone_project/skin_cancer/pre_processed_images'

# Output directory for training and testing
destination_folder = 'D:/capstone_project/skin_cancer'

# Ensure the output directories exist
os.makedirs(os.path.join(destination_folder, 'train'), exist_ok=True)
os.makedirs(os.path.join(destination_folder, 'test'), exist_ok=True)

def copy_images(image_paths, destination_folder, class_name):
    class_folder_train = os.path.join(destination_folder, 'train', class_name)
    class_folder_test = os.path.join(destination_folder, 'test', class_name)

    os.makedirs(class_folder_train, exist_ok=True)
    os.makedirs(class_folder_test, exist_ok=True)

    train_images, test_images = train_test_split(image_paths, test_size=0.2, random_state=42)

    for img_path in train_images:
        img_name = os.path.basename(img_path)
        destination_path = os.path.join(class_folder_train, img_name)
        try:
            copyfile(img_path, destination_path)
        except FileNotFoundError:
            print(f"File not found: {img_path}")

    for img_path in test_images:
        img_name = os.path.basename(img_path)
        destination_path = os.path.join(class_folder_test, img_name)
        try:
            copyfile(img_path, destination_path)
        except FileNotFoundError:
            print(f"File not found: {img_path}")

# Get a list of all image paths in the original dataset
image_paths = []
for root, dirs, files in os.walk(original_dataset_path):
    for file in files:
        if file.lower().endswith(('jpg', 'jpeg', 'png')):
            image_paths.append(os.path.join(root, file))

# Copy images to the training and testing directories with class-wise folders
for class_name in os.listdir(original_dataset_path):
    class_images = [img for img in image_paths if os.path.join(class_name) in img]
    copy_images(class_images, destination_folder, class_name)


In [1]:
import os
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense

# Set the path to your dataset
dataset_path = 'D:/capstone_project/skin_cancer/train'

# Define constants
batch_size = 32
img_height, img_width = 224, 224

# Create an ImageDataGenerator for data augmentation and normalization
train_datagen = ImageDataGenerator(
    rescale=1./255,
    shear_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True,
    validation_split=0.2  # 80% for training, 20% for validation
)

# Create data generators for training and validation
train_generator = train_datagen.flow_from_directory(
    dataset_path,
    target_size=(img_height, img_width),
    batch_size=batch_size,
    class_mode='categorical',
    subset='training'  # Use the training subset
)

validation_generator = train_datagen.flow_from_directory(
    dataset_path,
    target_size=(img_height, img_width),
    batch_size=batch_size,
    class_mode='categorical',
    subset='validation'  # Use the validation subset
)

# Define the CNN model
model = Sequential()
model.add(Conv2D(32, (3, 3), input_shape=(img_height, img_width, 3), activation='relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Conv2D(64, (3, 3), activation='relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Conv2D(128, (3, 3), activation='relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Flatten())
model.add(Dense(128, activation='relu'))
model.add(Dense(7, activation='softmax'))  # Adjust based on the number of classes

# Compile the model
model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

# Train the model
model.fit(train_generator, epochs=100, validation_data=validation_generator)

# Save the trained model
model.save('skin_cancer_cnn_model.h5')



Found 6411 images belonging to 7 classes.
Found 1599 images belonging to 7 classes.
Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100

<h1> Segmentation</h1>

In [10]:
import os
import cv2
from skimage import exposure, io, img_as_ubyte
from skimage.filters import gaussian
import numpy as np

# Set the path to your dataset
dataset_path = r'D:\capstone_project\skin_cancer\HAM10000_images_part_1'

# Output directory for preprocessed images
output_path = r'D:\capstone_project\skin_cancer\pre_segmented'

# Ensure the output directory exists
os.makedirs(output_path, exist_ok=True)

# Function to preprocess an image
def preprocess_image(image_path, output_path):
    # Read the image
    image = io.imread(image_path)

    # Normalize pixel values to the range [0, 1]
    normalized_image = image.astype('float') / 255.0
    
    # Apply contrast enhancement using adaptive histogram equalization
    enhanced_image = exposure.equalize_adapthist(normalized_image)

    # Apply noise cancellation using Gaussian blur
    denoised_image = gaussian(enhanced_image, sigma=0.5)

    # Convert the denoised image to uint8 before saving
    denoised_image_uint8 = img_as_ubyte(denoised_image)

    # Convert image to HSV color space
    hsv_image = cv2.cvtColor(denoised_image_uint8, cv2.COLOR_RGB2HSV)

    # Define lower and upper bounds for non-cancerous skin color in HSV
    lower_bound = np.array([0, 30, 10])  # Adjust these values based on your requirements
    upper_bound = np.array([20, 150, 255])  # Adjust these values based on your requirements

    # Create a binary mask using inRange function
    mask = cv2.inRange(hsv_image, lower_bound, upper_bound)

    # Set non-cancerous regions to orange and cancerous to original color
    segmented_image = image.copy()
    segmented_image[mask == 0] = [255, 165, 0]  # Orange color for non-cancerous regions

    # Save the preprocessed image
    output_file = os.path.join(output_path, os.path.basename(image_path))
    io.imsave(output_file, segmented_image)

# Apply preprocessing to each image in the dataset
for root, dirs, files in os.walk(dataset_path):
    for file in files:
        if file.endswith(('jpg', 'jpeg', 'png')):
            image_path = os.path.join(root, file)
            preprocess_image(image_path, output_path)


  io.imsave(output_file, segmented_image)
  io.imsave(output_file, segmented_image)
  io.imsave(output_file, segmented_image)
  io.imsave(output_file, segmented_image)
  io.imsave(output_file, segmented_image)
  io.imsave(output_file, segmented_image)
  io.imsave(output_file, segmented_image)
  io.imsave(output_file, segmented_image)


KeyboardInterrupt: 

<h3> testing </h3>

In [2]:
import os
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.models import load_model

# Set the path to your test dataset
test_dataset_path = 'D:/capstone_project/skin_cancer/test'

# Load the trained CNN model
model = load_model('skin_cancer_cnn_model.h5')

# Define constants
batch_size = 32
img_height, img_width = 224, 224

# Create an ImageDataGenerator for normalization (no data augmentation during testing)
test_datagen = ImageDataGenerator(rescale=1./255)

# Create a data generator for testing
test_generator = test_datagen.flow_from_directory(
    test_dataset_path,
    target_size=(img_height, img_width),
    batch_size=batch_size,
    class_mode='categorical'
)

# Evaluate the model on the test dataset
eval_result = model.evaluate(test_generator)

# Print the evaluation result (accuracy and loss)
print(f"Test Accuracy: {eval_result[1]*100:.2f}%")
print(f"Test Loss: {eval_result[0]}")


OSError: No file or directory found at skin_cancer_cnn_model.h5

Data Augmentation and Transfer Learning


In [1]:
import os
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.applications.vgg16 import VGG16
from tensorflow.keras import layers
from tensorflow.keras.models import Sequential, Model

# Set the path to your dataset
train_dataset_path = 'D:/capstone_project/skin_cancer/test'
test_dataset_path = 'D:/capstone_project/skin_cancer/test'

# Constants
img_height, img_width = 224, 224
batch_size = 32

# Create data generator with data augmentation for training
train_datagen = ImageDataGenerator(
    rescale=1./255,
    shear_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True
)

# Create data generator for testing (no data augmentation)
test_datagen = ImageDataGenerator(rescale=1./255)

# Load VGG16 model with pre-trained weights (excluding top layer)
base_model = VGG16(weights='imagenet', include_top=False, input_shape=(img_height, img_width, 3))

# Freeze the convolutional layers
for layer in base_model.layers:
    layer.trainable = False

# Create a new model with VGG16 and additional layers
model = Sequential()
model.add(base_model)
model.add(layers.Flatten())
model.add(layers.Dense(256, activation='relu'))
model.add(layers.Dropout(0.5))
model.add(layers.Dense(7, activation='softmax'))  # Adjust based on the number of classes

# Compile the model
model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

# Create data generators
train_generator = train_datagen.flow_from_directory(
    train_dataset_path,
    target_size=(img_height, img_width),
    batch_size=batch_size,
    class_mode='categorical'
)

test_generator = test_datagen.flow_from_directory(
    test_dataset_path,
    target_size=(img_height, img_width),
    batch_size=batch_size,
    class_mode='categorical',
    shuffle=False  # Important for keeping track of filenames
)

# Train the model
model.fit(train_generator, epochs=50, validation_data=test_generator)

# Evaluate the model on the test set
# evaluation = model.evaluate(test_generator)
# print(f"Test Accuracy: {evaluation[1] * 100:.2f}%")
    

Found 2005 images belonging to 7 classes.
Found 2005 images belonging to 7 classes.
Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50

In [5]:
import certifi
print(certifi.where())  

C:\Users\asus\AppData\Roaming\Python\Python311\site-packages\certifi\cacert.pem


In [10]:
os.environ['REQUESTS_CA_BUNDLE'] ='C:/Users/asus/AppData/Roaming/Python/Python311/site-packages/certifi/cacert.pem'


In [3]:
from cProfile import label
from pyexpat import features
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense

# Load your dataset and split it into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(features, label, test_size=0.2, random_state=42)

# Create a list to store individual models
models = []

# Train multiple individual models
for i in range(5):
    # Create a random forest classifier
    model = RandomForestClassifier(n_estimators=100, random_state=i)
    
    # Train the model
    model.fit(X_train, y_train)
    
    # Add the trained model to the list
    models.append(model)

# Make predictions using each individual model
predictions = []
for model in models:
    y_pred = model.predict(X_test)
    predictions.append(y_pred)

# Perform majority voting to get the final ensemble prediction
ensemble_predictions = []
for i in range(len(X_test)):
    votes = [predictions[j][i] for j in range(len(models))]
    majority_vote = max(set(votes), key=votes.count)
    ensemble_predictions.append(majority_vote)

# Calculate the accuracy of the ensemble model
ensemble_accuracy = accuracy_score(y_test, ensemble_predictions)
print(f"Ensemble Accuracy: {ensemble_accuracy}")
# Set the path to your dataset
dataset_path = 'D:/capstone_project/skin_cancer/test'

# Define constants
batch_size = 32
img_height, img_width = 224, 224

# Create an ImageDataGenerator for data augmentation and normalization
datagen = ImageDataGenerator(
    rescale=1./255,
    shear_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True,
    validation_split=0.2  # 80% for training, 20% for validation
)

# Create data generators for training and validation
train_generator = datagen.flow_from_directory(
    dataset_path,
    target_size=(img_height, img_width),
    batch_size=batch_size,
    class_mode='categorical',
    subset='training'  # Use the training subset
)

validation_generator = datagen.flow_from_directory(
    dataset_path,
    target_size=(img_height, img_width),
    batch_size=batch_size,
    class_mode='categorical',
    subset='validation'  # Use the validation subset
)

# Define the individual models
models = []
for i in range(5):
    model = Sequential()
    model.add(Conv2D(32, (3, 3), input_shape=(img_height, img_width, 3), activation='relu'))
    model.add(MaxPooling2D(pool_size=(2, 2)))
    model.add(Conv2D(64, (3, 3), activation='relu'))
    model.add(MaxPooling2D(pool_size=(2, 2)))
    model.add(Conv2D(128, (3, 3), activation='relu'))
    model.add(MaxPooling2D(pool_size=(2, 2)))
    model.add(Flatten())
    model.add(Dense(128, activation='relu'))
    model.add(Dense(7, activation='softmax'))  # Adjust based on the number of classes

    model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])
    models.append(model)

# Train the individual models
for model in models:
    model.fit(train_generator, epochs=100, validation_data=validation_generator)

# Make predictions using each individual model
predictions = []
for model in models:
    y_pred = model.predict(validation_generator)
    predictions.append(y_pred)

# Perform majority voting to get the final ensemble prediction
ensemble_predictions = []
for i in range(len(validation_generator)):
    votes = [predictions[j][i] for j in range(len(models))]
    majority_vote = np.argmax(np.sum(votes, axis=0))
    ensemble_predictions.append(majority_vote)

# Get the ground truth labels
y_true = validation_generator.classes

# Calculate the accuracy of the ensemble model
ensemble_accuracy = accuracy_score(y_true, ensemble_predictions)
print(f"Ensemble Accuracy: {ensemble_accuracy}")


TypeError: Singleton array array(<function label at 0x000001847CADBEC0>, dtype=object) cannot be considered a valid collection.

efficient net


In [2]:
import os
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.applications import EfficientNetB3
from tensorflow.keras import layers
from tensorflow.keras.models import Sequential

# Set the path to your dataset
dataset_path = 'D:/capstone_project/skin_cancer/pre_processed_images'


# Constants
img_height, img_width = 224, 224
batch_size = 32

# Create an ImageDataGenerator with data augmentation for training
train_datagen = ImageDataGenerator(
    rescale=1./255,
    shear_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True,
    validation_split=0.2  # 80% for training, 20% for validation
)

# Create data generators for training and validation
train_generator = train_datagen.flow_from_directory(
    dataset_path,
    target_size=(img_height, img_width),
    batch_size=batch_size,
    class_mode='categorical',
    subset='training'  # Use the training subset
)

validation_generator = train_datagen.flow_from_directory(
    dataset_path,
    target_size=(img_height, img_width),
    batch_size=batch_size,
    class_mode='categorical',
    subset='validation'  # Use the validation subset
)

# Load pre-trained EfficientNetB3 model
base_model = EfficientNetB3(weights='imagenet', include_top=False, input_shape=(img_height, img_width, 3))

# Freeze the convolutional layers
for layer in base_model.layers:
    layer.trainable = False

# Create a new model with EfficientNetB3 and additional layers
model = Sequential()
model.add(base_model)
model.add(layers.GlobalAveragePooling2D())
model.add(layers.Dense(128, activation='relu'))
model.add(layers.Dropout(0.5))
model.add(layers.Dense(7, activation='softmax'))  # Adjust based on the number of classes

# Compile the model
model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

# Train the model
model.fit(train_generator, epochs=50, validation_data=validation_generator)
