<a href="https://colab.research.google.com/github/laks1402/Derma-detection/blob/derma/SkinDiseases.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
from google.colab import drive
drive.mount('/content/drive')


Mounted at /content/drive


In [2]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import os
from PIL import Image

import keras
from tensorflow.keras.utils import to_categorical
from keras.models import Sequential
from keras.layers import Dense, Dropout, Flatten, Conv2D, MaxPool2D
from tensorflow.keras.layers import BatchNormalization
from keras.optimizers import Adam
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from keras.callbacks import ReduceLROnPlateau
from sklearn.model_selection import train_test_split

In [3]:
!find /content/drive/MyDrive/Colab\ Notebooks/Datasets/old_FYP\ skin\ disease\ Dataset -name ".DS_Store" -delete


find: ‘/content/drive/MyDrive/Colab Notebooks/Datasets/old_FYP skin disease Dataset’: No such file or directory


In [4]:
import pandas as pd
import os

train_dir = '/content/drive/MyDrive/Colab Notebooks/Datasets'

# Initialize lists
train_data = []

# Add image paths and labels to lists
for label, directory in enumerate(os.listdir(train_dir)):
    if directory == '.DS_Store':
        continue

    directory_path = os.path.join(train_dir, directory)
    if not os.path.isdir(directory_path):
        continue

    for filename in os.listdir(directory_path):
        if filename.startswith('.'):
            continue
        image_path = os.path.join(directory_path, filename)
        train_data.append({'image_path': image_path, 'label': label})

train_df = pd.DataFrame(train_data)

df = pd.concat([train_df], ignore_index=True)
del train_data
df.head()


Unnamed: 0,image_path,label
0,/content/drive/MyDrive/Colab Notebooks/Dataset...,0
1,/content/drive/MyDrive/Colab Notebooks/Dataset...,0
2,/content/drive/MyDrive/Colab Notebooks/Dataset...,0
3,/content/drive/MyDrive/Colab Notebooks/Dataset...,0
4,/content/drive/MyDrive/Colab Notebooks/Dataset...,0


In [5]:
# Get list of directories in train_dir
labels = os.listdir(train_dir)

label_map = {i: label for i, label in enumerate(labels)}
num_classes=len(label_map)
label_map

{0: 'Melanoma',
 1: 'Vascular Lesions',
 2: 'Seborrheic Keratosis',
 3: 'Dermatofibroma',
 4: 'Basal Cell Carcinoma',
 5: 'Acne'}

In [6]:
max_images_per_class = 1500

df = df.sort_values("label").groupby("label").head(max_images_per_class).reset_index(drop=True)

In [7]:
import tensorflow as tf
import multiprocessing

gpus = tf.config.experimental.list_physical_devices('GPU')
print(gpus)
try:
    for gpu in gpus:
        tf.config.experimental.set_memory_growth(gpu, True)
except Exception as e:
    print(e)

max_workers = multiprocessing.cpu_count()
print(f"Available CPU Cores: {max_workers}")


[]
Available CPU Cores: 2


In [8]:
import torch
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

In [9]:
import concurrent.futures
import os
import numpy as np
from PIL import Image

# Resize image arrays
def resize_image_array(image_path):
    try:
        if image_path.lower().endswith(('.png', '.jpg', '.jpeg', '.bmp', '.gif', '.tiff', '.webp')):
            return np.asarray(Image.open(image_path).resize((128, 128)))
    except Exception as e:
        print(f"Error processing {image_path}: {e}")
    return None

valid_image_paths = [path for path in df['image_path'].tolist() if path.lower().endswith(('.png', '.jpg', '.jpeg', '.bmp', '.gif', '.tiff', '.webp'))]

with concurrent.futures.ThreadPoolExecutor(max_workers=max_workers) as executor:
    image_arrays = list(executor.map(resize_image_array, valid_image_paths))

# Remove failed image
image_arrays = [img for img in image_arrays if img is not None]

# Add the resized image arrays to the DataFrame
df = df[df['image_path'].isin(valid_image_paths)]
df['image'] = image_arrays


In [10]:
# Count the number of images in each class
class_counts = df['label'].value_counts().sort_index()

# Print the number of images in each class
print("Dataset Summary")
print("-" * 60)
print(f"{'Class Label':<15} {'Class Name':<30} {'Count':<10}")
print("-" * 60)
for class_label, class_name in label_map.items():
    count = class_counts[class_label]
    print(f"{class_label:<15} {class_name:<30} {count:<10}")
print("-" * 60)
print(f"{'Total':<45} {sum(class_counts):<10}")

Dataset Summary
------------------------------------------------------------
Class Label     Class Name                     Count     
------------------------------------------------------------
0               Melanoma                       1500      
1               Vascular Lesions               856       
2               Seborrheic Keratosis           1500      
3               Dermatofibroma                 344       
4               Basal Cell Carcinoma           628       
5               Acne                           1500      
------------------------------------------------------------
Total                                         6328      


In [11]:
df['image'].map(lambda x: x.shape).value_counts()

Unnamed: 0_level_0,count
image,Unnamed: 1_level_1
"(128, 128, 3)",6245
"(128, 128, 4)",83


In [12]:
# Create an ImageDataGenerator object with the desired transformations
train_datagen = ImageDataGenerator(
    rotation_range=30,
    width_shift_range=0.2,
    height_shift_range=0.2,
    shear_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True,
    fill_mode='nearest'
)

In [13]:
augmented_df = pd.DataFrame(columns=['image_path', 'label', 'image'])

for class_label in df['label'].unique():
    image_arrays = df.loc[df['label'] == class_label, 'image'].values

    num_images_needed = max_images_per_class - len(image_arrays)

    augmented_images_list = []
    if num_images_needed > 0:
        selected_images = np.random.choice(image_arrays, size=num_images_needed)

        for image_array in selected_images:
            image_tensor = np.expand_dims(image_array, axis=0)
            augmented_images = train_datagen.flow(image_tensor, batch_size=1)

            for i in range(augmented_images.n):
                augmented_image_array = next(augmented_images)[0].astype('uint8')
                augmented_images_list.append({'image_path': None, 'label': class_label, 'image': augmented_image_array})

    augmented_images_df = pd.DataFrame(augmented_images_list)

    original_images_df = df.loc[df['label'] == class_label, ['image_path', 'label', 'image']]
    augmented_df = pd.concat([augmented_df, original_images_df, augmented_images_df], ignore_index=True)

df = augmented_df.groupby('label').head(max_images_per_class)

del augmented_df

df = df.sample(frac=1, random_state=42).reset_index(drop=True)


In [14]:
# Count the number of images in each class
class_counts = df['label'].value_counts().sort_index()

# Print the number of images in each class
print("Dataset Summary")
print("-" * 60)
print(f"{'Class Label':<15} {'Class Name':<30} {'Count':<10}")
print("-" * 60)
for class_label, class_name in label_map.items():
    count = class_counts[class_label]
    print(f"{class_label:<15} {class_name:<30} {count:<10}")
print("-" * 60)
print(f"{'Total':<45} {sum(class_counts):<10}")

Dataset Summary
------------------------------------------------------------
Class Label     Class Name                     Count     
------------------------------------------------------------
0               Melanoma                       1500      
1               Vascular Lesions               1500      
2               Seborrheic Keratosis           1500      
3               Dermatofibroma                 1500      
4               Basal Cell Carcinoma           1500      
5               Acne                           1500      
------------------------------------------------------------
Total                                         9000      


In [15]:
import numpy as np
import cv2
from sklearn.model_selection import train_test_split
from tensorflow.keras.utils import to_categorical

# Function to preprocess images and remove labels of skipped images
def preprocess_images(image_list, label_list, target_size=(128, 128)):
    processed_images = []
    processed_labels = []

    for img, label in zip(image_list, label_list):
        img = np.array(img)

        # Ignore RGBA images (128, 128, 4)
        if img.shape[-1] == 4:
            print("Skipping RGBA image")
            continue

        # Convert grayscale images to RGB
        if len(img.shape) == 2:
            img = cv2.cvtColor(img, cv2.COLOR_GRAY2RGB)

        # Resize image to (128, 128)
        img = cv2.resize(img, target_size)

        processed_images.append(img)
        processed_labels.append(label)

    return np.array(processed_images, dtype=np.float32), np.array(processed_labels)

In [16]:
# 80% train + 20% test split
x_train, x_test, y_train, y_test = train_test_split(df['image'], df['label'], test_size=0.20, shuffle=True)

# Convert images into a consistent format, ignoring RGBA images and labels
x_train, y_train = preprocess_images([np.array(img) for img in x_train], y_train)
x_test, y_test = preprocess_images([np.array(img) for img in x_test], y_test)

# Normalize the images
x_train_mean = np.mean(x_train)
x_train_std = np.std(x_train)
x_test_mean = np.mean(x_test)
x_test_std = np.std(x_test)

x_train = (x_train - x_train_mean) / x_train_std
x_test = (x_test - x_test_mean) / x_test_std

num_classes = len(set(df['label']))
y_train = to_categorical(y_train, num_classes=num_classes)
y_test = to_categorical(y_test, num_classes=num_classes)

# Split training data into 60% training + 20% validation
x_train, x_validate, y_train, y_validate = train_test_split(x_train, y_train, test_size=0.25, shuffle=True)

# Ensure correct shape (128, 128, 3)
x_train = x_train.reshape(x_train.shape[0], 128, 128, 3)
x_test = x_test.reshape(x_test.shape[0], 128, 128, 3)
x_validate = x_validate.reshape(x_validate.shape[0], 128, 128, 3)

y_train = y_train.astype(int)
y_validate = y_validate.astype(int)

print("Data preprocessing complete!")
print(f"x_train shape: {x_train.shape}, y_train shape: {y_train.shape}")
print(f"x_test shape: {x_test.shape}, y_test shape: {y_test.shape}")
print(f"x_validate shape: {x_validate.shape}, y_validate shape: {y_validate.shape}")

Skipping RGBA image
Skipping RGBA image
Skipping RGBA image
Skipping RGBA image
Skipping RGBA image
Skipping RGBA image
Skipping RGBA image
Skipping RGBA image
Skipping RGBA image
Skipping RGBA image
Skipping RGBA image
Skipping RGBA image
Skipping RGBA image
Skipping RGBA image
Skipping RGBA image
Skipping RGBA image
Skipping RGBA image
Skipping RGBA image
Skipping RGBA image
Skipping RGBA image
Skipping RGBA image
Skipping RGBA image
Skipping RGBA image
Skipping RGBA image
Skipping RGBA image
Skipping RGBA image
Skipping RGBA image
Skipping RGBA image
Skipping RGBA image
Skipping RGBA image
Skipping RGBA image
Skipping RGBA image
Skipping RGBA image
Skipping RGBA image
Skipping RGBA image
Skipping RGBA image
Skipping RGBA image
Skipping RGBA image
Skipping RGBA image
Skipping RGBA image
Skipping RGBA image
Skipping RGBA image
Skipping RGBA image
Skipping RGBA image
Skipping RGBA image
Skipping RGBA image
Skipping RGBA image
Skipping RGBA image
Skipping RGBA image
Skipping RGBA image
