In [3]:
import os
import pandas as pd
root_dir = 'C:\Vinh\Python\Machine Learning\Skin condition detection\Kaggle\IMG_CLASSES'

image_paths = []
labels = []

labels_dict = {
    "1. Eczema 1677": "Eczema",
    "2. Melanoma 15.75k": "Melanoma",
    "3. Atopic Dermatitis - 1.25k": "Atopic Dermatitis",
    "4. Basal Cell Carcinoma (BCC) 3323": "Basal Cell Carcinoma",
    "5. Melanocytic Nevi (NV) - 7970": "Melanocytic Nevi",
    "6. Benign Keratosis-like Lesions (BKL) 2624": "Benign Keratosis-like Lesions",
    "7. Psoriasis pictures Lichen Planus and related diseases - 2k": "Psoriasis pictures Lichen Planus and related diseases",
    "8. Seborrheic Keratoses and other Benign Tumors - 1.8k": "Seborrheic Keratoses and other Benign Tumors",
    "9. Tinea Ringworm Candidiasis and other Fungal Infections - 1.7k": "Tinea Ringworm Candidiasis and other Fungal Infections",
    "10. Warts Molluscum and other Viral Infections - 2103": "Warts Molluscum and other Viral Infections"
}

for label in os.listdir(root_dir):
    label_dir = os.path.join(root_dir, label)
    if os.path.isdir(label_dir):
        for filename in os.listdir(label_dir):
            if filename.lower().endswith(('.png', '.jpg', '.jpeg')): 
                image_paths.append(os.path.join(label_dir, filename))
                labels.append(labels_dict[label])

df = pd.DataFrame({
    'image_path': image_paths,
    'label': labels
})

df = df.sample(frac=1).reset_index(drop=True)

print(df.head())

                                          image_path                 label
0  C:\Vinh\Python\Machine Learning\Skin condition...      Melanocytic Nevi
1  C:\Vinh\Python\Machine Learning\Skin condition...              Melanoma
2  C:\Vinh\Python\Machine Learning\Skin condition...      Melanocytic Nevi
3  C:\Vinh\Python\Machine Learning\Skin condition...  Basal Cell Carcinoma
4  C:\Vinh\Python\Machine Learning\Skin condition...  Basal Cell Carcinoma


In [4]:
import torch

# Check if CUDA is available
if torch.cuda.is_available():
    # Select the GPU device
    device = torch.device('cuda')
    print(f"CUDA is available. Using device: {torch.cuda.get_device_name(0)}")
else:
    # Select the CPU device
    device = torch.device('cpu')
    print("CUDA is not available. Using CPU.")

# Print the selected device
print(f"Selected device: {device}")

CUDA is available. Using device: NVIDIA GeForce RTX 3050 Ti Laptop GPU
Selected device: cuda


In [5]:
import collections
import copy
import hashlib
import io
import os
import subprocess
import textwrap
import time

from typing import List, Text

from PIL import Image

import numpy as np
import pandas as pd
import tabulate


import tensorflow as tf

import matplotlib.pyplot as plt

In [6]:
condition_to_index = {condition: idx for idx, condition in enumerate(labels_dict.values())}
def conditions_to_binary_matrix(condition, condition_to_index):
    binary_matrix = np.zeros(len(condition_to_index))
    if condition in condition_to_index:
        binary_matrix[condition_to_index[condition]] = 1
    return binary_matrix
binary_labels = np.array([conditions_to_binary_matrix(cond, condition_to_index)
                          for cond in df["label"]])

print(len(binary_labels))
print(binary_labels)
print(binary_labels[5])

27153
[[0. 0. 0. ... 0. 0. 0.]
 [0. 1. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 0. 0.]
 ...
 [0. 0. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 0. 0.]]
[0. 0. 0. 0. 0. 0. 0. 1. 0. 0.]


In [7]:
from concurrent.futures import ThreadPoolExecutor, as_completed
from PIL import Image
import numpy as np

def load_and_preprocess_image(image_path, target_size=(128, 128)):
    try:
        image = Image.open(image_path).convert('RGB')
        image = image.resize(target_size)
        image = np.array(image, dtype=np.float32) / 255.0
        return image
    except Exception as e:
        print(f"Error loading image {image_path}: {e}")
        return np.zeros(target_size + (3,), dtype=np.float32)

def process_images(df, batch_size=100, target_size=(128, 128), max_workers=8):
    all_images = []
    
    def load_image(path):
        return load_and_preprocess_image(path, target_size)
    
    num_images = len(df)
    
    # Process images in batches
    for start in range(0, num_images, batch_size):
        batch_paths = df['image_path'].iloc[start:start + batch_size]
        batch_images = []
        
        with ThreadPoolExecutor(max_workers=max_workers) as executor:
            futures = [executor.submit(load_image, path) for path in batch_paths]
            for future in as_completed(futures):
                batch_images.append(future.result())
        
        # Convert batch to NumPy array and append
        all_images.append(np.array(batch_images, dtype=np.float32))
    
    # Concatenate all batches into one array
    return np.concatenate(all_images, axis=0)

# Example usage
batch_size = 100  # Adjust batch size as needed
images = process_images(df, batch_size=batch_size)
print(images.shape)

(27153, 128, 128, 3)


In [8]:
from sklearn.model_selection import train_test_split

X_train, X_val, y_train, y_val = train_test_split(images, binary_labels, test_size=0.2, random_state=42)

In [9]:
print("X_train shape:", X_train.shape)
print("X_val shape:", X_val.shape)
print("y_train shape:", y_train.shape)
print("y_val shape:", y_val.shape)

print("Number of samples in X_train:", X_train.shape[0])
print("Number of samples in X_val:", X_val.shape[0])
print("Number of samples in y_train:", y_train.shape[0])
print("Number of samples in y_val:", y_val.shape[0])

X_train shape: (21722, 128, 128, 3)
X_val shape: (5431, 128, 128, 3)
y_train shape: (21722, 10)
y_val shape: (5431, 10)
Number of samples in X_train: 21722
Number of samples in X_val: 5431
Number of samples in y_train: 21722
Number of samples in y_val: 5431


In [15]:
import tensorflow as tf

# Check TensorFlow GPU support and cuDNN version
print("Num GPUs Available: ", len(tf.config.list_physical_devices('GPU')))
print("cuDNN Version: ", tf.sysconfig.get_build_info()['cudnn_version'])

Num GPUs Available:  0


KeyError: 'cudnn_version'

In [8]:
import tensorflow as tf
from tensorflow.keras.preprocessing.image import ImageDataGenerator

train_dataset = tf.data.Dataset.from_tensor_slices((X_train, y_train))
val_dataset = tf.data.Dataset.from_tensor_slices((X_val, y_val))

import tensorflow as tf

datagen = ImageDataGenerator(
    rotation_range=40,
    width_shift_range=0.2,
    height_shift_range=0.2,
    shear_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True,
    fill_mode='nearest'
)


datagen.fit(X_train)

In [11]:
from tensorflow.keras.applications import ResNet50
from tensorflow.keras.models import Model
from tensorflow.keras.layers import GlobalAveragePooling2D, Dense, Input

base_model = ResNet50(weights='imagenet', include_top=False, input_shape=(128, 128, 3))

base_model.trainable = False

inputs = Input(shape=(128, 128, 3))
x = base_model(inputs, training=False)
x = GlobalAveragePooling2D()(x)
x = Dense(256, activation='relu')(x)
outputs = Dense(10, activation='softmax')(x)  

model = Model(inputs, outputs)

model.summary()


In [12]:
model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

In [14]:
history = model.fit(
    x=X_train, 
    y=y_train, 
    batch_size=32, 
    epochs=10, 
    validation_data=(X_val, y_val)
)

Epoch 1/10
[1m679/679[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m456s[0m 665ms/step - accuracy: 0.2800 - loss: 2.1652 - val_accuracy: 0.2922 - val_loss: 2.1391
Epoch 2/10
[1m679/679[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 528ms/step - accuracy: 0.2854 - loss: 2.1429

KeyboardInterrupt: 