In [1]:
import os
import pandas as pd
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Dropout
from tensorflow.keras.optimizers import Adam

In [3]:
# 1. Load metadata
skin_df = pd.read_csv('HAM10000_metadata.csv')


In [5]:
# Specify the image directory
image_dir = 'skin-cancer-mnist-ham10000/Untitled Folder 1'  # Change this to the path where images are stored

In [25]:
# Add full file paths to metadata
skin_df['file_path'] = skin_df ['image_id'].apply(lambda x: os.path.join(image_dir, f"{x}.jpg"))

In [27]:
# 2. Create ImageDataGenerator for data augmentation and preprocessing
datagen = ImageDataGenerator(
    rescale=1./255,  # Normalize pixel values to [0, 1]
    validation_split=0.2,  # Reserve 20% for validation
    horizontal_flip=True,  # Randomly flip images for augmentation
    rotation_range=30,  # Randomly rotate images
    zoom_range=0.2,  # Randomly zoom images
)

In [29]:
import pandas as pd
from sklearn.preprocessing import LabelEncoder

# Assuming your DataFrame is already loaded into skin_df
# Example: skin_df = pd.read_csv('your_file.csv')

# Initialize LabelEncoder
le = LabelEncoder()

# Fit the encoder to the unique class names (assuming 'dx' column has the diagnosis labels)
le.fit(skin_df['dx'])

# Add a new column 'label' with the encoded numeric labels
skin_df['label'] = le.transform(skin_df['dx'])

# (Optional) Add another column 'label_str' with the original class labels (string version)
skin_df['label_str'] = le.inverse_transform(skin_df['label'])

# Display the first few rows to check the result
print(skin_df.head())

# Print out the classes for verification
print("Classes found:", list(le.classes_))

# If needed, save the updated DataFrame with labels to a new CSV
skin_df.to_csv('skin_with_labels.csv', index=False)


     lesion_id      image_id   dx dx_type   age   sex localization  label  \
0  HAM_0000118  ISIC_0027419  bkl   histo  80.0  male        scalp      2   
1  HAM_0000118  ISIC_0025030  bkl   histo  80.0  male        scalp      2   
2  HAM_0002730  ISIC_0026769  bkl   histo  80.0  male        scalp      2   
3  HAM_0002730  ISIC_0025661  bkl   histo  80.0  male        scalp      2   
4  HAM_0001466  ISIC_0031633  bkl   histo  75.0  male          ear      2   

  label_str                                          file_path  
0       bkl  skin-cancer-mnist-ham10000/Untitled Folder 1\I...  
1       bkl  skin-cancer-mnist-ham10000/Untitled Folder 1\I...  
2       bkl  skin-cancer-mnist-ham10000/Untitled Folder 1\I...  
3       bkl  skin-cancer-mnist-ham10000/Untitled Folder 1\I...  
4       bkl  skin-cancer-mnist-ham10000/Untitled Folder 1\I...  
Classes found: ['akiec', 'bcc', 'bkl', 'df', 'mel', 'nv', 'vasc']


In [31]:
# Convert the 'label' column to strings
skin_df['label'] = skin_df['label'].astype(str)

# Now create the training and validation data generators
from tensorflow.keras.preprocessing.image import ImageDataGenerator

# Define the image directory (folder containing your images)
image_directory = "skin-cancer-mnist-ham10000/Untitled Folder 1"

# Create an image data generator
datagen = ImageDataGenerator(validation_split=0.2)

# Generate training data
train_gen = datagen.flow_from_dataframe(
    dataframe=skin_df,
    directory=image_directory,  # Directory where images are stored
    x_col="image_id",           # Column containing the relative image filenames
    y_col="label",               # Column containing the encoded labels
    target_size=(224, 224),      # Resize images to the expected input size
    batch_size=32,
    class_mode="categorical",    # Use categorical labels for multi-class classification
    subset="training"            # Set this to 'training' for the training subset
)

# Generate validation data
val_gen = datagen.flow_from_dataframe(
    dataframe=skin_df,
    directory=image_directory,  # Directory where images are stored
    x_col="image_id",           # Column containing the relative image filenames
    y_col="label",               # Column containing the encoded labels
    target_size=(224, 224),      # Resize images to the expected input size
    batch_size=32,
    class_mode="categorical",    # Use categorical labels for multi-class classification
    subset="validation"          # Set this to 'validation' for the validation subset
)

# Now you can use train_gen and val_gen in your model training


Found 0 validated image filenames belonging to 0 classes.
Found 0 validated image filenames belonging to 0 classes.




In [33]:
print(skin_df.head())


     lesion_id      image_id   dx dx_type   age   sex localization label  \
0  HAM_0000118  ISIC_0027419  bkl   histo  80.0  male        scalp     2   
1  HAM_0000118  ISIC_0025030  bkl   histo  80.0  male        scalp     2   
2  HAM_0002730  ISIC_0026769  bkl   histo  80.0  male        scalp     2   
3  HAM_0002730  ISIC_0025661  bkl   histo  80.0  male        scalp     2   
4  HAM_0001466  ISIC_0031633  bkl   histo  75.0  male          ear     2   

  label_str                                          file_path  
0       bkl  skin-cancer-mnist-ham10000/Untitled Folder 1\I...  
1       bkl  skin-cancer-mnist-ham10000/Untitled Folder 1\I...  
2       bkl  skin-cancer-mnist-ham10000/Untitled Folder 1\I...  
3       bkl  skin-cancer-mnist-ham10000/Untitled Folder 1\I...  
4       bkl  skin-cancer-mnist-ham10000/Untitled Folder 1\I...  


In [35]:
import os

# Fix file paths by using forward slashes
skin_df['file_path'] = skin_df['file_path'].apply(lambda x: x.replace("\\", "/"))

# Now check the first few file paths to ensure they are correct
print(skin_df['file_path'].head())

# Check if the images exist in the directory
for path in skin_df['file_path'].head():
    if not os.path.exists(path):
        print(f"Image not found: {path}")


0    skin-cancer-mnist-ham10000/Untitled Folder 1/I...
1    skin-cancer-mnist-ham10000/Untitled Folder 1/I...
2    skin-cancer-mnist-ham10000/Untitled Folder 1/I...
3    skin-cancer-mnist-ham10000/Untitled Folder 1/I...
4    skin-cancer-mnist-ham10000/Untitled Folder 1/I...
Name: file_path, dtype: object


In [37]:
import os
invalid_images = [path for path in skin_df['skin-cancer-mnist-ham10000/Untitled Folder 1'] if not os.path.exists(path)]
print(f"Invalid images: {invalid_images}")


KeyError: 'skin-cancer-mnist-ham10000/Untitled Folder 1'

In [39]:
valid_image_extensions = ['.jpg', '.jpeg', '.png', '.bmp', '.gif']
skin_df['valid_images'] = skin_df['file_path'].apply(lambda x: any(x.lower().endswith(ext) for ext in valid_image_extensions))

# Check if there are any non-image files
print(skin_df[~skin_df['valid_images']])


Empty DataFrame
Columns: [lesion_id, image_id, dx, dx_type, age, sex, localization, label, label_str, file_path, valid_images]
Index: []


In [41]:
# Check the first few file paths and their extensions
print(skin_df['file_path'].head())


0    skin-cancer-mnist-ham10000/Untitled Folder 1/I...
1    skin-cancer-mnist-ham10000/Untitled Folder 1/I...
2    skin-cancer-mnist-ham10000/Untitled Folder 1/I...
3    skin-cancer-mnist-ham10000/Untitled Folder 1/I...
4    skin-cancer-mnist-ham10000/Untitled Folder 1/I...
Name: file_path, dtype: object


In [43]:
# Replace backslashes with forward slashes
skin_df['file_path'] = skin_df['file_path'].apply(lambda x: x.replace('\\', '/'))

# Check the updated paths
print(skin_df['file_path'].head())


0    skin-cancer-mnist-ham10000/Untitled Folder 1/I...
1    skin-cancer-mnist-ham10000/Untitled Folder 1/I...
2    skin-cancer-mnist-ham10000/Untitled Folder 1/I...
3    skin-cancer-mnist-ham10000/Untitled Folder 1/I...
4    skin-cancer-mnist-ham10000/Untitled Folder 1/I...
Name: file_path, dtype: object


In [45]:
# Check if the images exist at the fixed paths
skin_df['file_exists'] = skin_df['file_path'].apply(os.path.exists)

# Check for rows where the file doesn't exist
print(skin_df[~skin_df['file_exists']])


Empty DataFrame
Columns: [lesion_id, image_id, dx, dx_type, age, sex, localization, label, label_str, file_path, valid_images, file_exists]
Index: []


In [46]:
valid_image_extensions = ['.jpg', '.jpeg', '.png', '.bmp', '.gif', '.tiff']
skin_df['valid_images'] = skin_df['file_path'].apply(lambda x: any(x.lower().endswith(ext) for ext in valid_image_extensions))

# Verify the valid images
print(skin_df[~skin_df['valid_images']])  # This will show invalid images if any


Empty DataFrame
Columns: [lesion_id, image_id, dx, dx_type, age, sex, localization, label, label_str, file_path, valid_images, file_exists]
Index: []


In [47]:
# Show the rows with invalid images
print(skin_df[~skin_df['valid_images']])


Empty DataFrame
Columns: [lesion_id, image_id, dx, dx_type, age, sex, localization, label, label_str, file_path, valid_images, file_exists]
Index: []


In [48]:
import os

# Check if the image files exist
skin_df['file_exists'] = skin_df['file_path'].apply(lambda x: os.path.exists(x))

# Show rows where files do not exist
print(skin_df[~skin_df['file_exists']])


Empty DataFrame
Columns: [lesion_id, image_id, dx, dx_type, age, sex, localization, label, label_str, file_path, valid_images, file_exists]
Index: []


In [49]:
image_check = skin_df['file_path'].iloc[0]
print(f"Checking file: {image_check}")
print(f"Exists: {os.path.exists(image_check)}")


Checking file: skin-cancer-mnist-ham10000/Untitled Folder 1/ISIC_0027419.jpg
Exists: True


In [50]:
# Print the first few file paths to ensure they are correct
print(skin_df['file_path'].head())


0    skin-cancer-mnist-ham10000/Untitled Folder 1/I...
1    skin-cancer-mnist-ham10000/Untitled Folder 1/I...
2    skin-cancer-mnist-ham10000/Untitled Folder 1/I...
3    skin-cancer-mnist-ham10000/Untitled Folder 1/I...
4    skin-cancer-mnist-ham10000/Untitled Folder 1/I...
Name: file_path, dtype: object


In [51]:
from keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.preprocessing.image import ImageDataGenerator

# Define the image directory where the images are stored
image_directory = "skin-cancer-mnist-ham10000/Untitled Folder 1"

# Create an ImageDataGenerator object with rescaling
datagen = ImageDataGenerator(rescale=1./255)

# Create the data generator for the training set
train_gen = datagen.flow_from_dataframe(
    dataframe=skin_df,
    directory=image_directory,  # Ensure this points to the directory containing your images
    x_col="file_path",  # Column containing the image file paths
    y_col="label",  # Column containing the labels
    target_size=(224, 224),  # Resize images to 224x224
    batch_size=32,
    class_mode="categorical"  # Use categorical labels (multi-class classification)
)

# Check the generator
print(f"Classes found: {train_gen.class_indices}")


ImportError: cannot import name 'ImageDataGenerator' from 'keras.preprocessing.image' (C:\Users\jabla\anaconda3\Lib\site-packages\keras\api\preprocessing\image\__init__.py)

In [52]:
import os

# Create the full file paths by joining directory and image filenames
image_directory = "skin-cancer-mnist-ham10000/Untitled Folder 1"
skin_df['file_path'] = skin_df['image_id'].apply(lambda x: os.path.join(image_directory, f"{x}.jpg"))


In [54]:
# Convert the 'label' column to strings
skin_df['label'] = skin_df['label'].astype(str)


In [56]:
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from sklearn.model_selection import train_test_split

# Define the image directory
image_directory = "skin-cancer-mnist-ham10000/Untitled Folder 1"

# Create an image data generator
datagen = ImageDataGenerator(validation_split=0.2)

# Ensure 'file_path' is the correct column and is valid
print(skin_df['file_path'].head())  # Ensure file paths are correct

# Generate training data
train_gen = datagen.flow_from_dataframe(
    dataframe=skin_df,
    directory=None,  # No need for directory if full file path is already in 'file_path'
    x_col="file_path",  # Column containing the image paths
    y_col="label",      # Column containing the encoded labels
    target_size=(224, 224),  # Resize images to the expected input size
    batch_size=32,
    class_mode="categorical",  # Multi-class classification
    subset="training"          # Use the training subset
)

# Generate validation data
val_gen = datagen.flow_from_dataframe(
    dataframe=skin_df,
    directory=None,  # No need for directory if full file path is already in 'file_path'
    x_col="file_path",  # Column containing the image paths
    y_col="label",      # Column containing the encoded labels
    target_size=(224, 224),  # Resize images to the expected input size
    batch_size=32,
    class_mode="categorical",  # Multi-class classification
    subset="validation"        # Use the validation subset
)


0    skin-cancer-mnist-ham10000/Untitled Folder 1\I...
1    skin-cancer-mnist-ham10000/Untitled Folder 1\I...
2    skin-cancer-mnist-ham10000/Untitled Folder 1\I...
3    skin-cancer-mnist-ham10000/Untitled Folder 1\I...
4    skin-cancer-mnist-ham10000/Untitled Folder 1\I...
Name: file_path, dtype: object
Found 8012 validated image filenames belonging to 7 classes.
Found 2003 validated image filenames belonging to 7 classes.


In [62]:
import numpy as np

# Predict on a batch of images
predictions = model.predict(val_gen, verbose=1)

# Get the predicted class indices (the class with the highest probability)
predicted_class_indices = np.argmax(predictions, axis=-1)

# Reverse the class_indices to map from index to class name
class_indices = train_gen.class_indices
class_names = {v: k for k, v in class_indices.items()}  # Reversed dictionary

# Map predicted indices to class names
predicted_class_names = [class_names[idx] for idx in predicted_class_indices]

# Print the predicted class names
print(predicted_class_names)


[1m63/63[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m60s[0m 942ms/step
['0', '5', '5', '1', '5', '5', '5', '5', '5', '5', '5', '5', '0', '5', '5', '5', '4', '5', '5', '5', '5', '5', '5', '5', '5', '5', '5', '5', '5', '5', '5', '5', '5', '5', '5', '5', '5', '5', '5', '5', '5', '5', '5', '5', '5', '5', '5', '5', '5', '5', '5', '5', '5', '5', '5', '1', '5', '1', '5', '5', '5', '5', '5', '5', '5', '5', '5', '5', '5', '5', '5', '5', '5', '5', '5', '5', '5', '5', '5', '5', '5', '5', '5', '5', '5', '5', '5', '5', '5', '1', '5', '5', '5', '5', '5', '5', '5', '5', '5', '5', '5', '1', '5', '5', '5', '5', '1', '5', '5', '5', '5', '5', '4', '5', '5', '5', '5', '5', '5', '5', '5', '5', '5', '5', '5', '5', '5', '5', '5', '5', '5', '5', '5', '5', '5', '5', '5', '5', '5', '5', '1', '5', '5', '5', '5', '5', '5', '5', '5', '5', '5', '5', '4', '5', '5', '5', '5', '5', '5', '5', '1', '5', '5', '5', '1', '5', '5', '5', '1', '5', '5', '5', '5', '5', '5', '5', '5', '5', '5', '5', '5', '5', '5', '5', '5'

In [58]:
# Example model creation (adjust as needed)
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense

# Create a simple CNN model for classification
model = Sequential([
    Conv2D(32, (3, 3), activation='relu', input_shape=(224, 224, 3)),
    MaxPooling2D(pool_size=(2, 2)),
    Conv2D(64, (3, 3), activation='relu'),
    MaxPooling2D(pool_size=(2, 2)),
    Flatten(),
    Dense(128, activation='relu'),
    Dense(7, activation='softmax')  # 7 classes
])
# Example model creation (adjust as needed)
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense

# Create a simple CNN model for classification
model = Sequential([
    Conv2D(32, (3, 3), activation='relu', input_shape=(224, 224, 3)),
    MaxPooling2D(pool_size=(2, 2)),
    Conv2D(64, (3, 3), activation='relu'),
    MaxPooling2D(pool_size=(2, 2)),
    Flatten(),
    Dense(128, activation='relu'),
    Dense(7, activation='softmax')  # 7 classes
])

model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

# Train the model using the generators
model.fit(
    train_gen,
    validation_data=val_gen,
    epochs=3
)



  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
  self._warn_if_super_not_called()


Epoch 1/3
[1m251/251[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m470s[0m 2s/step - accuracy: 0.7678 - loss: 70.8401 - val_accuracy: 0.0025 - val_loss: 19.0876
Epoch 2/3
[1m251/251[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m421s[0m 2s/step - accuracy: 0.8463 - loss: 0.5960 - val_accuracy: 0.0050 - val_loss: 5.8100
Epoch 3/3
[1m251/251[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m503s[0m 2s/step - accuracy: 0.8655 - loss: 0.4466 - val_accuracy: 0.0065 - val_loss: 10.8026


<keras.src.callbacks.history.History at 0x14b01668bf0>

In [60]:
# Save the model to a file
model.save('skin_model.h5')


