In [7]:
import os
import pandas as pd
import shutil

# ----- Configuration -----
# Path to the CSV file that contains the image data (only image names, no extension)
csv_file = '/Users/samrudhsalas/Downloads/SkinGuard/Skin_Cancer/ISIC_2020_Training_GroundTruth.csv'

# The directory where the original images are stored
source_images_dir = '/Users/samrudhsalas/Downloads/SkinGuard/Skin_Cancer/train'

# The target directory where selected images will be placed
target_dir = '/Users/samrudhsalas/Downloads/SkinGuard/newimg'

# Name for the new CSV file to be created in the target directory
new_csv_filename = '/Users/samrudhsalas/Downloads/SkinGuard/newmeta.csv'

# Number of benign images to select
num_benign = 800

# File extension to append to image names (e.g., ".jpg", ".png")
file_extension = '.jpg'

# ----- Create the target directory if it doesn't exist -----
if not os.path.exists(target_dir):
    os.makedirs(target_dir)
    print(f"Created directory: {target_dir}")

# ----- Load the CSV data -----
df = pd.read_csv(csv_file)

# ----- Filter the data -----
# Adjust these column names ('image_name' and 'benign_malignant') as per your CSV file.
malignant_df = df[df['benign_malignant'] == 'malignant']
benign_df = df[df['benign_malignant'] == 'benign']

# If there are fewer than 800 benign images, this will select all available benign images.
if len(benign_df) < num_benign:
    print(f"Warning: Only {len(benign_df)} benign images found. Selecting all available benign images.")
    selected_benign_df = benign_df
else:
    # Select a random sample of benign images. To select the first 800 instead, replace .sample() with .head()
    selected_benign_df = benign_df.sample(n=num_benign, random_state=42)

# Combine the malignant and benign dataframes into one.
selected_df = pd.concat([malignant_df, selected_benign_df], ignore_index=True)

# ----- Copy (or Move) Images to the Target Directory -----
# We will also build a list to hold the records for our new CSV file.
new_records = []

for idx, row in selected_df.iterrows():
    # Since the CSV contains only the image name, append the extension
    base_filename = row['image_name']
    filename = base_filename + file_extension
    
    src_path = os.path.join(source_images_dir, filename)
    dst_path = os.path.join(target_dir, filename)

    # Check if the source file exists before attempting to copy
    if not os.path.isfile(src_path):
        print(f"File not found: {src_path}. Skipping this file.")
        continue

    try:
        # Copy the image. If you prefer to move the image, use shutil.move(src_path, dst_path)
        shutil.copy(src_path, dst_path)
        print(f"Copied: {filename}")
        new_records.append(row)
    except Exception as e:
        print(f"Error copying {filename}: {e}")

# ----- Create a New CSV File for the Selected Images -----
if new_records:
    new_df = pd.DataFrame(new_records)
    new_csv_path = os.path.join(target_dir, new_csv_filename)
    new_df.to_csv(new_csv_path, index=False)
    print(f"New CSV file created at: {new_csv_path}")
else:
    print("No records to write to CSV.")


Copied: ISIC_0149568.jpg
Copied: ISIC_0188432.jpg
Copied: ISIC_0207268.jpg
Copied: ISIC_0232101.jpg
Copied: ISIC_0247330.jpg
Copied: ISIC_0250839.jpg
Copied: ISIC_0272509.jpg
Copied: ISIC_0273046.jpg
Copied: ISIC_0274382.jpg
Copied: ISIC_0275828.jpg
Copied: ISIC_0280912.jpg
Copied: ISIC_0286360.jpg
Copied: ISIC_0327406.jpg
Copied: ISIC_0333091.jpg
Copied: ISIC_0337631.jpg
Copied: ISIC_0351666.jpg
Copied: ISIC_0369831.jpg
Copied: ISIC_0489267.jpg
Copied: ISIC_0502582.jpg
Copied: ISIC_0504165.jpg
Copied: ISIC_0528044.jpg
Copied: ISIC_0533122.jpg
Copied: ISIC_0559335.jpg
Copied: ISIC_0572205.jpg
Copied: ISIC_0599605.jpg
Copied: ISIC_0624498.jpg
Copied: ISIC_0639769.jpg
Copied: ISIC_0645454.jpg
Copied: ISIC_0647224.jpg
Copied: ISIC_0662000.jpg
Copied: ISIC_0744013.jpg
Copied: ISIC_0779920.jpg
Copied: ISIC_0833889.jpg
Copied: ISIC_0844312.jpg
Copied: ISIC_0862745.jpg
Copied: ISIC_0911264.jpg
Copied: ISIC_0914168.jpg
Copied: ISIC_0924427.jpg
Copied: ISIC_0946787.jpg
Copied: ISIC_0952472.jpg


In [14]:
import os
import tensorflow as tf
import pandas as pd
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import (Conv2D, MaxPooling2D, Flatten, Dense, 
                                     Dropout, BatchNormalization)
from tensorflow.keras.optimizers import Adam

# ----- Configuration -----
target_dir = '/Users/samrudhsalas/Downloads/SkinGuard/newimg'         # Directory where images are stored
csv_filename = '/Users/samrudhsalas/Downloads/SkinGuard/newmeta.csv'          # CSV file containing image names (without extension) and labels
csv_file_path = os.path.join(target_dir, csv_filename)
file_extension = '.jpg'                # Adjust if your images use another extension

img_height = 150
img_width = 150

batch_size = 32
epochs = 20

# ----- Load CSV and Prepare Filepaths & Labels -----
df = pd.read_csv(csv_file_path)

# Create full file paths by appending the extension and directory
df['filepath'] = df['image_name'].apply(lambda x: os.path.join(target_dir, x + file_extension))

# Map string labels to numerical values (e.g., benign=0, malignant=1)
label_mapping = {'benign': 0, 'malignant': 1}
df['target'] = df['benign_malignant'].map(label_mapping)

# Convert to numpy arrays
filepaths = df['filepath'].values
labels = df['target'].values

# ----- Create a tf.data Dataset -----
dataset = tf.data.Dataset.from_tensor_slices((filepaths, labels))

# Shuffle the dataset (good for training)
dataset_size = len(filepaths)
dataset = dataset.shuffle(buffer_size=dataset_size, seed=42)

def load_and_preprocess_image(filepath, label):
    """
    Loads an image, decodes it, resizes, and converts it to float32 [0, 1].
    Also casts the label to float32.
    """
    # Read and decode the image
    image = tf.io.read_file(filepath)
    image = tf.image.decode_jpeg(image, channels=3)
    # Resize the image to the desired size
    image = tf.image.resize(image, [img_height, img_width])
    # Convert image to [0, 1] range
    image = tf.image.convert_image_dtype(image, tf.float32)
    
    # Cast the label to float32 (important for binary_crossentropy)
    label = tf.cast(label, tf.float32)
    return image, label

# Map the preprocessing function in parallel
dataset = dataset.map(load_and_preprocess_image, num_parallel_calls=tf.data.AUTOTUNE)

# Split dataset into training (80%) and validation (20%)
train_size = int(0.8 * dataset_size)
train_dataset = dataset.take(train_size)
val_dataset = dataset.skip(train_size)

train_dataset = train_dataset.batch(batch_size).prefetch(tf.data.AUTOTUNE)
val_dataset = val_dataset.batch(batch_size).prefetch(tf.data.AUTOTUNE)

# ----- Build the CNN Model -----
model = Sequential([
    # Input layer is implicitly defined by the shape of the first Conv2D layer
    Conv2D(32, (3, 3), activation='relu', input_shape=(img_height, img_width, 3)),
    BatchNormalization(),
    MaxPooling2D(pool_size=(2, 2)),
    
    Conv2D(64, (3, 3), activation='relu'),
    BatchNormalization(),
    MaxPooling2D(pool_size=(2, 2)),
    
    Conv2D(128, (3, 3), activation='relu'),
    BatchNormalization(),
    MaxPooling2D(pool_size=(2, 2)),
    
    Flatten(),
    Dense(128, activation='relu'),
    Dropout(0.5),
    Dense(1, activation='sigmoid')  # Sigmoid activation for binary classification
])

# Use a lower learning rate if needed (e.g., 0.0001)
optimizer = Adam(learning_rate=0.0001)

model.compile(
    optimizer=optimizer,
    loss='binary_crossentropy',
    metrics=['accuracy']
)

model.summary()

# ----- Train the Model -----
history = model.fit(
    train_dataset,
    epochs=epochs,
    validation_data=val_dataset
)

# Save the model after training
model.save('cnn_model_with_csv.h5')
print("Model saved as cnn_model_with_csv.h5")


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Epoch 1/20
[1m35/35[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m71s[0m 2s/step - accuracy: 0.6063 - loss: 4.9961 - val_accuracy: 0.5704 - val_loss: 6.6650
Epoch 2/20
[1m35/35[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m74s[0m 2s/step - accuracy: 0.6768 - loss: 4.7280 - val_accuracy: 0.5993 - val_loss: 3.7553
Epoch 3/20
[1m35/35[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m90s[0m 3s/step - accuracy: 0.7197 - loss: 2.0277 - val_accuracy: 0.6498 - val_loss: 1.6680
Epoch 4/20
[1m35/35[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m70s[0m 2s/step - accuracy: 0.7281 - loss: 6.0006 - val_accuracy: 0.7184 - val_loss: 1.4775
Epoch 5/20
[1m35/35[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m60s[0m 2s/step - accuracy: 0.7363 - loss: 7.6460 - val_accuracy: 0.7292 - val_loss: 1.0317
Epoch 6/20
[1m35/35[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m61s[0m 2s/step - accuracy: 0.7092 - loss: 5.1782 - val_accuracy: 0.7545 - val_loss: 0.9265
Epoch 7/20
[1m35/35[0m [32m━━━━━━━━━━



Model saved as cnn_model_with_csv.h5


In [17]:
import numpy as np
from tensorflow.keras.models import load_model
from tensorflow.keras.preprocessing import image
import tensorflow as tf

# Load the trained model
model = load_model("cnn_model_with_csv.h5")

# Fix retracing by using tf.function and ensuring input shape consistency @tf.function(reduce_retracing=True)
def predict_image(img_path):
    """Predicts if the given image is malignant or benign."""
    img = image.load_img(img_path, target_size=(150, 150))
    img_array = image.img_to_array(img)
    img_array = np.expand_dims(img_array, axis=0)  # Add batch dimension
    img_array = img_array.astype('float32') / 255.0  # Normalize

    prediction = model.predict(img_array, verbose=0)  # Ensure verbose=0 to suppress logs
    class_label = "Malignant" if prediction[0][0] > 0.5 else "Benign"
    print(f"Prediction: {class_label} (Confidence: {prediction[0][0]:.2f})")

# Test an image
test_img = "/Users/samrudhsalas/Downloads/SkinGuard/Skin_Cancer/train/ISIC_0076995.jpg"  # Adjust path
predict_image(test_img)




Prediction: Malignant (Confidence: 0.58)
