In [None]:
import os
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import tensorflow as tf
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, precision_score, recall_score, confusion_matrix

# Load the ground-truth table from CSV (an 'image' id column plus one column per class).
csv_path = '/kaggle/input/isic-2019/ISIC_2019_Training_GroundTruth.csv'
data = pd.read_csv(csv_path)

# Exclude 'UNK' from the class list, the class-to-index mapping and the label vectors.
# Class names and label vectors are taken from the SAME frame, so position i of a
# label vector always refers to classes[i] regardless of the CSV's column order.
label_frame = data.drop(columns=['image', 'UNK'])
classes = label_frame.columns.tolist()
class_to_idx = {cls: idx for idx, cls in enumerate(classes)}

# Extract image names and labels
image_names = data['image'].tolist()
labels = label_frame.values.tolist()

# Construct full paths to images
image_dir = '/kaggle/input/isic-2019/ISIC_2019_Training_Input/ISIC_2019_Training_Input'
image_paths = [os.path.join(image_dir, name + '.jpg') for name in image_names]

# Visualize the distribution of classes (column-wise sum of the label vectors)
class_counts = np.sum(labels, axis=0)
plt.figure(figsize=(10, 6))
sns.barplot(x=classes, y=class_counts)
plt.xlabel('Class')
plt.ylabel('Count')
plt.title('Class Distribution')
plt.xticks(rotation=45)
plt.show()

# Split data into training and test sets (80/20, fixed seed).
# The split is stratified on the arg-max class of each label vector so that rare
# classes keep the same proportion in both splits.
# NOTE(review): assumes every row has exactly one class set to 1 — a row with all
# zeros would be stratified as class index 0; confirm against the CSV.
class_ids = np.argmax(labels, axis=1)
train_images, test_images, train_labels, test_labels = train_test_split(
    image_paths,
    labels,
    test_size=0.2,
    random_state=42,
    stratify=class_ids,
)

# Preprocessing functions
def load_and_preprocess_image(path):
    """Read a JPEG file and return it as a 260x260 RGB float32 tensor scaled by 1/255.

    Args:
        path: Path of the JPEG file to read.

    Returns:
        The decoded image, resized to 260x260 and divided by 255.
    """
    # NOTE(review): the Keras EfficientNet applications are documented to expect
    # [0, 255] inputs; confirm the model consuming these images accounts for the
    # [0, 1] range produced here.
    raw_bytes = tf.io.read_file(path)
    rgb = tf.image.decode_jpeg(raw_bytes, channels=3)
    resized = tf.image.resize(rgb, [260, 260])  # default interpolation, not 'nearest'
    return tf.cast(resized, tf.float32) / 255.0

def data_augment(image, label):
    """Apply a random left/right flip to the image; the label is passed through unchanged.

    Returns:
        A ``(flipped_image, label)`` tuple.
    """
    flipped = tf.image.random_flip_left_right(image)
    return flipped, label

def load_and_preprocess_from_path_labels(path, label):
    """Turn a ``(path, label)`` pair into ``(preprocessed_image, label)``.

    The image is loaded with ``load_and_preprocess_image``; the label is returned as is.
    """
    image = load_and_preprocess_image(path)
    return image, label

# Create TensorFlow datasets
# Training pipeline: shuffle -> load/preprocess -> augment -> batch -> prefetch.
# The shuffle runs on the (path, label) pairs BEFORE any image is decoded, so the
# buffer holds short strings instead of decoded 260x260x3 float32 images (a
# 2048-image buffer of those is roughly 1.6 GB). Because the entries are cheap,
# the buffer can span the whole training set, which gives a full shuffle every
# epoch rather than a 2048-element sliding window.
train_ds = tf.data.Dataset.from_tensor_slices((train_images, train_labels))
train_ds = (
    train_ds.shuffle(len(train_images))
    .map(load_and_preprocess_from_path_labels, num_parallel_calls=tf.data.AUTOTUNE)
    .map(data_augment, num_parallel_calls=tf.data.AUTOTUNE)
    .batch(32)
    .prefetch(tf.data.AUTOTUNE)
)

# Test pipeline: no shuffling and no augmentation, so the prediction order matches
# the order of test_images / test_labels.
test_ds = tf.data.Dataset.from_tensor_slices((test_images, test_labels))
test_ds = (
    test_ds.map(load_and_preprocess_from_path_labels, num_parallel_calls=tf.data.AUTOTUNE)
    .batch(32)
    .prefetch(tf.data.AUTOTUNE)
)

# Learning-rate schedule parameters: linear ramp-up, optional plateau, exponential decay.
LR_START = 1e-5           # rate at epoch 0
LR_MAX = 5e-5             # rate reached at the end of the ramp-up
LR_MIN = 1e-5             # floor the decay converges towards
LR_RAMPUP_EPOCHS = 4      # number of epochs spent ramping from LR_START to LR_MAX
LR_SUSTAIN_EPOCHS = 0     # number of epochs held at LR_MAX after the ramp-up
LR_EXP_DECAY = 0.8        # per-epoch decay factor applied after ramp-up and plateau
EPOCHS = 15

def lrfn(epoch):
    """Return the learning rate for the given epoch index.

    Args:
        epoch: Zero-based epoch number.

    Returns:
        A linearly interpolated rate during the ramp-up, ``LR_MAX`` during the
        sustain phase, and afterwards ``LR_MIN`` plus an exponentially decaying
        share of ``LR_MAX - LR_MIN``.
    """
    if epoch < LR_RAMPUP_EPOCHS:
        return (LR_MAX - LR_START) / LR_RAMPUP_EPOCHS * epoch + LR_START
    if epoch < LR_RAMPUP_EPOCHS + LR_SUSTAIN_EPOCHS:
        return LR_MAX
    return (LR_MAX - LR_MIN) * LR_EXP_DECAY**(epoch - LR_RAMPUP_EPOCHS - LR_SUSTAIN_EPOCHS) + LR_MIN

# Per-epoch learning-rate callback driven by lrfn.
lr_callback = tf.keras.callbacks.LearningRateScheduler(lrfn, verbose=True)

# Load EfficientNetB2 model and add custom classification layer.
# The input pipeline feeds images scaled to [0, 1], but the Keras EfficientNet
# applications are documented to expect raw [0, 255] pixel values (they contain their
# own rescaling/normalisation layers). The 1/255 scaling is therefore undone INSIDE
# the model: the backbone sees the range it is documented to expect, while every
# caller (training here, and any code that reloads the saved file) keeps feeding
# [0, 1] images exactly as before.
base_model = tf.keras.applications.EfficientNetB2(include_top=False, input_shape=(260, 260, 3), pooling='avg')
inputs = tf.keras.Input(shape=(260, 260, 3))
rescaled = tf.keras.layers.Rescaling(255.0)(inputs)
base_output = base_model(rescaled)
classifier = tf.keras.layers.Dense(len(classes), activation='softmax')(base_output)
model = tf.keras.Model(inputs=inputs, outputs=classifier)

# Compile the model.
# Adam is created with its default learning rate; per the Keras documentation the
# LearningRateScheduler callback sets the rate at the start of every epoch, so the
# values returned by lrfn are what training uses.
model.compile(
    optimizer=tf.keras.optimizers.Adam(),
    loss=tf.keras.losses.CategoricalCrossentropy(),
    metrics=['accuracy'],
)

# Train the model; test_ds doubles as the validation set reported after each epoch.
history = model.fit(
    train_ds,
    validation_data=test_ds,
    epochs=EPOCHS,
    verbose=1,
    callbacks=[lr_callback],
)

# Save the model
model.save('/kaggle/working/EfficientNetB2.h5')

# Evaluate model performance.
# test_ds is not shuffled, so row i of the predictions corresponds to test_labels[i].
y_true = np.argmax(test_labels, axis=1)
y_pred = np.argmax(model.predict(test_ds), axis=1)

accuracy = accuracy_score(y_true, y_pred)
precision = precision_score(y_true, y_pred, average='macro')
recall = recall_score(y_true, y_pred, average='macro')
conf_matrix = confusion_matrix(y_true, y_pred)

print("Accuracy:", accuracy)
print("Precision:", precision)
print("Recall:", recall)
print("Confusion Matrix:\n", conf_matrix)

# Print the number of images belonging to each label in training data.
# Counts come from train_labels (the training split), not from the full CSV, so the
# numbers match what the heading announces.
print("Training Class Distribution:")
print(pd.Series(np.sum(train_labels, axis=0), index=classes).astype(int))

# Print the number of images belonging to each label in test data.
# The 'label' column stores the class NAME of each test image: a column of raw label
# lists cannot be passed to value_counts() because lists are unhashable (TypeError).
print("Test Class Distribution:")
test_data = pd.DataFrame({'image': test_images, 'label': [classes[idx] for idx in y_true]})
print(test_data['label'].value_counts().reindex(classes, fill_value=0))

# Class index to name mapping
class_index_to_name = {idx: cls for cls, idx in class_to_idx.items()}
print("Class Index to Name Mapping:")
print(class_index_to_name)

























In [2]:
# Plot training history
def plot_training_history(history):
    """Draw training vs. validation accuracy and loss curves side by side.

    Args:
        history: Object whose ``history`` attribute is a mapping containing the
            'accuracy', 'val_accuracy', 'loss' and 'val_loss' series.
    """
    records = history.history
    acc, val_acc = records['accuracy'], records['val_accuracy']
    loss, val_loss = records['loss'], records['val_loss']

    # Epochs are numbered from 1 on the x axis.
    epochs = range(1, len(acc) + 1)

    # (training series, validation series, legend noun, title/axis noun) per panel
    panels = (
        (acc, val_acc, 'accuracy', 'Accuracy'),
        (loss, val_loss, 'loss', 'Loss'),
    )

    plt.figure(figsize=(14, 5))
    for position, (train_series, val_series, noun, heading) in enumerate(panels, start=1):
        plt.subplot(1, 2, position)
        plt.plot(epochs, train_series, 'b', label=f'Training {noun}')
        plt.plot(epochs, val_series, 'r', label=f'Validation {noun}')
        plt.title(f'Training and Validation {heading}')
        plt.xlabel('Epochs')
        plt.ylabel(heading)
        plt.legend()

    plt.show()

# Render the accuracy/loss curves recorded in `history`
plot_training_history(history)

# Evaluate model performance: arg-max of the label vectors vs. arg-max of the predictions
y_true = np.asarray(test_labels).argmax(axis=1)
y_pred = model.predict(test_ds).argmax(axis=1)

# Macro averaging gives every class the same weight in precision and recall.
accuracy = accuracy_score(y_true, y_pred)
precision = precision_score(y_true, y_pred, average='macro')
recall = recall_score(y_true, y_pred, average='macro')
conf_matrix = confusion_matrix(y_true, y_pred)

print("Accuracy:", accuracy)
print("Precision:", precision)
print("Recall:", recall)
print("Confusion Matrix:\n", conf_matrix)

# Plot confusion matrix
def plot_confusion_matrix(y_true, y_pred, classes):
    """Show the confusion matrix of ``y_true`` vs. ``y_pred`` as an annotated heatmap.

    Args:
        y_true: True class indices.
        y_pred: Predicted class indices.
        classes: Tick labels used for both axes.
    """
    matrix = confusion_matrix(y_true, y_pred)
    heatmap_options = dict(
        annot=True,
        fmt="d",
        cmap='Blues',
        xticklabels=classes,
        yticklabels=classes,
    )

    plt.figure(figsize=(10, 8))
    sns.heatmap(matrix, **heatmap_options)
    plt.title('Confusion Matrix')
    plt.xlabel('Predicted')
    plt.ylabel('True')
    plt.show()

plot_confusion_matrix(y_true, y_pred, classes)

# Print the number of images belonging to each label in training data.
# Counts come from train_labels (the training split), not from the full CSV, so the
# numbers match what the heading announces.
print("Training Class Distribution:")
print(pd.Series(np.sum(train_labels, axis=0), index=classes).astype(int))

# Print the number of images belonging to each label in test data.
# The 'label' column stores the class NAME of each test image: a column of raw label
# lists cannot be passed to value_counts() because lists are unhashable (TypeError).
print("Test Class Distribution:")
test_data = pd.DataFrame({'image': test_images, 'label': [classes[idx] for idx in y_true]})
print(test_data['label'].value_counts().reindex(classes, fill_value=0))

# Class index to name mapping
class_index_to_name = {idx: cls for cls, idx in class_to_idx.items()}
print("Class Index to Name Mapping:")
print(class_index_to_name)





In [3]:
import tensorflow as tf
import numpy as np

# Function to load and preprocess a single image
def load_and_preprocess_image(path):
    """Read a JPEG file and return a 260x260 RGB tensor with values divided by 255.

    Args:
        path: Path of the JPEG file to read.
    """
    # NOTE(review): presumably this has to mirror the preprocessing the model was
    # trained with — confirm if either side changes.
    encoded = tf.io.read_file(path)
    pixels = tf.image.decode_jpeg(encoded, channels=3)
    pixels = tf.image.resize(pixels, [260, 260])
    return pixels / 255.0  # Normalize to [0, 1]

# Function to predict the label of a single image
def predict_image(model, image_path, class_index_to_name):
    """Predict the label name of a single image.

    Args:
        model: Object whose ``predict`` method is called on a batch of one image.
        image_path: Path of the image file to classify.
        class_index_to_name: Mapping from class index to label name.

    Returns:
        The name mapped to the highest-scoring class index.
    """
    # The model is given a batch, so add a leading batch dimension of size 1.
    batch = tf.expand_dims(load_and_preprocess_image(image_path), axis=0)
    scores = model.predict(batch)
    best_index = np.argmax(scores, axis=1)[0]
    return class_index_to_name[best_index]

# Restore the trained model from the saved .h5 file
model = tf.keras.models.load_model('/kaggle/working/EfficientNetB2.h5')

# Hard-coded index -> label-name table used to decode the prediction
class_index_to_name = dict(enumerate(['MEL', 'NV', 'BCC', 'AK', 'BKL', 'DF', 'VASC', 'SCC']))

# Image to classify
image_path = '/kaggle/input/isic-2019/ISIC_2019_Training_Input/ISIC_2019_Training_Input/ISIC_0012214_downsampled.jpg'

# Run the prediction and report the decoded label
predicted_label = predict_image(
    model=model,
    image_path=image_path,
    class_index_to_name=class_index_to_name,
)
print("Predicted Label:", predicted_label)




In [None]:
import os
import pandas as pd
import tensorflow as tf

# Function to load and preprocess a single image
def load_and_preprocess_image(image_path):
    """Read a JPEG file and return a 260x260 RGB tensor with values divided by 255.

    Args:
        image_path: Path of the JPEG file to read.
    """
    file_contents = tf.io.read_file(image_path)
    decoded = tf.image.decode_jpeg(file_contents, channels=3)
    resized = tf.image.resize(decoded, [260, 260])
    return resized / 255.0  # Normalize to [0, 1]

# Function to predict the label of a single image
def predict_image(model, image_path):
    """Predict the class index of a single image.

    Args:
        model: Object whose ``predict`` method is called on a batch of one image.
        image_path: Path of the image file to classify.

    Returns:
        The index of the highest-scoring class, converted with ``.numpy()``.
    """
    # The model is given a batch, so add a leading batch dimension of size 1.
    single_image_batch = tf.expand_dims(load_and_preprocess_image(image_path), axis=0)
    class_scores = model.predict(single_image_batch)
    return tf.argmax(class_scores, axis=1)[0].numpy()

# Load the trained model
model = tf.keras.models.load_model('/kaggle/working/EfficientNetB2.h5')

# Load the CSV file with image names
csv_path = '/kaggle/input/isic-2019/ISIC_2019_Training_GroundTruth.csv'
data = pd.read_csv(csv_path)

# Path to the directory containing images
image_dir = '/kaggle/input/isic-2019/ISIC_2019_Training_Input/ISIC_2019_Training_Input'

# Predict labels for all rows.
# The images are streamed through one batched tf.data pipeline and a single
# model.predict call instead of one predict call per image: the per-call overhead
# of predict is paid once rather than once per row. The pipeline is not shuffled, so
# prediction i belongs to row i of `data`.
# NOTE(review): this scores every row of the ground-truth CSV, which includes the
# images the model was trained on — accuracy computed from the output file is not a
# held-out estimate.
all_image_paths = [os.path.join(image_dir, image_name + '.jpg') for image_name in data['image']]
if all_image_paths:
    prediction_ds = (
        tf.data.Dataset.from_tensor_slices(all_image_paths)
        .map(load_and_preprocess_image, num_parallel_calls=tf.data.AUTOTUNE)
        .batch(32)
        .prefetch(tf.data.AUTOTUNE)
    )
    # Arg-max over the class axis gives one predicted class index per image.
    predicted_labels = model.predict(prediction_ds).argmax(axis=1).tolist()
else:
    # Nothing to predict for an empty CSV; keep the column assignment below valid.
    predicted_labels = []

# Add predicted labels to the DataFrame
data['predicted_label'] = predicted_labels

# Save the updated CSV file with predicted labels for all rows
output_csv_path = '/kaggle/working/predicted_labels.csv'
data.to_csv(output_csv_path, index=False)

print(f"Predicted labels saved to {output_csv_path}")




In [6]:
import pandas as pd

# Read back the CSV that holds the ground-truth columns plus 'predicted_label'
csv_file = '/kaggle/working/predicted_labels.csv'  # Replace with the actual path to your CSV file
df = pd.read_csv(csv_file)

# Index -> label-name table used to find the ground-truth column for a prediction
mapping = dict(enumerate(['MEL', 'NV', 'BCC', 'AK', 'BKL', 'DF', 'VASC', 'SCC']))

# Tally the rows whose predicted class column is (or is not) set to 1
correct_predictions = 0
incorrect_predictions = 0

for _, record in df.iterrows():
    column_for_prediction = mapping[record['predicted_label']]

    # A prediction counts as correct only when the ground-truth column it names equals 1.
    if record[column_for_prediction] != 1:
        incorrect_predictions += 1
        continue
    correct_predictions += 1

# Output the raw counts
total_predictions = len(df)

print(f'Correct predictions: {correct_predictions}')
print(f'Incorrect predictions: {incorrect_predictions}')

# Express both counts as a share of all rows
correct_percentage = correct_predictions / total_predictions * 100
incorrect_percentage = incorrect_predictions / total_predictions * 100

# Print the percentages
print(f'Correct prediction percentage: {correct_percentage:.2f}%')
print(f'Incorrect prediction percentage: {incorrect_percentage:.2f}%')


