In [None]:
import numpy as np
import cv2
import os
from sklearn.preprocessing import LabelBinarizer
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, BatchNormalization, Dropout
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.utils import to_categorical
from sklearn.model_selection import train_test_split
from sklearn.metrics import precision_score, recall_score, f1_score, classification_report

# Root folder of the dataset. load_data() treats every sub-folder as one class
# and uses the folder name as the label. Update this path for your environment.
dataset_dir = '/kaggle/input/isic-separated-2019/separated_ISIC_2019'

# Geometry of the images fed to the baseline CNN and size of the label space.
img_height = img_width = 96
num_classes = 8  # Number of classes in the dataset

# Function to load images and labels
def load_data(dataset_dir, img_height, img_width, max_images_per_class=None):
    """Load a folder-per-class image dataset into NumPy arrays.

    Every sub-directory of ``dataset_dir`` is treated as one class; its name
    becomes the label of each image read from it. Entries of ``dataset_dir``
    that are not directories are ignored.

    Args:
        dataset_dir: Path of the directory holding one sub-directory per class.
        img_height: Height, in pixels, each image is resized to.
        img_width: Width, in pixels, each image is resized to.
        max_images_per_class: Optional cap on the number of images loaded from
            each class folder. ``None`` (the default) loads every image.

    Returns:
        A ``(images, labels)`` tuple of NumPy arrays. ``images`` holds the
        resized pixel data scaled to ``[0, 1]`` (channel order is whatever
        ``cv2.imread`` produces, i.e. BGR); ``labels`` holds the class-folder
        name of each image, in the same order.
    """
    images = []
    labels = []

    for disease in os.listdir(dataset_dir):
        disease_dir = os.path.join(dataset_dir, disease)
        if not os.path.isdir(disease_dir):
            continue

        count = 0  # images accepted so far for this class
        for img_name in os.listdir(disease_dir):
            # Check the cap first so the limit is honoured exactly.
            if max_images_per_class is not None and count >= max_images_per_class:
                break

            img = cv2.imread(os.path.join(disease_dir, img_name))
            if img is None:
                # cv2.imread signals an unreadable / non-image file by
                # returning None; skip it instead of crashing in cv2.resize.
                continue

            # cv2.resize expects the target size as (width, height).
            img = cv2.resize(img, (img_width, img_height))
            images.append(np.asarray(img) / 255.0)
            labels.append(disease)
            count += 1

    return np.array(images), np.array(labels)

# Load the dataset: `images` holds the resized pixel arrays, `labels` the
# class-folder name of each image (see load_data above).
images, labels = load_data(dataset_dir, img_height, img_width)

# Notebook display: number of loaded samples.
labels.shape

# Per-class sample counts, printed to reveal any class imbalance.
unique_values, counts = np.unique(labels, return_counts=True)

print("Unique values:", unique_values)
print("Counts:", counts)

# Replace the string labels with their binarized (one-vs-all) encoding; the
# fitted binarizer is kept so the encoding can be inverted later.
label_binarizer = LabelBinarizer()
labels = label_binarizer.fit_transform(labels)

# Split the dataset into training and testing sets (80% / 20%), stratified on
# the encoded labels so both splits keep the same class proportions.
X_train, X_test, y_train, y_test = train_test_split(images, labels, test_size=0.2, random_state=26, stratify=labels)

# Data augmentation: random flips, rotations, shifts, shears and zooms that
# are applied on the fly to the batches produced by this generator.
_augmentation_settings = dict(
    rotation_range=20,
    width_shift_range=0.2,
    height_shift_range=0.2,
    shear_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True,
)
datagen = ImageDataGenerator(**_augmentation_settings)


# Baseline CNN: three convolutional stages followed by a dense classifier.
# The input size and the number of outputs come from the parameters defined
# at the top of the file (img_height, img_width, num_classes) so the network
# cannot drift out of sync with the data-loading settings.
cnn_model = Sequential([
    # First convolutional stage
    Conv2D(32, (3, 3), padding='same', activation='relu',
           input_shape=(img_height, img_width, 3)),
    BatchNormalization(),
    MaxPooling2D(pool_size=(3, 3)),  # Divides each spatial dimension by 3 (96x96 -> 32x32)
    Dropout(0.25),

    # Second convolutional stage (two stacked 64-filter convolutions)
    Conv2D(64, (3, 3), padding='same', activation='relu'),
    Conv2D(64, (3, 3), padding='same', activation='relu'),
    BatchNormalization(),
    MaxPooling2D(pool_size=(2, 2)),
    Dropout(0.25),

    # Third convolutional stage (two stacked 128-filter convolutions)
    Conv2D(128, (3, 3), padding='same', activation='relu'),
    Conv2D(128, (3, 3), padding='same', activation='relu'),
    BatchNormalization(),
    MaxPooling2D(pool_size=(2, 2)),
    Dropout(0.25),

    # Flatten the feature maps into one vector per image
    Flatten(),

    # Dense classifier
    Dense(1024, activation='relu'),
    BatchNormalization(),
    Dropout(0.5),

    # Output layer: one softmax probability per class
    Dense(num_classes, activation='softmax')
])

# categorical_crossentropy matches the binarized (one-hot) labels produced
# by LabelBinarizer earlier in the file.
cnn_model.compile(optimizer=Adam(learning_rate=0.001),
                  loss='categorical_crossentropy',
                  metrics=['accuracy'])

# Training hyper-parameters for the baseline CNN.
epochs = 50
batch_size = 32

# Fit the model using data augmentation.
# NOTE(review): validation_data is the held-out test split, so the per-epoch
# validation metrics are not independent of the final test metrics below.
cnn_model.fit(datagen.flow(X_train, y_train, batch_size=batch_size),
              epochs=epochs,
              validation_data=(X_test, y_test))


# Evaluate the CNN model on the test split (loss and accuracy).
cnn_loss, cnn_accuracy = cnn_model.evaluate(X_test, y_test, verbose=2)

# Predict class probabilities
y_pred_probs = cnn_model.predict(X_test)

# Convert predictions to class labels (index of the largest probability)
y_pred = np.argmax(y_pred_probs, axis=1)

# Check if y_test is in one-hot format and convert to class labels if necessary.
# NOTE: this rebinds y_test to integer class indices; the guard makes the
# conversion a no-op if this cell is executed a second time.
if y_test.ndim > 1 and y_test.shape[1] > 1:  # Assuming it's one-hot encoded
    y_test = np.argmax(y_test, axis=1)

# Calculate precision, recall, and f1-score for CNN (support-weighted averages
# over the classes). These values are stored but not printed in this cell.
cnn_precision = precision_score(y_test, y_pred, average='weighted')
cnn_recall = recall_score(y_test, y_pred, average='weighted')
cnn_f1 = f1_score(y_test, y_pred, average='weighted')

# Print the classification report (per-class precision / recall / f1)
print("\nCNN - Classification Report:")
print(classification_report(y_test, y_pred))




<h1> FEATURE EXTRACTION of 25,331 IMAGES </h1>

import os
import numpy as np
import tensorflow as tf
from tensorflow.keras.applications import EfficientNetB7
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.models import Model
from tensorflow.keras.layers import GlobalAveragePooling2D, Dense, Dropout

# Dataset root (one sub-folder per class); update this path to your dataset.
dataset_dir = '/kaggle/input/isic-separated-2019/separated_ISIC_2019'

# Settings for the EfficientNetB7 feature-extraction run.
img_height = img_width = 224  # input resolution given to the backbone
batch_size = 32  # images read and processed per batch
num_classes = 8  # number of categories in the dataset

# ImageNet-pretrained EfficientNetB7 backbone without its classification top.
base_model = EfficientNetB7(
    include_top=False,
    weights='imagenet',
    input_shape=(img_height, img_width, 3),
)

# Head stacked on the backbone output: global average pooling, then a
# 1024-unit ReLU projection with dropout before and after it.
# NOTE(review): nothing in this cell trains the Dense layer, so the resulting
# 1024-d features pass through freshly initialised weights - confirm intended.
_head_layers = (
    GlobalAveragePooling2D(),
    Dropout(0.5),
    Dense(1024, activation='relu'),
    Dropout(0.5),
)
x = base_model.output
for _layer in _head_layers:
    x = _layer(x)

# Model mapping an input image batch to its 1024-d feature vectors.
feature_extractor = Model(inputs=base_model.input, outputs=x)

# Define the data generator for loading images in batches and apply augmentation.
# NOTE(review): the random rotation / shift / shear / zoom / flip settings are
# applied to every image this generator yields, including during the feature
# extraction below - confirm augmentation is wanted for that step.
datagen = ImageDataGenerator(
    rotation_range=20,
    width_shift_range=0.2,
    height_shift_range=0.2,
    shear_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True,
    preprocessing_function=tf.keras.applications.efficientnet.preprocess_input  # EfficientNetB7 preprocessing
)

# Use flow_from_directory to load images and labels from the dataset directory in batches.
# Each sub-folder of dataset_dir is one class; images are resized to target_size.
image_generator = datagen.flow_from_directory(
    dataset_dir,
    target_size=(img_height, img_width),
    batch_size=batch_size,
    class_mode='categorical',  # Generates one-hot encoded labels
    shuffle=False  # No shuffling to ensure images and labels are in order
)




<h3> EXTRACTION USING EFFICIENT NET B7 </h3>

# Initialize empty lists to store features and labels
extracted_features = []
extracted_labels = []

# Process the images and extract features in batches.
# The generator is addressed by batch index rather than iterated: iterating it
# never stops on its own, and the former `batch_index == n // batch_size`
# break test skipped the final partial batch (and never fired at all when the
# image count was an exact multiple of batch_size). len(image_generator) is
# the number of batches needed to cover every image exactly once.
for batch_idx in range(len(image_generator)):
    batch_images, batch_labels = image_generator[batch_idx]

    # Extract features for the batch of images
    features_batch = feature_extractor.predict(batch_images)

    # Append the extracted features and corresponding labels
    extracted_features.append(features_batch)
    extracted_labels.append(batch_labels)

# Convert the lists of features and labels into numpy arrays
# (one row per image, rows in generator order)
extracted_features = np.vstack(extracted_features)
extracted_labels = np.vstack(extracted_labels)

# Save the extracted features and labels to files for later use
np.save('extracted_features_full_isic_2019_EFFB7.npy', extracted_features)
np.save('extracted_labels_full_isic_2019_EFFB7.npy', extracted_labels)

# Notebook display: (number of images, feature dimension)
extracted_features.shape

from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, accuracy_score
import numpy as np

# Assuming the 'extracted_features.npy' and 'extracted_labels.npy' files are already saved
# Load the features and labels (skip this if already in memory)
# extracted_features = np.load('extracted_features.npy')
# extracted_labels = np.load('extracted_labels.npy')

# Collapse the one-hot label rows into integer class indices.
labels = extracted_labels.argmax(axis=1)

# Hold out 20% of the samples for testing.
X_train, X_test, y_train, y_test = train_test_split(
    extracted_features,
    labels,
    test_size=0.2,
    random_state=42,
)

# Random forest with 100 trees, fitted on the training features.
rf_classifier = RandomForestClassifier(n_estimators=100, random_state=42).fit(X_train, y_train)

# Predict the held-out samples and report overall accuracy.
y_pred = rf_classifier.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
print(f"Test Accuracy: {accuracy:.4f}")

# Per-class precision / recall / f1.
print("Classification Report:")
print(classification_report(y_test, y_pred))


# Notebook display: number of samples in each class.
np.unique(labels, return_counts= True)

from imblearn.over_sampling import SMOTE

# Step 1: Split BEFORE oversampling. SMOTE builds synthetic points by
# interpolating between real samples, so resampling the full dataset first
# would leak test-set information into training and fill the test set with
# synthetic data. The stratified split keeps every class in both parts.
X_train, X_test, y_train, y_test = train_test_split(
    extracted_features, labels, test_size=0.2, random_state=42, stratify=labels
)

smote = SMOTE(random_state=42)

# Step 2: Balance the TRAINING part only.
# SMOTE requires a 2D array for features and a 1D array for labels
X_resampled, y_resampled = smote.fit_resample(X_train, y_train)

# Step 3: Shuffle the balanced training set so that a later tail-based
# validation split does not consist of the samples SMOTE added.
_order = np.random.RandomState(42).permutation(len(y_resampled))
X_train, y_train = X_resampled[_order], y_resampled[_order]

# Step 4: Initialize and train the Random Forest Classifier
rf_classifier = RandomForestClassifier(n_estimators=100, random_state=42)

# Train the model on the balanced training set
rf_classifier.fit(X_train, y_train)

# Step 5: Make predictions on the (real, untouched) test set
y_pred = rf_classifier.predict(X_test)

# Step 6: Evaluate the model's performance
accuracy = accuracy_score(y_test, y_pred)
print(f"Test Accuracy after SMOTE: {accuracy:.4f}")

# Print a classification report for detailed performance metrics
print("Classification Report after SMOTE:")
print(classification_report(y_test, y_pred))

from keras.models import Sequential
from keras.layers import Conv1D, MaxPooling1D, Flatten, BatchNormalization, Dense,Dropout

# 1-D CNN that treats each 1024-d feature vector as a length-1024 sequence
# with a single channel and classifies it into 8 classes.
model = Sequential()
for _layer in (
    Conv1D(32, kernel_size=3, activation='relu', input_shape=(1024, 1)),
    MaxPooling1D(pool_size=2),
    Conv1D(64, kernel_size=3, activation='relu'),
    MaxPooling1D(pool_size=2),
    Flatten(),
    Dense(128, activation='relu'),
    BatchNormalization(),
    Dropout(0.5),
    Dense(64, activation='relu'),
    BatchNormalization(),
    Dropout(0.5),
    Dense(8, activation='softmax'),
):
    model.add(_layer)

# Sparse loss: the targets are integer class indices, not one-hot rows.
model.compile(
    optimizer='adam',
    loss='sparse_categorical_crossentropy',
    metrics=['accuracy'],
)

# Append a trailing channel axis: (samples, features) -> (samples, features, 1).
X_train_reshaped = np.expand_dims(X_train, axis=-1)
X_test_reshaped = np.expand_dims(X_test, axis=-1)

# Train for 80 epochs, holding back 20% of the training data for validation.
history = model.fit(
    X_train_reshaped,
    y_train,
    batch_size=32,
    epochs=80,
    validation_split=0.2,
    verbose=1,
)

import matplotlib.pyplot as plt
from sklearn.metrics import classification_report, confusion_matrix
import seaborn as sns

# Draw the training curves side by side: accuracy on the left, loss on the
# right, each with its training and validation series.
plt.figure(figsize=(12, 5))

_panels = (
    ('accuracy', 'val_accuracy', 'Model Accuracy', 'Accuracy'),
    ('loss', 'val_loss', 'Model Loss', 'Loss'),
)
for _position, (_train_key, _val_key, _title, _ylabel) in enumerate(_panels, start=1):
    plt.subplot(1, 2, _position)
    plt.plot(history.history[_train_key])
    plt.plot(history.history[_val_key])
    plt.title(_title)
    plt.xlabel('Epoch')
    plt.ylabel(_ylabel)
    plt.legend(['Train', 'Validation'], loc='upper left')

plt.show()

from sklearn.metrics import classification_report

# Class probabilities for the test samples; the predicted class is the index
# of the largest probability in each row.
y_pred_probs = model.predict(X_test_reshaped)
y_pred = y_pred_probs.argmax(axis=-1)

# Per-class precision / recall / f1 on the test set.
print(classification_report(y_test, y_pred))

<h2> END OF EFFICIENTNETB7 </h2>

<h2> Extraction using EfficientNetB0 </h2>

from tensorflow.keras.applications import EfficientNetB0
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.models import Model
from tensorflow.keras.layers import GlobalAveragePooling2D, Dense, Dropout

# Parameters
img_height, img_width = 224, 224  # EfficientNetB0 input size
batch_size = 32  # Batch size for efficient memory usage
num_classes = 8  # Number of categories in your dataset

# Load the EfficientNetB0 model pre-trained on ImageNet (without its classifier top)
base_model = EfficientNetB0(weights='imagenet', include_top=False, input_shape=(img_height, img_width, 3))

# Add custom layers for feature extraction: global average pooling followed by
# a 1024-unit ReLU projection with dropout before and after it.
# NOTE(review): nothing in this cell trains the Dense layer - confirm intended.
x = GlobalAveragePooling2D()(base_model.output)
x = Dropout(0.5)(x)
x = Dense(1024, activation='relu')(x)
x = Dropout(0.5)(x)
# Model mapping an input image batch to its 1024-d feature vectors.
feature_extractor = Model(inputs=base_model.input, outputs=x)

# Initialize empty lists to store features and labels
extracted_features = []
extracted_labels = []

# Process the images and extract features in batches.
# `image_generator` is the directory generator created earlier in the file
# (EfficientNet preprocessing). It is addressed by batch index rather than
# iterated, which (a) does not depend on where a previous loop left the
# iterator, (b) includes the final partial batch, and (c) always terminates,
# unlike the former `batch_index == n // batch_size` break test.
for batch_idx in range(len(image_generator)):
    batch_images, batch_labels = image_generator[batch_idx]

    # Extract features for the batch of images
    features_batch = feature_extractor.predict(batch_images)

    # Append the extracted features and corresponding labels
    extracted_features.append(features_batch)
    extracted_labels.append(batch_labels)

# Convert the lists of features and labels into numpy arrays
# (one row per image, rows in generator order)
extracted_features = np.vstack(extracted_features)
extracted_labels = np.vstack(extracted_labels)

# Save the extracted features and labels to files for later use
np.save('extracted_features_full_isic_2019_EFFB0.npy', extracted_features)
np.save('extracted_labels_full_isic_2019_EFFB0.npy', extracted_labels)

# Notebook display: (number of images, feature dimension)
extracted_features.shape

# Convert the one-hot label rows back to integer class indices
labels = np.argmax(extracted_labels, axis=1)

# Notebook display: number of samples in each class
np.unique(labels, return_counts = True)

from imblearn.over_sampling import SMOTE

# Step 1: Split BEFORE oversampling. SMOTE builds synthetic points by
# interpolating between real samples, so resampling the full dataset first
# would leak test-set information into training and fill the test set with
# synthetic data. The stratified split keeps every class in both parts.
X_train, X_test, y_train, y_test = train_test_split(
    extracted_features, labels, test_size=0.2, random_state=42, stratify=labels
)

smote = SMOTE(random_state=42)

# Step 2: Balance the TRAINING part only.
# SMOTE requires a 2D array for features and a 1D array for labels
X_resampled, y_resampled = smote.fit_resample(X_train, y_train)

# Step 3: Shuffle the balanced training set so that a later tail-based
# validation split does not consist of the samples SMOTE added.
_order = np.random.RandomState(42).permutation(len(y_resampled))
X_train, y_train = X_resampled[_order], y_resampled[_order]

# Step 4: Initialize and train the Random Forest Classifier
rf_classifier = RandomForestClassifier(n_estimators=100, random_state=42)

# Train the model on the balanced training set
rf_classifier.fit(X_train, y_train)

# Step 5: Make predictions on the (real, untouched) test set
y_pred = rf_classifier.predict(X_test)

# Step 6: Evaluate the model's performance
accuracy = accuracy_score(y_test, y_pred)
print(f"Test Accuracy after SMOTE: {accuracy:.4f}")

# Print a classification report for detailed performance metrics
print("Classification Report after SMOTE:")
print(classification_report(y_test, y_pred))


from keras.models import Sequential
from keras.layers import Conv1D, MaxPooling1D, Flatten, BatchNormalization, Dense,Dropout

# 1-D CNN over the 1024-d feature vectors (one channel), 8 output classes.
_conv1d_stack = [
    Conv1D(32, kernel_size=3, activation='relu', input_shape=(1024, 1)),
    MaxPooling1D(pool_size=2),
    Conv1D(64, kernel_size=3, activation='relu'),
    MaxPooling1D(pool_size=2),
    Flatten(),
    Dense(128, activation='relu'),
    BatchNormalization(),
    Dropout(0.5),
    Dense(64, activation='relu'),
    BatchNormalization(),
    Dropout(0.5),
    Dense(8, activation='softmax'),
]
model = Sequential(_conv1d_stack)

# Integer class targets, hence the sparse variant of the cross-entropy loss.
model.compile(
    loss='sparse_categorical_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)

# Add the channel axis Conv1D expects: (samples, features, 1).
X_train_reshaped = X_train.reshape(*X_train.shape[:2], 1)
X_test_reshaped = X_test.reshape(*X_test.shape[:2], 1)

# 80 epochs, with 20% of the training data held back for validation.
history = model.fit(
    X_train_reshaped,
    y_train,
    epochs=80,
    batch_size=32,
    validation_split=0.2,
    verbose=1,
)


import matplotlib.pyplot as plt
from sklearn.metrics import classification_report, confusion_matrix
import seaborn as sns

# Training curves: accuracy (left panel) and loss (right panel), each showing
# the training and the validation series recorded in `history`.
plt.figure(figsize=(12, 5))

_panels = (
    ('accuracy', 'val_accuracy', 'Model Accuracy', 'Accuracy'),
    ('loss', 'val_loss', 'Model Loss', 'Loss'),
)
for _position, (_train_key, _val_key, _title, _ylabel) in enumerate(_panels, start=1):
    plt.subplot(1, 2, _position)
    plt.plot(history.history[_train_key])
    plt.plot(history.history[_val_key])
    plt.title(_title)
    plt.xlabel('Epoch')
    plt.ylabel(_ylabel)
    plt.legend(['Train', 'Validation'], loc='upper left')

plt.show()

from sklearn.metrics import classification_report

# Predicted probabilities for the test samples, reduced to the most likely
# class index per row.
y_pred_probs = model.predict(X_test_reshaped)
y_pred = y_pred_probs.argmax(axis=-1)

# Per-class precision / recall / f1 on the test set.
print(classification_report(y_test, y_pred))

<h2> END OF EFFICIENTNETB0 </h2>

<h3> Extraction using ResNet50 </h3>

import os
import numpy as np
import tensorflow as tf
from tensorflow.keras.applications import ResNet50
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.models import Model
from tensorflow.keras.layers import GlobalAveragePooling2D, Dense, Dropout

# Dataset root (one sub-folder per class); update this path to your dataset.
dataset_dir = '/kaggle/input/isic-separated-2019/separated_ISIC_2019'

# Settings for the ResNet50 feature-extraction run.
img_height = img_width = 224  # ResNet50 default input size
batch_size = 32  # images read and processed per batch
num_classes = 8  # number of categories in the dataset

# ImageNet-pretrained ResNet50 backbone without its classification top.
base_model = ResNet50(
    include_top=False,
    weights='imagenet',
    input_shape=(img_height, img_width, 3),
)

# Head stacked on the backbone output: global average pooling, then a
# 1024-unit ReLU projection with dropout before and after it.
# NOTE(review): nothing in this cell trains the Dense layer - confirm intended.
_head_layers = (
    GlobalAveragePooling2D(),
    Dropout(0.5),
    Dense(1024, activation='relu'),
    Dropout(0.5),
)
x = base_model.output
for _layer in _head_layers:
    x = _layer(x)

# Model mapping an input image batch to its 1024-d feature vectors.
feature_extractor = Model(inputs=base_model.input, outputs=x)

# Generator settings: ResNet50 input preprocessing plus random geometric
# augmentation (rotation, shifts, shear, zoom, horizontal flip).
_generator_settings = dict(
    preprocessing_function=tf.keras.applications.resnet50.preprocess_input,
    rotation_range=20,
    width_shift_range=0.2,
    height_shift_range=0.2,
    shear_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True,
)
datagen = ImageDataGenerator(**_generator_settings)

# Stream the dataset from disk in batches. Labels are one-hot ('categorical')
# and shuffling is off, so batches follow the on-disk file order.
image_generator = datagen.flow_from_directory(
    directory=dataset_dir,
    target_size=(img_height, img_width),
    batch_size=batch_size,
    class_mode='categorical',
    shuffle=False,
)

# Initialize empty lists to store features and labels
extracted_features = []
extracted_labels = []

# Process the images and extract features in batches.
# The generator is addressed by batch index rather than iterated: iterating it
# never stops on its own, and the former `batch_index == n // batch_size`
# break test skipped the final partial batch (and never fired at all when the
# image count was an exact multiple of batch_size). len(image_generator) is
# the number of batches needed to cover every image exactly once.
for batch_idx in range(len(image_generator)):
    batch_images, batch_labels = image_generator[batch_idx]

    # Extract features for the batch of images
    features_batch = feature_extractor.predict(batch_images)

    # Append the extracted features and corresponding labels
    extracted_features.append(features_batch)
    extracted_labels.append(batch_labels)

# Convert the lists of features and labels into numpy arrays
# (one row per image, rows in generator order)
extracted_features = np.vstack(extracted_features)
extracted_labels = np.vstack(extracted_labels)

# Save the extracted features and labels to files for later use
np.save('extracted_features_full_isic_2019_ResNet50.npy', extracted_features)
np.save('extracted_labels_full_isic_2019_ResNet50.npy', extracted_labels)

# Convert the one-hot label rows back to integer class indices
labels = np.argmax(extracted_labels, axis=1)

# Notebook display: (number of images, feature dimension)
extracted_features.shape

# Notebook display: number of samples in each class
np.unique(labels, return_counts = True)

from imblearn.over_sampling import SMOTE

# Step 1: Split BEFORE oversampling. SMOTE builds synthetic points by
# interpolating between real samples, so resampling the full dataset first
# would leak test-set information into training and fill the test set with
# synthetic data. The stratified split keeps every class in both parts.
X_train, X_test, y_train, y_test = train_test_split(
    extracted_features, labels, test_size=0.2, random_state=42, stratify=labels
)

smote = SMOTE(random_state=42)

# Step 2: Balance the TRAINING part only.
# SMOTE requires a 2D array for features and a 1D array for labels
X_resampled, y_resampled = smote.fit_resample(X_train, y_train)

# Step 3: Shuffle the balanced training set so that a later tail-based
# validation split does not consist of the samples SMOTE added.
_order = np.random.RandomState(42).permutation(len(y_resampled))
X_train, y_train = X_resampled[_order], y_resampled[_order]

# Step 4: Initialize and train the Random Forest Classifier
rf_classifier = RandomForestClassifier(n_estimators=100, random_state=42)

# Train the model on the balanced training set
rf_classifier.fit(X_train, y_train)

# Step 5: Make predictions on the (real, untouched) test set
y_pred = rf_classifier.predict(X_test)

# Step 6: Evaluate the model's performance
accuracy = accuracy_score(y_test, y_pred)
print(f"Test Accuracy after SMOTE: {accuracy:.4f}")

# Print a classification report for detailed performance metrics
print("Classification Report after SMOTE:")
print(classification_report(y_test, y_pred))

from keras.models import Sequential
from keras.layers import Conv1D, MaxPooling1D, Flatten, BatchNormalization, Dense,Dropout

# 1-D CNN that treats each 1024-d feature vector as a length-1024 sequence
# with a single channel and classifies it into 8 classes.
model = Sequential()
for _layer in (
    Conv1D(32, kernel_size=3, activation='relu', input_shape=(1024, 1)),
    MaxPooling1D(pool_size=2),
    Conv1D(64, kernel_size=3, activation='relu'),
    MaxPooling1D(pool_size=2),
    Flatten(),
    Dense(128, activation='relu'),
    BatchNormalization(),
    Dropout(0.5),
    Dense(64, activation='relu'),
    BatchNormalization(),
    Dropout(0.5),
    Dense(8, activation='softmax'),
):
    model.add(_layer)

# Sparse loss: the targets are integer class indices, not one-hot rows.
model.compile(
    optimizer='adam',
    loss='sparse_categorical_crossentropy',
    metrics=['accuracy'],
)

# Append a trailing channel axis: (samples, features) -> (samples, features, 1).
X_train_reshaped = np.expand_dims(X_train, axis=-1)
X_test_reshaped = np.expand_dims(X_test, axis=-1)

# Train for 80 epochs, holding back 20% of the training data for validation.
history = model.fit(
    X_train_reshaped,
    y_train,
    batch_size=32,
    epochs=80,
    validation_split=0.2,
    verbose=1,
)

import matplotlib.pyplot as plt
from sklearn.metrics import classification_report, confusion_matrix
import seaborn as sns

# Draw the training curves side by side: accuracy on the left, loss on the
# right, each with its training and validation series.
plt.figure(figsize=(12, 5))

_panels = (
    ('accuracy', 'val_accuracy', 'Model Accuracy', 'Accuracy'),
    ('loss', 'val_loss', 'Model Loss', 'Loss'),
)
for _position, (_train_key, _val_key, _title, _ylabel) in enumerate(_panels, start=1):
    plt.subplot(1, 2, _position)
    plt.plot(history.history[_train_key])
    plt.plot(history.history[_val_key])
    plt.title(_title)
    plt.xlabel('Epoch')
    plt.ylabel(_ylabel)
    plt.legend(['Train', 'Validation'], loc='upper left')

plt.show()


from sklearn.metrics import classification_report

# Class probabilities for the test samples; the predicted class is the index
# of the largest probability in each row.
y_pred_probs = model.predict(X_test_reshaped)
y_pred = y_pred_probs.argmax(axis=-1)

# Per-class precision / recall / f1 on the test set.
print(classification_report(y_test, y_pred))

<h3>End of ResNet50 </h3>

<h2> Extraction using InceptionNet </h2>

import os
import numpy as np
import tensorflow as tf
from tensorflow.keras.applications import InceptionV3
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.models import Model
from tensorflow.keras.layers import GlobalAveragePooling2D, Dense, Dropout

# Dataset root (one sub-folder per class); update this path to your dataset.
dataset_dir = '/kaggle/input/isic-separated-2019/separated_ISIC_2019'

# Settings for the InceptionV3 feature-extraction run.
img_height = img_width = 299  # InceptionV3 default input size
batch_size = 32  # images read and processed per batch
num_classes = 8  # number of categories in the dataset

# ImageNet-pretrained InceptionV3 backbone without its classification top.
base_model = InceptionV3(
    include_top=False,
    weights='imagenet',
    input_shape=(img_height, img_width, 3),
)

# Head stacked on the backbone output: global average pooling, then a
# 1024-unit ReLU projection with dropout before and after it.
# NOTE(review): nothing in this cell trains the Dense layer - confirm intended.
_head_layers = (
    GlobalAveragePooling2D(),
    Dropout(0.5),
    Dense(1024, activation='relu'),
    Dropout(0.5),
)
x = base_model.output
for _layer in _head_layers:
    x = _layer(x)

# Model mapping an input image batch to its 1024-d feature vectors.
feature_extractor = Model(inputs=base_model.input, outputs=x)

# Generator settings: InceptionV3 input preprocessing plus random geometric
# augmentation (rotation, shifts, shear, zoom, horizontal flip).
_generator_settings = dict(
    preprocessing_function=tf.keras.applications.inception_v3.preprocess_input,
    rotation_range=20,
    width_shift_range=0.2,
    height_shift_range=0.2,
    shear_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True,
)
datagen = ImageDataGenerator(**_generator_settings)

# Stream the dataset from disk in batches. Labels are one-hot ('categorical')
# and shuffling is off, so batches follow the on-disk file order.
image_generator = datagen.flow_from_directory(
    directory=dataset_dir,
    target_size=(img_height, img_width),
    batch_size=batch_size,
    class_mode='categorical',
    shuffle=False,
)

# Initialize empty lists to store features and labels
extracted_features = []
extracted_labels = []

# Process the images and extract features in batches.
# The generator is addressed by batch index rather than iterated: iterating it
# never stops on its own, and the former `batch_index == n // batch_size`
# break test skipped the final partial batch (and never fired at all when the
# image count was an exact multiple of batch_size). len(image_generator) is
# the number of batches needed to cover every image exactly once.
for batch_idx in range(len(image_generator)):
    batch_images, batch_labels = image_generator[batch_idx]

    # Extract features for the batch of images
    features_batch = feature_extractor.predict(batch_images)

    # Append the extracted features and corresponding labels
    extracted_features.append(features_batch)
    extracted_labels.append(batch_labels)

# Convert the lists of features and labels into numpy arrays
# (one row per image, rows in generator order)
extracted_features = np.vstack(extracted_features)
extracted_labels = np.vstack(extracted_labels)

# Save the extracted features and labels to files for later use
np.save('extracted_features_full_isic_2019_InceptionNet.npy', extracted_features)
np.save('extracted_labels_full_isic_2019_InceptionNet.npy', extracted_labels)

# Convert the one-hot label rows back to integer class indices
labels = np.argmax(extracted_labels, axis=1)

# Notebook display: (number of images, feature dimension)
extracted_features.shape

from imblearn.over_sampling import SMOTE

# Step 1: Split BEFORE oversampling. SMOTE builds synthetic points by
# interpolating between real samples, so resampling the full dataset first
# would leak test-set information into training and fill the test set with
# synthetic data. The stratified split keeps every class in both parts.
X_train, X_test, y_train, y_test = train_test_split(
    extracted_features, labels, test_size=0.2, random_state=42, stratify=labels
)

smote = SMOTE(random_state=42)

# Step 2: Balance the TRAINING part only.
# SMOTE requires a 2D array for features and a 1D array for labels
X_resampled, y_resampled = smote.fit_resample(X_train, y_train)

# Step 3: Shuffle the balanced training set so that a later tail-based
# validation split does not consist of the samples SMOTE added.
_order = np.random.RandomState(42).permutation(len(y_resampled))
X_train, y_train = X_resampled[_order], y_resampled[_order]

# Step 4: Initialize and train the Random Forest Classifier
rf_classifier = RandomForestClassifier(n_estimators=100, random_state=42)

# Train the model on the balanced training set
rf_classifier.fit(X_train, y_train)

# Step 5: Make predictions on the (real, untouched) test set
y_pred = rf_classifier.predict(X_test)

# Step 6: Evaluate the model's performance
accuracy = accuracy_score(y_test, y_pred)
print(f"Test Accuracy after SMOTE: {accuracy:.4f}")

# Print a classification report for detailed performance metrics
print("Classification Report after SMOTE:")
print(classification_report(y_test, y_pred))

from keras.models import Sequential
from keras.layers import Conv1D, MaxPooling1D, Flatten, BatchNormalization, Dense,Dropout

# 1-D CNN over the 1024-d feature vectors (one channel), 8 output classes.
_conv1d_stack = [
    Conv1D(32, kernel_size=3, activation='relu', input_shape=(1024, 1)),
    MaxPooling1D(pool_size=2),
    Conv1D(64, kernel_size=3, activation='relu'),
    MaxPooling1D(pool_size=2),
    Flatten(),
    Dense(128, activation='relu'),
    BatchNormalization(),
    Dropout(0.5),
    Dense(64, activation='relu'),
    BatchNormalization(),
    Dropout(0.5),
    Dense(8, activation='softmax'),
]
model = Sequential(_conv1d_stack)

# Integer class targets, hence the sparse variant of the cross-entropy loss.
model.compile(
    loss='sparse_categorical_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)

# Add the channel axis Conv1D expects: (samples, features, 1).
X_train_reshaped = X_train.reshape(*X_train.shape[:2], 1)
X_test_reshaped = X_test.reshape(*X_test.shape[:2], 1)

# 80 epochs, with 20% of the training data held back for validation.
history = model.fit(
    X_train_reshaped,
    y_train,
    epochs=80,
    batch_size=32,
    validation_split=0.2,
    verbose=1,
)

import matplotlib.pyplot as plt
from sklearn.metrics import classification_report, confusion_matrix
import seaborn as sns

# Training curves: accuracy (left panel) and loss (right panel), each showing
# the training and the validation series recorded in `history`.
plt.figure(figsize=(12, 5))

_panels = (
    ('accuracy', 'val_accuracy', 'Model Accuracy', 'Accuracy'),
    ('loss', 'val_loss', 'Model Loss', 'Loss'),
)
for _position, (_train_key, _val_key, _title, _ylabel) in enumerate(_panels, start=1):
    plt.subplot(1, 2, _position)
    plt.plot(history.history[_train_key])
    plt.plot(history.history[_val_key])
    plt.title(_title)
    plt.xlabel('Epoch')
    plt.ylabel(_ylabel)
    plt.legend(['Train', 'Validation'], loc='upper left')

plt.show()

from sklearn.metrics import classification_report

# Predicted probabilities for the test samples, reduced to the most likely
# class index per row.
y_pred_probs = model.predict(X_test_reshaped)
y_pred = y_pred_probs.argmax(axis=-1)

# Per-class precision / recall / f1 on the test set.
print(classification_report(y_test, y_pred))

