# In [None]:  (Jupyter cell marker left over from the notebook export)
#Artificial Neural Networks
from PIL import Image
import numpy as np
import os
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
from tensorflow import keras

from keras.models import Sequential
from keras.layers import Conv2D
from keras.layers import MaxPooling2D
from keras.layers import Flatten
from keras.layers import Dense
from keras.optimizers import Adam
from keras.callbacks import EarlyStopping
from sklearn.metrics import f1_score, precision_score, recall_score

# NOTE: an earlier version mistakenly flattened each image to a 1D array; this is a CNN now, so images keep their 2D layout.


# Global variables
image_folder = "C:/Users/compsci6651/Desktop/Filtered_Images"
labels_file_path = 'C:/Users/compsci6651/PycharmProjects/SeniorSem/HAM10000_metadata.txt'

# Maps image id -> label, keeping only the two classes this binary
# classifier distinguishes ('mel' and 'nv'). Built from the metadata file below.
labels_dict = {}
with open(labels_file_path, 'r', encoding='utf-8') as file:
    next(file, None)  # Skip the header row (the default avoids StopIteration on an empty file)
    for line in file:
        parts = line.strip().split(',')
        if len(parts) < 3:
            # Blank or truncated row (e.g. a trailing newline at end of file):
            # skip it instead of failing with IndexError below.
            continue
        image_id = parts[1]
        label = parts[2]
        if label in ('mel', 'nv'):
            labels_dict[image_id] = label


def load_data(image_folder, labels_dict, batch_size=32, desired_size=(120, 90)):
    """
    A generator function that loads images in batches from a specified folder.

    Only files with a ``.jpg`` extension (any letter case) are read. Each image
    is converted to RGB if needed, resized to ``desired_size`` and scaled to
    float32 values in [0, 1]. Files are visited in sorted filename order so the
    batches are reproducible between runs.

    :param image_folder: Directory containing images.
    :param labels_dict: Dictionary mapping image filenames (without extension) to labels.
    :param batch_size: Number of images to be loaded in a single batch.
    :param desired_size: Tuple passed to ``Image.resize`` giving the size to which
        images will be resized.
    :return: Yields ``(images, labels)`` tuples of NumPy arrays. Images whose id is
        not in ``labels_dict`` get the label ``"unknown"``. The final batch may be
        smaller than ``batch_size``.
    """
    batch_data = []
    batch_labels = []
    suffix = '.jpg'

    # os.listdir returns entries in arbitrary order; sort for determinism.
    for filename in sorted(os.listdir(image_folder)):
        if not filename.lower().endswith(suffix):
            continue

        image_path = os.path.join(image_folder, filename)
        # The context manager closes the file handle once the pixels are copied,
        # so a large folder does not exhaust open-file limits.
        with Image.open(image_path) as image:
            rgb_image = image if image.mode == 'RGB' else image.convert('RGB')
            resized = rgb_image.resize(desired_size)
            image_array = np.array(resized, dtype=np.float32) / 255.0  # Normalization

        # Strip only the trailing extension; str.replace would also remove
        # any '.jpg' occurring earlier in the name.
        image_id = filename[:-len(suffix)]

        batch_data.append(image_array)
        batch_labels.append(labels_dict.get(image_id, "unknown"))

        if len(batch_data) == batch_size:
            # Yield a full batch and start collecting the next one
            yield np.array(batch_data), np.array(batch_labels)
            batch_data, batch_labels = [], []

    # Yield any remaining data as the last (possibly short) batch
    if batch_data:
        yield np.array(batch_data), np.array(batch_labels)


# Label Encoding
# load_data is a generator of (images, labels) batches, so it has to be
# consumed batch by batch and stitched together; tuple-unpacking the generator
# itself does not produce the full data and label arrays.
image_batches = []
label_batches = []
for batch_images, batch_labels in load_data(image_folder, labels_dict):
    image_batches.append(batch_images)
    label_batches.append(batch_labels)
if not image_batches:
    raise ValueError(f"No .jpg images found in {image_folder!r}")
data = np.concatenate(image_batches)
labels = np.concatenate(label_batches)

encoder = LabelEncoder()  # not used below: the explicit mapping pins 'mel' as class 1

label_to_int = {'nv': 0, 'mel': 1}

# load_data labels images missing from labels_dict as "unknown"; drop those
# rows so the lookup below cannot raise KeyError.
known_mask = np.isin(labels, list(label_to_int))
if not known_mask.all():
    data = data[known_mask]
    labels = labels[known_mask]
numerical_labels = np.array([label_to_int[label] for label in labels], dtype=np.int64)

#Test split
# 20% is held out for testing, then 25% of the remaining 80% (20% overall) for
# validation, giving a 60/20/20 split. Both splits are stratified by class.
X_temp, X_test, y_temp, y_test = train_test_split(data, numerical_labels, test_size=0.2, stratify=numerical_labels, random_state=42)
X_train, X_validate, y_train, y_validate = train_test_split(X_temp, y_temp, test_size=0.25, stratify=y_temp, random_state=42)

# Shape of one input image. load_data resizes with its default
# desired_size=(120, 90), which PIL interprets as (width, height), so each
# array is (height=90, width=120, channels=3). Update this if that size changes.
input_shape = (90, 120, 3)

classifier = Sequential()
learning_rate = 0.0001
optimus = Adam(learning_rate=learning_rate)

# Convolutional block 1: two 3x3 conv layers (64 filters) + 2x2 max pooling.
# Declaring the input shape on the first layer builds the model up front, so
# summary() below can report output shapes and parameter counts.
classifier.add(Conv2D(64, (3, 3), activation='relu', input_shape=input_shape))
classifier.add(Conv2D(64, (3, 3), activation='relu'))
classifier.add(MaxPooling2D(pool_size=(2, 2)))

# Convolutional block 2: two 3x3 conv layers (128 filters) + 2x2 max pooling.
classifier.add(Conv2D(128, (3, 3), activation='relu'))
classifier.add(Conv2D(128, (3, 3), activation='relu'))
classifier.add(MaxPooling2D(pool_size=(2, 2)))


# Fully Connected Layers
classifier.add(Flatten())
classifier.add(Dense(units=256, activation='relu'))

# Single sigmoid unit: the output is the predicted probability of class 1.
classifier.add(Dense(units=1, activation='sigmoid'))
classifier.compile(optimizer=optimus, loss='binary_crossentropy', metrics=['accuracy'])
classifier.summary()
from keras.preprocessing.image import ImageDataGenerator

# load_data already divides pixel values by 255, so no `rescale` is applied
# here; rescaling a second time would squash every input into [0, 1/255].
# Only the training stream is augmented.
train_datagen = ImageDataGenerator(shear_range=0.2, zoom_range=0.2, horizontal_flip=True)

validate_datagen = ImageDataGenerator()
test_datagen = ImageDataGenerator()

train_generator = train_datagen.flow(np.asarray(X_train), np.asarray(y_train), batch_size=1)
validate_generator = validate_datagen.flow(np.asarray(X_validate), np.asarray(y_validate), batch_size=1)

print(np.unique(y_train))  # should print something like [0 1]
print(np.unique(y_validate))  # should also print [0 1]

early_stopping = EarlyStopping(monitor='val_loss', patience=5, verbose=1, mode='min')

# Step counts come from the generators (one full pass per epoch) instead of
# hard-coded numbers that silently stop matching the dataset size. The
# early-stopping callback must be passed to fit() to have any effect.
history = classifier.fit(
    train_generator,
    steps_per_epoch=len(train_generator),
    epochs=10,
    validation_data=validate_generator,
    validation_steps=len(validate_generator),
    callbacks=[early_stopping],
)

from sklearn.metrics import confusion_matrix
from sklearn.metrics import roc_curve, auc
import matplotlib.pyplot as plt

y_test = np.asarray(y_test)

# shuffle=False is essential: flow() shuffles by default, which would put the
# predictions in a different order from y_test and invalidate every metric
# computed below.
test_generator = validate_datagen.flow(np.asarray(X_test), y_test, batch_size=1, shuffle=False)
test_loss, test_accuracy = classifier.evaluate(test_generator)
print(f"Test Accuracy: {test_accuracy * 100:.2f}%")

# Predict once and reuse the probabilities for all metrics and the ROC curve.
y_pred_probs = classifier.predict(test_generator).ravel()
y_pred = np.round(y_pred_probs).astype(int)  # class labels at the 0.5 cut-off

y_pred_binary = y_pred  # same thresholded predictions; no second predict pass needed
# labels=[0, 1] keeps the matrix 2x2 even if one class is never predicted,
# so the four-way unpacking further down cannot fail.
conf_matrix = confusion_matrix(y_test, y_pred_binary, labels=[0, 1])
print("Confusion Matrix:\n", conf_matrix)

precision = precision_score(y_test, y_pred)
recall = recall_score(y_test, y_pred)
f1 = f1_score(y_test, y_pred)

print(f"Precision: {precision:.2f}")
print(f"Recall: {recall:.2f}")
print(f"F1 Score: {f1:.2f}")

# Counts noted in this script, presumably from an earlier run (they will
# differ from run to run):
# 47 - True Negative
# 1215 - False Positive
# 4 - False Negative
# 1294 - True Positive

# 5. Calculate sensitivity and specificity:
tn, fp, fn, tp = conf_matrix.ravel()
sensitivity = tp / (tp + fn)
specificity = tn / (tn + fp)
print(f"True Positive: {tp:.2f}")
print(f"False Positive: {fp:.2f}")
print(f"False Negative: {fn:.2f}")
print(f"True Negative: {tn:.2f}")
print(f"Sensitivity: {sensitivity:.2f}")
print(f"Specificity: {specificity:.2f}")


# 1. Class predictions for the test set at a stricter, adjustable cut-off
threshold = 0.7  # Adjust this threshold as needed
y_pred_class = (y_pred_probs > threshold).astype(int)

# 2. Compute the ROC curve and AUC
# roc_curve needs the continuous scores: it sweeps the threshold itself.
# Feeding it already-thresholded 0/1 predictions collapses the curve to a
# single operating point.
fpr, tpr, thresholds = roc_curve(y_test, y_pred_probs)
roc_auc = auc(fpr, tpr)

# 3. Plot the ROC curve
plt.figure()
lw = 2  # line width
plt.plot(fpr, tpr, color='darkorange', lw=lw, label='ROC curve (area = %0.2f)' % roc_auc)
plt.plot([0, 1], [0, 1], color='navy', lw=lw, linestyle='--')  # chance-level diagonal
plt.xlim([0.0, 1.0])
plt.ylim([0.0, 1.05])
plt.xlabel('False Positive Rate')
plt.ylabel('True Positive Rate')
plt.title('Receiver Operating Characteristic (ROC)')
plt.legend(loc="lower right")
plt.show()

# Loss per epoch: training curve always, validation curve when it was recorded
epoch_losses = history.history
plt.plot(epoch_losses['loss'], label='Training Loss')
validation_curve = epoch_losses.get('val_loss')
if validation_curve is not None:
    plt.plot(validation_curve, label='Validation Loss')

plt.title('Model Loss')
plt.xlabel('Epoch')
plt.ylabel('Loss')
plt.legend(loc='upper left')
plt.show()