# MLP

In [2]:
import torch
import os
import optuna
import time
import torch.nn as nn
import torch.optim as optim
import numpy as np
import matplotlib.pyplot as plt
from torch.utils.data import Subset, random_split, ConcatDataset, DataLoader
from PIL import Image
from sklearn.metrics import accuracy_score
from sklearn.metrics import precision_score
from sklearn.metrics import recall_score
from sklearn.metrics import f1_score
from sklearn.metrics import confusion_matrix
from sklearn.metrics import roc_auc_score, roc_curve

import numpy as np
from sklearn.datasets import fetch_openml
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from keras.utils import to_categorical
from keras.models import Sequential
from keras.layers import Flatten, Dense
from keras.optimizers import Adam

import cv2

## Normalización de datos y creación de los conjuntos de entrenamiento y prueba

In [3]:
"""
-- Normalize pixel values and create training and testing datasets --

This cell normalizes the pixel values and creates a training
and testing datasets considering the stratify technique.
"""
    
# Load the cropped images, balance the classes, scale pixel values by the
# global maximum and build a stratified 80/20 train/test split with one-hot labels.
parent_folder_path = 'images/cropped_dataset/'
categories = {'COVID': 0, 'Lung_Opacity': 1, 'Normal': 2, 'Viral_Pneumonia': 3}
arrays = []
arrays_labels = []
category_amount = []

# List each category folder once. os.listdir returns entries in arbitrary
# order, so sort them to make the selected subset (and therefore the seeded
# split below) reproducible across runs and machines.
category_files = {}
for category in categories:
    folder_path = os.path.join(parent_folder_path, category)
    category_files[category] = sorted(os.listdir(folder_path))
    category_amount.append(len(category_files[category]))

# Cap every category at the size of the smallest one to balance the classes.
max_training = min(category_amount)

# Convert the images into arrays, recording the label of every image that is
# actually kept so that data and labels can never get out of sync.
for cat_folder, value in categories.items():

    folder_path = os.path.join(parent_folder_path, cat_folder)

    for file_name in category_files[cat_folder][:max_training]:

        file_path = os.path.join(folder_path, file_name)

        # The context manager releases the file handle as soon as the pixels
        # have been copied into a NumPy array.
        with Image.open(file_path) as image:
            image_size = image.size
            image_array = np.array(image)

        # verify all images are of the desired size
        if image_size != (250, 250):
            print(file_path, " IS NOT 250x250, it is: ", image_size)
            continue

        # Multi-channel images are collapsed to a single luminance channel.
        if image_array.shape != (250, 250):
            image_array = np.dot(image_array[..., :3], [0.2989, 0.5870, 0.1140])

        # Cast per image so the stacked array is never materialized in float64.
        arrays.append(image_array.astype(np.float16))
        arrays_labels.append(value)

if not arrays:
    raise ValueError(f"No 250x250 images were found under {parent_folder_path!r}")

# Stack into a single (n_images, 250, 250) array and scale by the global maximum.
arrays = np.array(arrays, dtype=np.float16)
arrays = arrays / np.max(arrays)

arrays_labels = np.array(arrays_labels)

X_train, X_test, y_train, y_test = train_test_split(
    arrays,
    arrays_labels,
    test_size=0.2,
    random_state=42,
    stratify=arrays_labels,
)

print("Finished")

# One-hot encode the integer class labels for categorical cross-entropy.
y_train = to_categorical(y_train.astype(int), num_classes=4)
y_test = to_categorical(y_test.astype(int), num_classes=4)

Finished


## Arquitectura 1 para modelo MLP

In [4]:
# MLP architecture 1: flattened 250x250 input, two hidden layers and a
# 4-way softmax output.
model_mlp = Sequential([
    Flatten(input_shape=(250, 250, 1)),
    Dense(68, activation='sigmoid'),   # hidden layer 1
    Dense(30, activation='relu'),      # hidden layer 2
    Dense(4, activation='softmax'),    # output layer, one unit per class
])

# Compile with Adam and categorical cross-entropy (labels are one-hot encoded).
optimizer = Adam(learning_rate=0.00001)
model_mlp.compile(
    optimizer=optimizer,
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

# Train the model.
model_mlp.fit(X_train, y_train, epochs=20, batch_size=128, verbose=1)

2023-05-24 22:39:32.865138: W tensorflow/tsl/framework/cpu_allocator_impl.cc:83] Allocation of 2285500000 exceeds 10% of free system memory.


Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


<keras.callbacks.History at 0x7fd8807a2e80>

### Métricas de la Arquitectura 1

In [5]:
# Softmax class probabilities for every test sample, one column per class.
probabilities = model_mlp.predict(X_test)

# Hard class decisions and ground-truth class ids (y_test is one-hot encoded).
predictions = np.argmax(probabilities, axis=1)
real_values = np.argmax(y_test, axis=1)

# Threshold-dependent metrics are computed from the hard decisions.
print(confusion_matrix(real_values, predictions))
print("Accuracy: ", accuracy_score(real_values, predictions))
print("Precision: ", precision_score(real_values, predictions, average=None))
print("Recall: ", recall_score(real_values, predictions, average=None))
print("F1 score: ", f1_score(real_values, predictions, average=None))

categories = {'COVID': 0, 'Lung_Opacity': 1, 'Normal': 2, 'Viral_Pneumonia': 3}
class_ids = np.array(list(categories.values()))

# ROC/AUC need continuous scores, not argmax labels: with hard 0/1 predictions
# each curve collapses to a single operating point.
# Both matrices hold one row per class (row index == class id):
#   test_label_mat[k]  -> 1 where the true class is k, else 0
#   predictions_mat[k] -> predicted probability of class k
test_label_mat = (real_values[np.newaxis, :] == class_ids[:, np.newaxis]).astype(int)
predictions_mat = probabilities.T

# Multiclass AUC: roc_auc_score expects integer labels of shape (n_samples,)
# and scores of shape (n_samples, n_classes) whose rows sum to 1.
auc = roc_auc_score(real_values, probabilities, multi_class='ovo', labels=class_ids)
print("General AUC:", auc)

# One-vs-rest ROC curve and AUC for each category.
for category, value in categories.items():

    fpr, tpr, thresholds = roc_curve(test_label_mat[value], predictions_mat[value])
    class_auc = roc_auc_score(test_label_mat[value], predictions_mat[value])

    plt.plot(fpr, tpr, label=f'ROC Curve {category} (AUC = {class_auc:0.2f})')
    plt.plot([0, 1], [0, 1], 'k--')  # Diagonal line representing the random classifier
    plt.xlabel('False Positive Rate')
    plt.ylabel('True Positive Rate')
    plt.title(f'Receiver Operating Characteristic (ROC) Curve for {category}')
    plt.legend(loc='lower right')
    plt.show()

2023-05-24 22:42:38.136536: W tensorflow/tsl/framework/cpu_allocator_impl.cc:83] Allocation of 571500000 exceeds 10% of free system memory.


[[0.24719678 0.41066366 0.30784017 0.03429925]
 [0.22446679 0.63296264 0.03479821 0.10777239]
 [0.1166525  0.05825726 0.82011616 0.00497407]
 ...
 [0.12124888 0.35914385 0.5108744  0.00873281]
 [0.03001905 0.04282829 0.17137179 0.7557808 ]
 [0.13891777 0.7965309  0.04472766 0.01982365]]


## Arquitectura 2 para modelo MLP

In [6]:
# MLP architecture 2: flattened 250x250 input, three hidden layers and a
# 4-way softmax output.
model_mlp = Sequential([
    Flatten(input_shape=(250, 250, 1)),
    Dense(68, activation='sigmoid'),   # hidden layer 1
    Dense(30, activation='relu'),      # hidden layer 2
    Dense(68, activation='relu'),      # hidden layer 3
    Dense(4, activation='softmax'),    # output layer, one unit per class
])

# Compile with Adam and categorical cross-entropy (labels are one-hot encoded).
optimizer = Adam(learning_rate=0.00001)
model_mlp.compile(
    optimizer=optimizer,
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

# Train the model.
model_mlp.fit(X_train, y_train, epochs=20, batch_size=128, verbose=1)

: 

: 

### Métricas de la Arquitectura 2

In [None]:
# Softmax class probabilities for every test sample, one column per class.
probabilities = model_mlp.predict(X_test)

# Hard class decisions and ground-truth class ids (y_test is one-hot encoded).
predictions = np.argmax(probabilities, axis=1)
real_values = np.argmax(y_test, axis=1)

# Threshold-dependent metrics are computed from the hard decisions.
print(confusion_matrix(real_values, predictions))
print("Accuracy: ", accuracy_score(real_values, predictions))
print("Precision: ", precision_score(real_values, predictions, average=None))
print("Recall: ", recall_score(real_values, predictions, average=None))
print("F1 score: ", f1_score(real_values, predictions, average=None))

categories = {'COVID': 0, 'Lung_Opacity': 1, 'Normal': 2, 'Viral_Pneumonia': 3}
class_ids = np.array(list(categories.values()))

# ROC/AUC need continuous scores, not argmax labels: with hard 0/1 predictions
# each curve collapses to a single operating point.
# Both matrices hold one row per class (row index == class id):
#   test_label_mat[k]  -> 1 where the true class is k, else 0
#   predictions_mat[k] -> predicted probability of class k
test_label_mat = (real_values[np.newaxis, :] == class_ids[:, np.newaxis]).astype(int)
predictions_mat = probabilities.T

# Multiclass AUC: roc_auc_score expects integer labels of shape (n_samples,)
# and scores of shape (n_samples, n_classes) whose rows sum to 1.
auc = roc_auc_score(real_values, probabilities, multi_class='ovo', labels=class_ids)
print("General AUC:", auc)

# One-vs-rest ROC curve and AUC for each category.
for category, value in categories.items():

    fpr, tpr, thresholds = roc_curve(test_label_mat[value], predictions_mat[value])
    class_auc = roc_auc_score(test_label_mat[value], predictions_mat[value])

    plt.plot(fpr, tpr, label=f'ROC Curve {category} (AUC = {class_auc:0.2f})')
    plt.plot([0, 1], [0, 1], 'k--')  # Diagonal line representing the random classifier
    plt.xlabel('False Positive Rate')
    plt.ylabel('True Positive Rate')
    plt.title(f'Receiver Operating Characteristic (ROC) Curve for {category}')
    plt.legend(loc='lower right')
    plt.show()

## Arquitectura 3 para modelo MLP

In [None]:
# MLP architecture 3: flattened 250x250 input, three ReLU hidden layers and a
# 4-way softmax output, trained with a larger learning rate.
model_mlp = Sequential([
    Flatten(input_shape=(250, 250, 1)),
    Dense(68, activation='relu'),      # hidden layer 1
    Dense(30, activation='relu'),      # hidden layer 2
    Dense(128, activation='relu'),     # hidden layer 3
    Dense(4, activation='softmax'),    # output layer, one unit per class
])

# Compile with Adam and categorical cross-entropy (labels are one-hot encoded).
optimizer = Adam(learning_rate=0.001)
model_mlp.compile(
    optimizer=optimizer,
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

# Train the model.
model_mlp.fit(X_train, y_train, epochs=20, batch_size=128, verbose=1)

### Métricas de la Arquitectura 3

In [None]:
# Softmax class probabilities for every test sample, one column per class.
probabilities = model_mlp.predict(X_test)

# Hard class decisions and ground-truth class ids (y_test is one-hot encoded).
predictions = np.argmax(probabilities, axis=1)
real_values = np.argmax(y_test, axis=1)

# Threshold-dependent metrics are computed from the hard decisions.
print(confusion_matrix(real_values, predictions))
print("Accuracy: ", accuracy_score(real_values, predictions))
print("Precision: ", precision_score(real_values, predictions, average=None))
print("Recall: ", recall_score(real_values, predictions, average=None))
print("F1 score: ", f1_score(real_values, predictions, average=None))

categories = {'COVID': 0, 'Lung_Opacity': 1, 'Normal': 2, 'Viral_Pneumonia': 3}
class_ids = np.array(list(categories.values()))

# ROC/AUC need continuous scores, not argmax labels: with hard 0/1 predictions
# each curve collapses to a single operating point.
# Both matrices hold one row per class (row index == class id):
#   test_label_mat[k]  -> 1 where the true class is k, else 0
#   predictions_mat[k] -> predicted probability of class k
test_label_mat = (real_values[np.newaxis, :] == class_ids[:, np.newaxis]).astype(int)
predictions_mat = probabilities.T

# Multiclass AUC: roc_auc_score expects integer labels of shape (n_samples,)
# and scores of shape (n_samples, n_classes) whose rows sum to 1.
auc = roc_auc_score(real_values, probabilities, multi_class='ovo', labels=class_ids)
print("General AUC:", auc)

# One-vs-rest ROC curve and AUC for each category.
for category, value in categories.items():

    fpr, tpr, thresholds = roc_curve(test_label_mat[value], predictions_mat[value])
    class_auc = roc_auc_score(test_label_mat[value], predictions_mat[value])

    plt.plot(fpr, tpr, label=f'ROC Curve {category} (AUC = {class_auc:0.2f})')
    plt.plot([0, 1], [0, 1], 'k--')  # Diagonal line representing the random classifier
    plt.xlabel('False Positive Rate')
    plt.ylabel('True Positive Rate')
    plt.title(f'Receiver Operating Characteristic (ROC) Curve for {category}')
    plt.legend(loc='lower right')
    plt.show()