<a href="https://colab.research.google.com/github/jsansao/idl/blob/main/TrabalhoPratico3_2023_2_CNN_MedMNIST.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Trabalho Prático 3 - 2023-2
## Redes convolucionais, bases do MedMNIST

Neste trabalho vamos explorar as bases do [MedMNIST](https://medmnist.com/).

O MedMNIST v2 é um dataset de larga escala inspirado no MNIST. É uma coleção de dados biomédicos padronizados em 2D e 3D. As imagens têm dimensões 28x28 (monocromáticas ou RGB).

Para esse trabalho, vamos optar por bases de imagens 2D para efetuar classificação binária ou multi-classe.

O site do MedMNIST mostra mais detalhes sobre as bases.

A ideia é que você escolha uma das bases e efetue a tarefa de treinar as redes nas seguintes condições:

1. Rede convolucional simples
2. Rede convolucional complexa (múltiplas camadas e mais recursos adicionais)
3. Treinamento com AutoKeras

A ideia é obter o melhor modelo para cada um dos itens. Para isso, considere a melhor acurácia de validação de cada modelo treinado.

Para escolher a base, utilize a seguinte referência, baseada no último dígito da sua matrícula.

1. 'pathmnist',
2. 'dermamnist',
3. 'octmnist',
4. 'pneumoniamnist',
5. 'bloodmnist',
6. 'tissuemnist',
7. 'organamnist',
8. 'organcmnist',
9. 'organsmnist'

ou se for 0. 'breastmnist',



# Instalação do MedMNIST



In [None]:
# Install the medmnist package through a notebook shell escape.
!pip install medmnist

In [None]:
# @title Escolha o dataset do MedMNIST
# @markdown Escolha no menu abaixo:
# Name of the selected MedMNIST dataset; later cells use it as the key into
# medmnist.INFO and as the stem of the downloaded .npz file.
data_flag = "breastmnist"  # @param ['pathmnist', 'dermamnist', 'octmnist', 'pneumoniamnist', 'breastmnist', 'bloodmnist', 'tissuemnist', 'organamnist', 'organcmnist', 'organsmnist']
# @markdown ---

# Modelo CNN simples

## Importação do dataset

In [None]:
import os
import time

import medmnist
import numpy as np
import tensorflow as tf
from medmnist import INFO, Evaluator
from medmnist.info import DEFAULT_ROOT
from tensorflow.keras.models import load_model

from tensorflow.keras.utils import to_categorical

In [None]:
# Where MedMNIST caches its archives, and the base directory for this run's outputs.
input_root = DEFAULT_ROOT
output_root = './images'

# Metadata entry for the dataset chosen in the form cell.
info = INFO[data_flag]
task = info['task']
# Instantiating the dataset class with download=True makes sure the archive
# read below is present under input_root; the instance itself is not needed.
_ = getattr(medmnist, info['python_class'])(split="train", root=input_root, download=True)

n_classes = len(info['label'])
n_channels = info['n_channels']

# One timestamped output directory per run. exist_ok=True replaces the
# separate isdir() check and is safe if the directory already exists.
output_root = os.path.join(output_root, data_flag, time.strftime("%y%m%d_%H%M%S"))
os.makedirs(output_root, exist_ok=True)

# Pull every split out of the archive inside a context manager so the
# underlying .npz file handle is closed once the arrays are in memory.
with np.load(os.path.join(input_root, "{}.npz".format(data_flag))) as npz_file:
    x_train = npz_file['train_images']
    y_train = npz_file['train_labels']
    x_val = npz_file['val_images']
    y_val = npz_file['val_labels']
    x_test = npz_file['test_images']
    y_test = npz_file['test_labels']


#main(data_flag, num_trials, input_root, output_root, gpu_ids, run, model_path)

## Data Augmentation

In [None]:
from tensorflow.keras.preprocessing.image import ImageDataGenerator

In [None]:
rank = x_train.ndim

# Image arrays with fewer than four dimensions get a trailing singleton axis
# (presumably the channel axis of single-channel images - TODO confirm), so
# all three splits end up with the same rank.
if rank < 4:
    x_train, x_test, x_val = (
        np.expand_dims(images, axis=-1) for images in (x_train, x_test, x_val)
    )



In [None]:
# Training generator: multiplies pixel values by 1/255 and applies random
# rotations, shifts and horizontal flips.
train_augmentation = {
    "rescale": 1.0 / 255,
    "rotation_range": 20,
    "width_shift_range": 0.2,
    "height_shift_range": 0.2,
    "horizontal_flip": True,
}
datagen_train = ImageDataGenerator(**train_augmentation)

# Evaluation generator: rescaling only, no random transforms.
datagen_test = ImageDataGenerator(rescale=1.0 / 255)

# fit() gathers the statistics used by featurewise normalization and ZCA
# whitening. Neither generator enables those options, so these calls are
# presumably without effect on the generated batches - TODO confirm.
datagen_train.fit(x_train)
datagen_test.fit(x_test)


## Criação das categorias

In [None]:
# Show the label mapping of the selected dataset as the cell output.
info["label"]

In [None]:

# One-hot encode the training and test labels over n_classes columns, as
# expected by the categorical cross-entropy loss used at compile time.
y_train, y_test = (
    to_categorical(labels, n_classes) for labels in (y_train, y_test)
)

In [None]:
# Simple CNN: three Conv2D + MaxPooling2D stages followed by a dense head.
model = tf.keras.models.Sequential()

# Stage 1 - the only layer that declares the input shape.
model.add(tf.keras.layers.Conv2D(32, (3, 3), activation='relu',
                                 input_shape=(28, 28, n_channels)))
model.add(tf.keras.layers.MaxPooling2D(2, 2))

# Stage 2
model.add(tf.keras.layers.Conv2D(64, (3, 3), activation='relu'))
model.add(tf.keras.layers.MaxPooling2D(2, 2))

# Stage 3
model.add(tf.keras.layers.Conv2D(64, (3, 3), activation='relu'))
model.add(tf.keras.layers.MaxPooling2D(2, 2))

# Classifier head: one hidden dense layer, then a softmax over n_classes.
model.add(tf.keras.layers.Flatten())
model.add(tf.keras.layers.Dense(128, activation='relu'))
model.add(tf.keras.layers.Dense(n_classes, activation='softmax'))

In [None]:
# Adam optimizer with categorical cross-entropy (labels are one-hot encoded
# and the output layer is a softmax); accuracy is tracked during training.
model.compile(
    loss='categorical_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)

In [None]:
# Train on augmented batches and validate on the dedicated validation split.
# The test split was previously passed as validation data, which leaks it
# into model selection; it is now left untouched for a final evaluation.
# y_val is one-hot encoded here because only y_train / y_test were converted
# earlier. shuffle=False keeps the validation order deterministic.
history = model.fit(
    datagen_train.flow(x_train, y_train),
    validation_data=datagen_test.flow(
        x_val, to_categorical(y_val, n_classes), shuffle=False),
    epochs=10)


# Modelo CNN com mais recursos

In [None]:
import tensorflow as tf


import keras
from keras.models import Sequential
from tensorflow.keras.utils import to_categorical
from keras.preprocessing.image import ImageDataGenerator
from keras.layers import Dense, Activation, Flatten, Dropout, BatchNormalization
from keras.layers import Conv2D, MaxPooling2D
from keras.datasets import cifar10
from keras import regularizers, optimizers
import numpy as np
from matplotlib import pyplot

In [None]:
import os
import time

import medmnist
import numpy as np
import tensorflow as tf
from medmnist import INFO, Evaluator
from medmnist.info import DEFAULT_ROOT
from tensorflow.keras.models import load_model

from tensorflow.keras.utils import to_categorical

In [None]:
# Where MedMNIST caches its archives, and the base directory for this run's outputs.
input_root = DEFAULT_ROOT
output_root = './images'

# Metadata entry for the dataset chosen in the form cell.
info = INFO[data_flag]
task = info['task']
# Instantiating the dataset class with download=True makes sure the archive
# read below is present under input_root; the instance itself is not needed.
_ = getattr(medmnist, info['python_class'])(split="train", root=input_root, download=True)

n_classes = len(info['label'])
n_channels = info['n_channels']

# One timestamped output directory per run. exist_ok=True replaces the
# separate isdir() check and is safe if the directory already exists.
output_root = os.path.join(output_root, data_flag, time.strftime("%y%m%d_%H%M%S"))
os.makedirs(output_root, exist_ok=True)

# Pull every split out of the archive inside a context manager so the
# underlying .npz file handle is closed once the arrays are in memory.
with np.load(os.path.join(input_root, "{}.npz".format(data_flag))) as npz_file:
    x_train = npz_file['train_images']
    y_train = npz_file['train_labels']
    x_val = npz_file['val_images']
    y_val = npz_file['val_labels']
    x_test = npz_file['test_images']
    y_test = npz_file['test_labels']

# One-hot encode the training and test labels over n_classes columns.
y_train = to_categorical(y_train, n_classes)
y_test = to_categorical(y_test, n_classes)

In [None]:
rank = x_train.ndim

# Image arrays with fewer than four dimensions get a trailing singleton axis
# (presumably the channel axis of single-channel images - TODO confirm), so
# all three splits end up with the same rank.
if rank < 4:
    x_train, x_test, x_val = (
        np.expand_dims(images, axis=-1) for images in (x_train, x_test, x_val)
    )

In [None]:
# Training generator: multiplies pixel values by 1/255 and applies random
# rotations, shifts and horizontal flips.
datagen_train = ImageDataGenerator(
    rescale=1.0/255,
    rotation_range=15,
    width_shift_range=0.1,
    height_shift_range=0.1,
    horizontal_flip=True,
    vertical_flip=False)

# Evaluation generator: rescaling only. Random augmentation was previously
# applied here as well, which made the reported validation metrics (and the
# checkpoint chosen from them) vary from epoch to epoch for reasons unrelated
# to the model.
datagen_test = ImageDataGenerator(rescale=1.0/255)

# fit() gathers the statistics used by featurewise normalization and ZCA
# whitening. Neither generator enables those options, so these calls are
# presumably without effect on the generated batches - TODO confirm.
datagen_train.fit(x_train)
datagen_test.fit(x_test)


In [None]:
# Build the deeper CNN: three stages of two convolution units each, with the
# filter count doubling per stage, followed by a softmax classifier.

# Filter count of the first stage; later stages use multiples of it.
base_hidden_units = 32

# L2 regularization strength applied to every convolution kernel.
weight_decay = 1e-4


def _append_conv_unit(network, n_filters, **conv_options):
    """Append Conv2D (3x3, same padding, L2) -> ReLU -> BatchNormalization."""
    network.add(Conv2D(n_filters, (3, 3), padding='same',
                       kernel_regularizer=regularizers.l2(weight_decay),
                       **conv_options))
    network.add(Activation('relu'))
    network.add(BatchNormalization())


def _close_stage(network, dropout_rate):
    """Append the 2x2 max-pooling and dropout layers that end a stage."""
    network.add(MaxPooling2D(pool_size=(2, 2)))
    network.add(Dropout(dropout_rate))


model = Sequential()

# Stage 1 (CONV1, CONV2) - only the very first layer declares the input shape.
_append_conv_unit(model, base_hidden_units, input_shape=x_train.shape[1:])
_append_conv_unit(model, base_hidden_units)
_close_stage(model, 0.2)

# Stage 2 (CONV3, CONV4)
_append_conv_unit(model, 2*base_hidden_units)
_append_conv_unit(model, 2*base_hidden_units)
_close_stage(model, 0.3)

# Stage 3 (CONV5, CONV6)
_append_conv_unit(model, 4*base_hidden_units)
_append_conv_unit(model, 4*base_hidden_units)
_close_stage(model, 0.4)

# FC7: flatten the feature maps and classify with a softmax over n_classes.
model.add(Flatten())
model.add(Dense(n_classes, activation='softmax'))

# Print the layer-by-layer summary.
model.summary()

# RMSprop with a small learning rate; categorical cross-entropy matches the
# one-hot labels.
opt = tf.keras.optimizers.RMSprop(learning_rate=0.0003)
model.compile(loss='categorical_crossentropy', optimizer=opt, metrics=['accuracy'])

# Training hyperparameters read by the fit cell.
batch_size = 64
epochs = 125



In [None]:
# Checkpoint callback: writes weights only, and only when the monitored
# val_accuracy improves on its best value so far (mode='max').
checkpoint_filepath = '/tmp/checkpoint'
model_checkpoint_callback = tf.keras.callbacks.ModelCheckpoint(
    filepath=checkpoint_filepath,
    monitor='val_accuracy',
    mode='max',
    save_best_only=True,
    save_weights_only=True,
)

In [None]:
# Build the training iterator once so that its length (the number of batches
# per epoch) can be passed as an integer steps_per_epoch. The previous value,
# x_train.shape[0] / batch_size, was a float.
train_batches = datagen_train.flow(x_train, y_train, batch_size=batch_size)

# Validate on the dedicated validation split instead of the test split, so
# that the checkpoint callback (which monitors val_accuracy) does not select
# weights using test data. y_val is one-hot encoded here because only
# y_train / y_test were converted earlier.
val_batches = datagen_test.flow(x_val, to_categorical(y_val, n_classes),
                                shuffle=False)

history = model.fit(train_batches,
                    validation_data=val_batches,
                    steps_per_epoch=len(train_batches),
                    epochs=epochs,
                    callbacks=[model_checkpoint_callback])


# Exploração com Autokeras

In [None]:
# Install AutoKeras through a notebook shell escape, then import it under the
# alias used by the cells below.
!pip install autokeras
import autokeras as ak

In [None]:


# Where MedMNIST caches its archives, and the base directory for this run's outputs.
input_root = DEFAULT_ROOT
output_root = './images'

# Metadata entry for the dataset chosen in the form cell.
info = INFO[data_flag]
task = info['task']
# Instantiating the dataset class with download=True makes sure the archive
# read below is present under input_root; the instance itself is not needed.
_ = getattr(medmnist, info['python_class'])(split="train", root=input_root, download=True)

n_classes = len(info['label'])
n_channels = info['n_channels']

# One timestamped output directory per run. exist_ok=True replaces the
# separate isdir() check and is safe if the directory already exists.
output_root = os.path.join(output_root, data_flag, time.strftime("%y%m%d_%H%M%S"))
os.makedirs(output_root, exist_ok=True)

# Reload the raw splits (integer labels, original image rank) inside a
# context manager so the .npz file handle is closed once the arrays are in
# memory.
with np.load(os.path.join(input_root, "{}.npz".format(data_flag))) as npz_file:
    x_train = npz_file['train_images']
    y_train = npz_file['train_labels']
    x_val = npz_file['val_images']
    y_val = npz_file['val_labels']
    x_test = npz_file['test_images']
    y_test = npz_file['test_labels']

In [None]:
# Initialize the image classifier: a single search trial, overwriting any
# previous search results.
clf = ak.ImageClassifier(overwrite=True, max_trials=1)
clf.fit(
    x_train,
    y_train,
    # Validate on the dedicated validation split. The test split was passed
    # here before; it is now kept held out for the final evaluation.
    validation_data=(x_val, y_val),
    epochs=5,
)