In [None]:
!pip install -U segmentation-models

In [1]:
import cv2
import numpy as np
import os
from keras.callbacks import ModelCheckpoint
from segmentation_models import *
from segmentation_models.metrics import iou_score
import shutil
import pickle

def load_data(train, train_gt, test, test_gt):
    X_train = np.load(train, allow_pickle = True)
    y_train = np.expand_dims(np.load(train_gt, allow_pickle = True), axis = -1)//255
    # print(y_train.shape)
    # y_train = to_categorical(y_train, 1)
    X_test = np.load(test, allow_pickle = True)
    y_test = np.expand_dims(np.load(test_gt, allow_pickle = True), axis = -1)//255
    # print(y_test.shape)
    # y_test = to_categorical((y_test, 1), dtype = 'boolean')
    return X_train, y_train, X_test, y_test

2024-01-31 14:14:51.104011: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 AVX512F AVX512_VNNI FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.
2024-01-31 14:14:51.184789: I tensorflow/core/util/util.cc:169] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2024-01-31 14:14:51.208369: E tensorflow/stream_executor/cuda/cuda_blas.cc:2981] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2024-01-31 14:14:51.570836: W tensorflow/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; 

Segmentation Models: using `keras` framework.


In [2]:
def resize_data(old_set, shape):
    (H, W) = shape
    old_shape = old_set.shape
    new_set = np.empty(shape = (old_shape[0], H, W, old_shape[3]))
    for i in range(old_set.shape[0]):
        new_set[i] = np.expand_dims(cv2.resize(old_set[i, :, :, 0], (W, H)), axis = -1)
    
    return new_set

def resize_all(sets, shape):
    for i in range(len(sets)):
        sets[i] = resize_data(sets[i], shape)

    return sets


In [3]:

def dice_calc(im_1, im_2, empty_score=6.0):
    im1 = im_1 > .3#!= 0#.astype(np.bool)
    im2 = im_2 > .3#!= 0#.astype(np.bool)

    if im1.shape != im2.shape:
        raise ValueError("Shape mismatch: im1 and im2 must have the same shape.")

    im_sum = im1.sum() + im2.sum()
    if im_sum == 0:
        # print("empty")
        return empty_score

    # Compute dice_val coefficient
    intersection = np.logical_and(im1, im2)

    return 2. * intersection.sum() / im_sum

def sensi_speci_accu(truth, mask):
	# (TP + TN)/(TP + TN + FP + FN)
	# TPR = TP / (TP + FN)
	# SPC = TN / (FP + TN)

    thresh = .3
    C = (((mask > thresh)*2 + (truth > thresh)).reshape(-1, 1) == range(4)).sum(0)

    # sensitivity = C[3]/C[1::2].sum()
    # specificity = C[0]/C[::2].sum()

    sensitivity = C[3]/np.sum(C[1] + C[3])
    specificity = C[0]/np.sum(C[0] + C[2])
    accuracy = (C[0] + C[3])/np.sum(C)

    return sensitivity, specificity, accuracy


In [4]:

def save_masks(pred, names, output):
    """
        Salva as máscaras geradas

        Parameters
        ----------
        pred: array-like, float32
            Array contendo todas as máscaras preditas
        names: list, 'string'
            Lista de nomes dos arquivos das imagens
        output: string
            Nome do diretório onde as imagens serão salvas
            
    """
    for i in range(pred.shape[0]):
        m = pred[i, :, :, 0]
        cv2.imwrite(output + names[i] + '.png', 255 * ((m - m.min())/(m.max() - m.min())))
    

def generate_log(y_true, y_pred, names, output):
    """
        Gera um log com os valores Dice, Sensibilidade, Especificidade e Acurácia para todas as imagens do conjunto de teste

        Parameters
        ----------
        y_true: array-like, float32
            Mascara anotada manualmente (Ground-truth).
        y_pred: array-like, float32
            Mascara predita pelo modelo.
        names: list, string
            Lista com os nomes dos arquivos das imagens.
        output: string
            Caminho para o arquivo de saída.
        
        Returns
        -------
        Dice: float32
            Valor médio do IoU no intervalo [0, 1]
        Sensitivity: float32
            Valor médio da sensibilidade no intervalo [0, 1]
        Especificity: float32
            Valor médio da especificidade no intervalo [0, 1]
        Accuracy: float32
            Valor médio da acurácia no intervalo [0, 1]
    """
    
    with open(output.split('.csv')[0] + '.csv', 'w') as f:
        f.write('NOME,IOU,SENSIBILIDADE,ESPECIFICIDADE,ACURACIA\n')
        dice, sensibilidade, especificidade, acuracia = 0, 0, 0, 0

        for i in range(y_pred.shape[0]):
            iou = dice_calc(y_true[i], y_pred[i])
            s, e, a = sensi_speci_accu(y_true[i], y_pred[i])
            
            dice += iou
            sensibilidade += s
            especificidade += e
            acuracia += a

            f.write('{},{},{},{},{}\n'.format(names[i], iou, s, e, a))
        
        return dice/y_pred.shape[0], sensibilidade/y_pred.shape[0], especificidade/y_pred.shape[0], acuracia/y_pred.shape[0]

def save_statistics(filename, model_name, pre_processing_method, statistics):
	(d, s, e, a) = statistics

	with open(filename, 'a') as f:
		f.write('{},{},{},{},{},{}\n'.format(model_name, pre_processing_method, d, s, e, a))


In [5]:
def create_folder(path):
	if not os.path.exists(path):
		# shutil.rmtree(path)
	    os.makedirs(path)
 
def prepare_folders(root, model_names, preprocessing_names):
    for model_name in model_names:
        current = root + '/' + model_name

        create_folder(current)
        create_folder(current + '/logs')
        create_folder(current + '/masks')
        create_folder(current + '/weights')

        for preprocessing_name in preprocessing_names:
            current_sub = current + '/masks/' + preprocessing_name

            create_folder(current_sub)



In [6]:
'''
'Linknet': Linknet,
'FPN': FPN,
'PSPNet': PSPNet
'''    
a = {
    'Unet': Unet 
}

'''
'vgg16', 'vgg19', 
'resnet18', 'resnet34', 'resnet50', 'resnet101', 'resnet152', 
'seresnet18', 'seresnet34', 'seresnet50', 'seresnet101', 'seresnet152', 
'seresnext50', 'seresnext101',
'resnext50', 'resnext101',
'senet154',
'densenet121', 'densenet169', 'densenet201',
'inceptionv3', 'inceptionresnetv2',
'mobilenet', 'mobilenetv2',
'efficientnetb0', 'efficientnetb1', 'efficientnetb2', 'efficientnetb3', 'efficientnetb4', 'efficientnetb5', 'efficientnetb6', 'efficientnetb7'
'''

backbones = ['densenet169']


In [7]:
import segmentation_models as sm

sm.set_framework('tf.keras')

sm.framework()

main_folder =  './bases/'#'/content/drive/MyDrive/chest/segment_gabriel/bases/' 
main_models_folder =  './segm_models/'#'/content/drive/MyDrive/chest/segment_gabriel/segm_models'
X_train, y_train, X_val, y_val = load_data(main_folder + 'train_ALL_BASES_hm.pickle', main_folder + 'train_gt_ALL_BASES_hm.pickle',
                                           main_folder + 'test_ALL_BASES_hm.pickle', main_folder + 'test_gt_ALL_BASES_hm.pickle')

X_test, y_test, X_test, y_test = load_data(main_folder + 'test_Chest_hm.pickle', main_folder + 'test_gt_Chest_hm.pickle',
                                           main_folder + 'test_Chest_hm.pickle', main_folder + 'test_gt_Chest_hm.pickle')

with open('./evaluated.txt', 'r') as f: #/content/drive/MyDrive/chest/segment_gabriel/evaluated.txt'
    evaluated_models = names = [x.rstrip('\n') for x in f.readlines()]

with open('./names.txt', 'r') as f: #/content/drive/MyDrive/chest/segment_gabriel/names.txt
    names = [x.rstrip('\n') for x in f.readlines()]

prepare_folders(main_models_folder, [x[0] for x in a.items()], backbones)

for (name, Model) in a.items():
    for BACKBONE in backbones:
        if not (name + '_' + BACKBONE) in evaluated_models:
            print('TRAINING WITH MODEL ' + name + ' AND PREPROCESSING ' + BACKBONE)

            pre_processing = get_preprocessing(BACKBONE)

            if name == 'PSPNet':
                [X_train, y_train, X_val, y_val] = resize_all([ X_train, y_train, X_val, y_val], (480, 480))
                [X_test, y_test] = resize_all([ X_test, y_test], (480, 480))

            model = Model(BACKBONE, input_shape = (X_train.shape[1], X_train.shape[2], 3), classes = 1)

            xt = pre_processing(X_train)
            xv = pre_processing(X_val)
            xtest = pre_processing(X_test)
            
                
            model.compile(optimizer='adadelta', loss='binary_crossentropy', metrics=['accuracy', iou_score])

            checkpoint = ModelCheckpoint('{}/{}/weights/{}_best_weights.hdf5'.format(main_models_folder, name, BACKBONE), monitor='val_iou_score', verbose=1, save_best_only=True,save_weights_only=True, mode='max')
            
            history = model.fit(
                x = xt,
                y = y_train,
                batch_size = 4,
                epochs = 60,
                callbacks = [checkpoint],
                validation_data = (xv, y_val),
            )

            pred = model.predict(xtest)
            with open('{}/{}/pred_{}.pickle'.format(main_models_folder, name, BACKBONE), 'wb') as f:
                pickle.dump(pred, f)

            statistics = generate_log(y_test, pred, names, '{}/{}/logs/{}.csv'.format(main_models_folder, name, BACKBONE))
            save_masks(pred, names, '{}/{}/masks/{}/'.format(main_models_folder, name, BACKBONE))
            save_statistics('{}/resultados'.format(main_models_folder), name, BACKBONE, statistics)

            with open('evaluated.txt', 'a') as f:
                f.write(name + '_' + BACKBONE + '\n')


TRAINING WITH MODEL Unet AND PREPROCESSING densenet169


2024-01-31 14:15:50.177767: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:980] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2024-01-31 14:15:50.520898: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:980] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2024-01-31 14:15:50.522278: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:980] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2024-01-31 14:15:50.524224: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 AVX512F AVX512_VNNI FMA
To enable them in other operations, rebuild TensorFlow with the approp

Downloading data from https://github.com/keras-team/keras-applications/releases/download/densenet/densenet169_weights_tf_dim_ordering_tf_kernels_notop.h5
Epoch 1/60


2024-01-31 14:17:01.852261: I tensorflow/stream_executor/cuda/cuda_dnn.cc:384] Loaded cuDNN version 8204
2024-01-31 14:17:06.591547: I tensorflow/core/platform/default/subprocess.cc:304] Start cannot spawn child process: No such file or directory
2024-01-31 14:17:11.719994: I tensorflow/stream_executor/cuda/cuda_blas.cc:1614] TensorFloat-32 will be used for the matrix multiplication. This will only be logged once.




: 

#Isolando os testes

Aqui com a mesma base de homologação, usando o melhor modelo obtido

In [None]:
from keras.models import load_model, Model

#predict
main_folder = '/content/drive/MyDrive/chest/segment_gabriel/bases/'
main_models_folder = '/content/drive/MyDrive/chest/segment_gabriel/segm_models'
X_test, y_test, X_test, y_test = load_data(main_folder + 'test_Chest_hm.pickle', main_folder + 'test_gt_Chest_hm.pickle',
                                           main_folder + 'test_Chest_hm.pickle', main_folder + 'test_gt_Chest_hm.pickle')
with open('/content/drive/MyDrive/chest/segment_gabriel/names.txt', 'r') as f:
    names = [x.rstrip('\n') for x in f.readlines()]
    
BACKBONE = 'densenet169'
name = 'Unet'

model = Unet(BACKBONE, input_shape = (X_train.shape[1], X_train.shape[2], 3), classes = 1)
model.load_weights('/content/drive/MyDrive/chest/segment_gabriel/best_weights_best_results/best_weightsUnet_densenet169_best_weights.hdf5')

pred = model.predict(xtest)
with open('{}/{}/pred_{}.pickle'.format(main_models_folder, name, BACKBONE), 'wb') as f:
    pickle.dump(pred, f)

statistics = generate_log(y_test, pred, names, '{}/{}/logs/{}.csv'.format(main_models_folder, name, BACKBONE))
save_masks(pred, names, '{}/{}/masks/{}/'.format(main_models_folder, name, BACKBONE))
save_statistics('{}/resultados'.format(main_models_folder), name, BACKBONE, statistics)

with open('evaluated.txt', 'a') as f:
    f.write(name + '_' + BACKBONE + '\n')

#Montando a base completa da Chest X-Ray
Pegando a base inteira e obtendo a segmentação com o melhor modelo


In [None]:
!pip install kaggle
import os
os.environ['KAGGLE_USERNAME'] = "geraldobraz"
os.environ['KAGGLE_KEY'] = "fbca5960f1451ba427d6a9397840aa67"
!kaggle datasets download -d paultimothymooney/chest-xray-pneumonia
!unzip -q chest-xray-pneumonia.zip

Downloading chest-xray-pneumonia.zip to /content
100% 2.29G/2.29G [00:32<00:00, 30.0MB/s]
100% 2.29G/2.29G [00:32<00:00, 75.1MB/s]


In [None]:
!rm -rf chest_xray_seg 
!mkdir chest_xray_seg
!mkdir chest_xray_seg/train
!mkdir chest_xray_seg/test
!mkdir chest_xray_seg/val
!mkdir chest_xray_seg/train/NORMAL
!mkdir chest_xray_seg/train/PNEUMONIA
!mkdir chest_xray_seg/val/NORMAL
!mkdir chest_xray_seg/val/PNEUMONIA
!mkdir chest_xray_seg/test/NORMAL
!mkdir chest_xray_seg/test/PNEUMONIA

!rm -rf chest_xray_mask
!mkdir chest_xray_mask
!mkdir chest_xray_mask/train
!mkdir chest_xray_mask/test
!mkdir chest_xray_mask/val
!mkdir chest_xray_mask/train/NORMAL
!mkdir chest_xray_mask/train/PNEUMONIA
!mkdir chest_xray_mask/val/NORMAL
!mkdir chest_xray_mask/val/PNEUMONIA
!mkdir chest_xray_mask/test/NORMAL
!mkdir chest_xray_mask/test/PNEUMONIA

In [None]:
#gerando as mascaras
import os
import glob
import cv2
import numpy as np
import segmentation_models as sm
from segmentation_models import *
from segmentation_models.metrics import iou_score

sm.set_framework('tf.keras')
sm.framework()

#parametros globais
base_folder = '/content/chest_xray/'
image_format = '.jpeg'

BACKBONE = 'densenet169'
name = 'Unet'

model = Unet(BACKBONE, input_shape = (X_train.shape[1], X_train.shape[2], 3), classes = 1)
model.load_weights('/content/drive/MyDrive/chest/segment_gabriel/best_weights_best_results/best_weightsUnet_densenet169_best_weights.hdf5')
pre_processing = get_preprocessing(BACKBONE)

def get_folders(data_base):
  data_folders = []
  for name in os.listdir(data_base):
    if(os.path.isdir(data_base + name)):
      data_folders.append(name)
  print(data_folders)

  return data_folders

def extract(spec):
	classes_folders = get_folders(base_folder + spec)

	data = []
	labels = []
	for f in classes_folders:
		print(base_folder + spec + f + "/*" + image_format)
		dataset = glob.glob(base_folder + spec + f + "/*" + image_format)
		for arq in dataset:			
			im = cv2.imread(arq)
			im = cv2.cvtColor(im, cv2.COLOR_BGR2RGB)
			im = cv2.resize(im, (512,512))
			im = np.reshape(im, (1, 512, 512, 3))
			#pre_process do backbone
			im = pre_processing(im)
			#predição
			pred = model.predict(im)

			m = pred[0, :, :, 0]
			m =  255 * ((m - m.min())/(m.max() - m.min()))
			m = m.astype('uint8')

			f = im[0, :, :, 0]
			f =  255 * ((f - f.min())/(f.max() - f.min()))
			f = f.astype('uint8')

			final = cv2.bitwise_and(f, f, mask=m)

			#salvando a mascara
			cv2.imwrite(arq.replace("chest_xray", "chest_xray_mask") + '.png', m)
			#salvando a imagem
			cv2.imwrite(arq.replace("chest_xray", "chest_xray_seg"), final)

			#data.append(im)
			#labels.append(f)
	#return np.asarray(data), np.asarray(labels)

extract('train/')
extract('val/')
extract('test/')
'''
X_train, y_train = extract('train/')
print(X_train.shape)
print(y_train.shape)

X_val, y_val = extract('val/')
print(X_val.shape)
print(y_val.shape)

X_test, y_test = extract('test/')
print(X_test.shape)
print(y_test.shape)
'''

['NORMAL', 'PNEUMONIA']
/content/chest_xray/train/NORMAL/*.jpeg
/content/chest_xray/train/PNEUMONIA/*.jpeg
['NORMAL', 'PNEUMONIA']
/content/chest_xray/val/NORMAL/*.jpeg
/content/chest_xray/val/PNEUMONIA/*.jpeg
['NORMAL', 'PNEUMONIA']
/content/chest_xray/test/NORMAL/*.jpeg
/content/chest_xray/test/PNEUMONIA/*.jpeg


"\nX_train, y_train = extract('train/')\nprint(X_train.shape)\nprint(y_train.shape)\n\nX_val, y_val = extract('val/')\nprint(X_val.shape)\nprint(y_val.shape)\n\nX_test, y_test = extract('test/')\nprint(X_test.shape)\nprint(y_test.shape)\n"

In [None]:
!tar -czf chest_xray_mask.tar.gz chest_xray_mask/
!tar -czf chest_xray_seg.tar.gz chest_xray_seg/
!mv *.tar.gz /content/drive/MyDrive/chest/segment_gabriel

#Montando a base completa da Covid
Pegando a base inteira e obtendo a segmentação com o melhor modelo

In [None]:
!pip install kaggle
import os
os.environ['KAGGLE_USERNAME'] = "geraldobraz"
os.environ['KAGGLE_KEY'] = "fbca5960f1451ba427d6a9397840aa67"
!kaggle datasets download -d tawsifurrahman/covid19-radiography-database
!unzip -q covid19-radiography-database.zip

Downloading covid19-radiography-database.zip to /content
 98% 729M/745M [00:06<00:00, 120MB/s]
100% 745M/745M [00:06<00:00, 123MB/s]
mv: cannot stat 'COVID-19 Radiography Database': No such file or directory


In [None]:
!mv COVID-19_Radiography_Dataset/Viral\ Pneumonia COVID-19_Radiography_Dataset/Viral
!rm -rf COVID-19_Radiography_Dataset_mask
!mkdir COVID-19_Radiography_Dataset_mask
!mkdir COVID-19_Radiography_Dataset_mask/COVID
!mkdir COVID-19_Radiography_Dataset_mask/Lung_Opacity
!mkdir COVID-19_Radiography_Dataset_mask/Normal
!mkdir COVID-19_Radiography_Dataset_mask/Viral

!rm -rf COVID-19_Radiography_Dataset_seg
!mkdir COVID-19_Radiography_Dataset_seg
!mkdir COVID-19_Radiography_Dataset_seg/COVID
!mkdir COVID-19_Radiography_Dataset_seg/Lung_Opacity
!mkdir COVID-19_Radiography_Dataset_seg/Normal
!mkdir COVID-19_Radiography_Dataset_seg/Viral

mv: cannot stat 'COVID-19_Radiography_Dataset/Viral Pneumonia': No such file or directory


In [None]:
#gerando as mascaras
import os
import glob
import cv2
import numpy as np
import segmentation_models as sm
from segmentation_models import *
from segmentation_models.metrics import iou_score

sm.set_framework('tf.keras')
sm.framework()

#parametros globais
base_folder = '/content/COVID-19_Radiography_Dataset/'
image_format = '.png'

BACKBONE = 'densenet169'
name = 'Unet'

model = Unet(BACKBONE, input_shape = (X_train.shape[1], X_train.shape[2], 3), classes = 1)
model.load_weights('/content/drive/MyDrive/chest/segment_gabriel/best_weights_best_results/best_weightsUnet_densenet169_best_weights.hdf5')
pre_processing = get_preprocessing(BACKBONE)

def get_folders(data_base):
  data_folders = []
  for name in os.listdir(data_base):
    if(os.path.isdir(data_base + name)):
      data_folders.append(name)
  print(data_folders)

  return data_folders

def extract(spec):
	classes_folders = get_folders(base_folder + spec)

	data = []
	labels = []
	for f in classes_folders:
		print(base_folder + spec + f + "/*" + image_format)
		dataset = glob.glob(base_folder + spec + f + "/*" + image_format)
		for arq in dataset:			
			im = cv2.imread(arq)
			im = cv2.cvtColor(im, cv2.COLOR_BGR2RGB)
			im = cv2.resize(im, (512,512))
			im = np.reshape(im, (1, 512, 512, 3))
			#pre_process do backbone
			im = pre_processing(im)
			#predição
			pred = model.predict(im)

			m = pred[0, :, :, 0]
			m =  255 * ((m - m.min())/(m.max() - m.min()))
			m = m.astype('uint8')

			f = im[0, :, :, 0]
			f =  255 * ((f - f.min())/(f.max() - f.min()))
			f = f.astype('uint8')

			final = cv2.bitwise_and(f, f, mask=m)

			#salvando a mascara
			cv2.imwrite(arq.replace("COVID-19_Radiography_Dataset", "COVID-19_Radiography_Dataset_mask") + '.png', m)
			#salvando a imagem
			cv2.imwrite(arq.replace("COVID-19_Radiography_Dataset", "COVID-19_Radiography_Dataset_seg"), final)

			#data.append(im)
			#labels.append(f)
	#return np.asarray(data), np.asarray(labels)

extract('')


'''
X_train, y_train = extract('train/')
print(X_train.shape)
print(y_train.shape)

X_val, y_val = extract('val/')
print(X_val.shape)
print(y_val.shape)

X_test, y_test = extract('test/')
print(X_test.shape)
print(y_test.shape)
'''

['Lung_Opacity', 'Viral', 'Normal', 'COVID']
/content/COVID-19_Radiography_Dataset/Lung_Opacity/*.png
/content/COVID-19_Radiography_Dataset/Viral/*.png
/content/COVID-19_Radiography_Dataset/Normal/*.png
/content/COVID-19_Radiography_Dataset/COVID/*.png


"\nX_train, y_train = extract('train/')\nprint(X_train.shape)\nprint(y_train.shape)\n\nX_val, y_val = extract('val/')\nprint(X_val.shape)\nprint(y_val.shape)\n\nX_test, y_test = extract('test/')\nprint(X_test.shape)\nprint(y_test.shape)\n"

In [None]:
!tar -czf COVID-19_Radiography_Dataset_mask.tar.gz COVID-19_Radiography_Dataset_mask/
!tar -czf COVID-19_Radiography_Dataset_seg.tar.gz COVID-19_Radiography_Dataset_seg/
!mv *.tar.gz /content/drive/MyDrive/chest/segment_gabriel