In [None]:
import os
import pathlib
import pandas as pd
import random
import numpy as np
import matplotlib.pyplot as plt
from dataclasses import dataclass
import nibabel as nib
from nilearn import plotting
import cv2
from tqdm import tqdm
import gc

In [None]:
@dataclass(frozen=True)
class DatasetConfig:
    """Immutable configuration for the slice-extraction notebook.

    The visible cells read these values as class attributes
    (e.g. ``DatasetConfig.SEED_VALUE``). The path and cut-plane values below
    are placeholders and must be edited before the notebook is run.
    """

    # NOTE(review): NUM_CLASSES, IMG_HEIGHT, IMG_WIDTH and CHANNELS are not
    # referenced by any visible cell - confirm they are still needed.
    NUM_CLASSES: int = 43
    IMG_HEIGHT:  int = 224
    IMG_WIDTH:   int = 224
    CHANNELS:    int = 3
    # Seed used for `random`, NumPy and the shuffling of positive studies.
    SEED_VALUE:  int = 41
    # Placeholder: set to "sagital" or "coronal". The slicing helpers in this
    # notebook treat any other value as "slice along the third axis".
    CUT_PLANE:   str = "escolha do corte: sagital/coronal"

    # Placeholder: folder that contains the metadata CSV and the
    # imagesTr/ and labelsTr/ sub-folders.
    DATA_ROOT_SOURCE_PATH: str = 'caminho do dataset'
    # Built with os.path.join so the separator is correct on every platform
    # (a hard-coded backslash only works on Windows).
    MAIN_DATA_CSV_PATH: str = os.path.join(DATA_ROOT_SOURCE_PATH, 'fdg_metadata.csv')
    # Placeholder: folder where the generated PNG slices are written; the
    # chosen cut plane is appended to it.
    DATA_ROOT_TARGET:  str = f'caminho de onde será gerados as imagens{CUT_PLANE}'
    DATA_ROOT_TRAIN:  str = DATA_ROOT_TARGET + '/Train'
    DATA_ROOT_VALID:  str = DATA_ROOT_TARGET + '/Valid'
    DATA_ROOT_TEST:   str = DATA_ROOT_TARGET + '/Test'
    DATA_TEST_GT:     str = DATA_ROOT_TARGET + '/Test.csv'

In [None]:
# Seed Python's `random` module and NumPy's global RNG with the configured seed.
random.seed(DatasetConfig.SEED_VALUE)
np.random.seed(DatasetConfig.SEED_VALUE)

In [None]:
# Load the metadata CSV configured in DatasetConfig and display it.
fdg_data = pd.read_csv(DatasetConfig.MAIN_DATA_CSV_PATH)
fdg_data

In [None]:
# Summary statistics of the "Number of Images" column.
fdg_data["Number of Images"].describe()

In [None]:
def get_image_path(record):
    """Build the path of the ``.nii.gz`` file that belongs to a metadata record.

    Args:
        record: Mapping-like row providing the "Subject ID", "File Location"
            and "Modality" entries.

    Returns:
        str: ``<DATA_ROOT_SOURCE_PATH>/<folder>/fdg_<prefix>_<suffix><tag>.nii.gz``
        where ``folder`` is "labelsTr" for SEG records and "imagesTr"
        otherwise, ``prefix`` is the text after the last "_" of the subject
        id, ``suffix`` is the second-to-last "/"-separated component of the
        file location, and ``tag`` is "_0000" for CT, "_0001" for PT and empty
        for any other modality.
    """
    subject_suffix = record["Subject ID"].split("_")[-1]
    study_folder = record["File Location"].split("/")[-2]
    modality = record["Modality"]

    subdir = "labelsTr" if modality == "SEG" else "imagesTr"

    # CT and PT volumes are told apart by a numeric tag in the file name.
    if modality == "PT":
        channel_tag = "_0001"
    elif modality == "CT":
        channel_tag = "_0000"
    else:
        channel_tag = ""

    file_name = f"fdg_{subject_suffix}_{study_folder}{channel_tag}.nii.gz"
    return DatasetConfig.DATA_ROOT_SOURCE_PATH + f"/{subdir}/" + file_name

In [None]:
# Display the metadata table again (same frame as loaded earlier).
fdg_data

In [None]:
# Work on a copy so the raw metadata frame stays untouched.
pre_processed_df = fdg_data.copy()

In [None]:
# Collapse every diagnosis other than "NEGATIVE" into a single "POSITIVE"
# label, then keep only the columns used by the rest of the notebook.
pre_processed_df = pre_processed_df.assign(
    diagnosis=lambda frame: frame["diagnosis"].apply(
        lambda label: label if label == "NEGATIVE" else "POSITIVE"
    )
)[
    [
        "Subject ID",
        "Study UID",
        "Modality",
        "Number of Images",
        "File Location",
        "diagnosis",
    ]
]
pre_processed_df

In [None]:
# Peek at the Study UID of the first row.
pre_processed_df["Study UID"].iloc[0]

In [None]:
# One row per distinct (Subject ID, Study UID, diagnosis) combination, plus
# per-slice columns that start out as None.
single_reg_df = (
    pre_processed_df[["Subject ID", "Study UID", "diagnosis"]]
    .drop_duplicates()
    .assign(
        sliceNum=None,
        totalSlices=None,
        filePath=None,
        height=None,
        width=None,
    )
)
single_reg_df

In [None]:
# Number of rows per diagnosis label.
single_reg_df.diagnosis.value_counts()

In [None]:
# Empty frame with the same columns as single_reg_df; rows are appended to it later.
target_data_df = single_reg_df.iloc[0:0].copy()
target_data_df

In [None]:
def get_modalities_from_study_uid(study_uid):
    """Return the first CT, PT and SEG metadata rows of a study.

    Reads the module-level ``pre_processed_df``.

    Args:
        study_uid: Value matched against the "Study UID" column.

    Returns:
        tuple: ``(row_CT, row_PET, row_SEG)``, each the first row of
        ``pre_processed_df`` with the given study uid and the respective
        "Modality" value ("CT", "PT", "SEG").

    Raises:
        IndexError: If the study has no row for one of the three modalities.
    """
    study_rows = pre_processed_df[pre_processed_df["Study UID"] == study_uid]
    first_rows = [
        study_rows[study_rows["Modality"] == modality].iloc[0]
        for modality in ("CT", "PT", "SEG")
    ]
    return tuple(first_rows)

In [None]:
def get_annotated_slices(seg_img, slices_num, cut_plane="coronal"):
    """List the slice indices whose segmentation slice has a non-zero value.

    Args:
        seg_img: Array indexed with three indices (the segmentation volume).
        slices_num: Number of slice positions to scan, starting at 0.
        cut_plane: "coronal" slices along the second axis, "sagital" along the
            first axis; any other value slices along the third axis.

    Returns:
        list[int]: Indices in ``range(slices_num)``, in ascending order, whose
        slice contains at least one element different from 0.
    """
    axis = 1 if cut_plane == "coronal" else (0 if cut_plane == "sagital" else 2)

    def has_annotation(position):
        # Index a single position on `axis` and take everything on the others.
        selector = [slice(None)] * 3
        selector[axis] = position
        plane = seg_img[tuple(selector)]
        return (plane != 0).any()

    return [position for position in range(slices_num) if has_annotation(position)]

In [None]:
# Not currently used by the pipeline. The original cell wrapped this function
# in an unterminated ''' string, which is a SyntaxError; it is now a regular,
# side-effect-free definition that can be called when needed.
def show_all_modalities(seg_img, pet_img, ct_img):
    """Plot a segmentation, a PET and a CT slice, one figure after another.

    Args:
        seg_img: 2-D array shown as-is with the "hot" colormap.
        pet_img: 2-D array shown transposed, "gist_yarg" colormap, origin at
            the lower-left corner.
        ct_img: 2-D array shown transposed, "gray" colormap, origin at the
            lower-left corner.
    """
    plt.imshow(seg_img, cmap="hot")
    plt.show()

    plt.imshow(pet_img.T, cmap="gist_yarg", origin='lower')
    plt.show()

    plt.imshow(ct_img.T, cmap="gray", origin='lower')
    plt.show()

In [None]:
def get_plane(seg, pet, ct, slice_index, plane):
    """Extract the same 2-D slice from the SEG, PET and (optional) CT volumes.

    Args:
        seg: Segmentation volume, indexed with three indices.
        pet: PET volume, indexed the same way.
        ct: CT volume, or None when no CT slice is wanted.
        slice_index: Position of the slice along the chosen axis.
        plane: 'coronal' slices along the second axis, 'sagital' along the
            first axis; any other value slices along the third axis.

    Returns:
        tuple: ``(seg_slice, pet_slice, ct_slice)``; ``ct_slice`` is None when
        ``ct`` is None.
    """
    axis = 1 if plane == 'coronal' else (0 if plane == 'sagital' else 2)

    # Full slices everywhere except a single position on the chosen axis.
    selector = [slice(None)] * 3
    selector[axis] = slice_index
    selector = tuple(selector)

    seg_slice = seg[selector]
    pet_slice = pet[selector]
    ct_slice = ct[selector] if ct is not None else None
    return seg_slice, pet_slice, ct_slice

In [None]:
# Not currently used by the pipeline. The original cell wrapped this function
# in an unterminated ''' string, which is a SyntaxError; it is now a regular
# definition.
def get_full_image_from_slices(pet, ct):
    """Stack a CT slice and a PET slice into one three-channel image.

    Args:
        pet: 2-D PET slice with the same height and width as ``ct``.
        ct: 2-D CT slice.

    Returns:
        numpy.ndarray: Array with channels (CT, PET, zeros) stacked on the last
        axis, rotated by 90 degrees (``np.rot90`` with ``k=1``) in the plane of
        the first two axes.
    """
    # Third channel is left empty (all zeros).
    blank_channel = np.zeros((len(ct), len(ct[0])))
    np_img = np.dstack((ct, pet, blank_channel))
    return np.rot90(np_img, k=1, axes=(0, 1))

In [None]:
def get_target_file_name(row, cut_index):
    """Build the output PNG path for one slice of a study.

    Args:
        row: Mapping-like record providing "Subject ID" and "Study UID".
        cut_index: Index of the slice being exported.

    Returns:
        str: ``<DATA_ROOT_TARGET>/<Subject ID>-<Study UID>-<cut_index>.png``,
        joined with the platform's path separator.
    """
    file_name = f"{row['Subject ID']}-{row['Study UID']}-{cut_index}.png"
    # os.path.join instead of a hard-coded backslash: outside Windows the
    # backslash is an ordinary file-name character, so the directory part of
    # the path would not be the target folder.
    return os.path.join(DatasetConfig.DATA_ROOT_TARGET, file_name)

In [None]:
def get_num_slices_from_cut_plane(shape, cut_plane="coronal"):
    """Return how many slices a volume has along the given cut plane.

    Args:
        shape: Sequence with the sizes of the volume's three axes.
        cut_plane: "coronal" selects the second axis, "sagital" the first
            axis; any other value selects the third axis.

    Returns:
        The entry of ``shape`` for the selected axis.
    """
    axis = 1 if cut_plane == "coronal" else (0 if cut_plane == "sagital" else 2)
    return shape[axis]

In [None]:
# Not currently used by the pipeline. The original cell wrapped this function
# in an unterminated ''' string, which is a SyntaxError; it is now a regular
# definition.
def get_equivalent_slice(origin_selected, origin_total, target_total):
    """Map a slice index from one volume onto a volume with another slice count.

    Args:
        origin_selected: Slice index in the origin volume.
        origin_total: Number of slices in the origin volume (must not be 0).
        target_total: Number of slices in the target volume.

    Returns:
        ``(target_total * origin_selected) // origin_total``, i.e. the index
        scaled proportionally and floored.
    """
    return (target_total * origin_selected) // origin_total

In [None]:
# Rows labelled "NEGATIVE"; copied so later changes do not touch single_reg_df.
negative_df = single_reg_df[single_reg_df.diagnosis == "NEGATIVE"].copy()

In [None]:
def process_positive_element(positive_df, negative_df, target_data_path):
    """Export PET slices of every positive study, each paired with a negative study.

    For each row of ``positive_df`` the PET and SEG volumes are loaded and the
    slice indices along ``DatasetConfig.CUT_PLANE`` whose SEG slice contains a
    non-zero value are selected. The PET slice at each selected index is saved
    as a PNG. One negative study, taken round-robin from ``negative_df``, is
    loaded per positive study and its PET slices at the same indices are saved
    as well.

    A failure while processing one positive study is printed and the loop
    moves on to the next study; records already collected are kept.

    Args:
        positive_df: DataFrame whose rows provide "Subject ID" and "Study UID"
            of the positive studies to process.
        negative_df: DataFrame with the same columns for the negative studies.
        target_data_path: Unused; output paths come from
            ``get_target_file_name``. Kept so existing calls keep working.

    Returns:
        list[tuple]: One ``(study_uid, volume_shape, slice_index, png_path)``
        tuple per PNG written.

    Raises:
        ValueError: If ``negative_df`` is empty.
    """
    records = []

    if negative_df.empty:
        raise ValueError("O dataframe de pacientes negativos está vazio. Certifique-se de que ele esteja corretamente preenchido.")

    # Negative studies are consumed in rotation, one per positive study.
    negative_list = negative_df.to_dict('records')
    neg_index = 0

    for _, pos_row in tqdm(positive_df.iterrows(), total=positive_df.shape[0], desc="Processando Pacientes Positivos"):
        # Bind the volume names up front: the cleanup in `finally` must work
        # even when loading fails before a volume was assigned. The previous
        # `del` raised UnboundLocalError in that case and aborted the whole run.
        pet_img = seg_img = neg_pet_img = None
        try:
            print(f"Carregando imagens para o paciente positivo: {pos_row['Study UID']}")
            _, pet_row, seg_row = get_modalities_from_study_uid(pos_row["Study UID"])
            pet_img = nib.load(get_image_path(pet_row)).get_fdata()
            seg_img = nib.load(get_image_path(seg_row)).get_fdata()

            # Total number of slices of this study along the cut plane.
            num_slices = get_num_slices_from_cut_plane(seg_img.shape, DatasetConfig.CUT_PLANE)

            # Slice indices that carry an annotation.
            annotated_slices = get_annotated_slices(seg_img, num_slices, cut_plane=DatasetConfig.CUT_PLANE)

            # Pick the next negative study in rotation.
            neg_row = negative_list[neg_index]
            neg_index = (neg_index + 1) % len(negative_list)

            print(f"Carregando imagem PET para o paciente negativo: {neg_row['Study UID']}")
            _, neg_pet_row, _ = get_modalities_from_study_uid(neg_row["Study UID"])
            neg_pet_img = nib.load(get_image_path(neg_pet_row)).get_fdata()
            neg_num_slices = get_num_slices_from_cut_plane(neg_pet_img.shape, DatasetConfig.CUT_PLANE)

            for slice_index in annotated_slices:
                # Positive sample.
                _, pet_slice, _ = get_plane(seg_img, pet_img, None, slice_index, DatasetConfig.CUT_PLANE)
                positive_filename = get_target_file_name(pos_row, slice_index)
                save_as_png(pet_slice, positive_filename)
                records.append((pos_row["Study UID"], pet_img.shape, slice_index, positive_filename))

                # Negative sample at the same index. The negative volume may
                # be shorter along the cut plane; skip only that negative slice
                # instead of letting an IndexError drop the remaining positive
                # slices of this study.
                if slice_index >= neg_num_slices:
                    print(f"Fatia {slice_index} fora dos limites do paciente negativo {neg_row['Study UID']}; fatia negativa ignorada.")
                    continue

                _, neg_pet_slice, _ = get_plane(seg_img, neg_pet_img, None, slice_index, DatasetConfig.CUT_PLANE)
                negative_filename = get_target_file_name(neg_row, slice_index)
                save_as_png(neg_pet_slice, negative_filename)
                records.append((neg_row["Study UID"], neg_pet_img.shape, slice_index, negative_filename))

        except Exception as e:
            print(f"Erro ao processar o paciente {pos_row['Study UID']}: {e}")
        finally:
            # Drop the references to the volumes before the next iteration.
            pet_img = seg_img = neg_pet_img = None
            gc.collect()

    return records

In [None]:
def save_as_png(image_slice, file_path):
    """Min-max normalise a slice to the 16-bit range and write it to disk.

    Args:
        image_slice: 2-D numeric array to export.
        file_path: Destination path; missing parent folders are created.

    Raises:
        IOError: If OpenCV reports that the image could not be written.
    """
    # Rescale the values to 0..65535 and convert to unsigned 16-bit.
    image_norm = cv2.normalize(image_slice, None, alpha=0, beta=65535, norm_type=cv2.NORM_MINMAX, dtype=cv2.CV_16U)

    # A bare file name has an empty directory part, and os.makedirs('') raises.
    target_dir = os.path.dirname(file_path)
    if target_dir:
        os.makedirs(target_dir, exist_ok=True)

    # cv2.imwrite signals failure through its return value, not an exception.
    if not cv2.imwrite(file_path, image_norm):
        raise IOError(f"Falha ao salvar a imagem: {file_path}")

In [None]:
# Display the rows labelled "POSITIVE".
single_reg_df[single_reg_df.diagnosis == "POSITIVE"]

In [None]:
def add_record_to_target_df(study_uid, shape, selected_slice, image_path, cut_plane="coronal"):
    """Return ``target_data_df`` with one extra row describing an exported slice.

    Reads the module-level ``single_reg_df`` and ``target_data_df``; the
    caller is expected to rebind ``target_data_df`` to the returned frame.

    Args:
        study_uid: "Study UID" of the study the slice belongs to.
        shape: Shape of the volume the slice was cut from.
        selected_slice: Index of the exported slice.
        image_path: Path of the PNG that was written for the slice.
        cut_plane: Plane the slice was cut along; selects which entry of
            ``shape`` is stored as ``totalSlices`` (default "coronal", i.e.
            ``shape[1]``).

    Returns:
        pandas.DataFrame: ``target_data_df`` plus the new row, re-indexed.

    Raises:
        IndexError: If ``study_uid`` is not present in ``single_reg_df``.
        FileNotFoundError: If the PNG at ``image_path`` cannot be read.
    """
    row = single_reg_df[single_reg_df["Study UID"] == study_uid].to_dict(orient='records')[0]
    # Honour cut_plane: the slice count lives on a different axis per plane.
    # Previously shape[1] was stored regardless of the plane.
    row["totalSlices"] = get_num_slices_from_cut_plane(shape, cut_plane)
    row["sliceNum"] = selected_slice
    # Strip the target root so only the relative part of the path is stored.
    row["filePath"] = image_path.replace(DatasetConfig.DATA_ROOT_TARGET, '')

    # Read the generated image back without conversion to get its real size.
    img = cv2.imread(image_path, cv2.IMREAD_UNCHANGED)
    if img is None:
        # cv2.imread returns None instead of raising when the file is
        # missing or unreadable.
        raise FileNotFoundError(f"Não foi possível ler a imagem gerada: {image_path}")
    height, width = img.shape[:2]

    row["height"] = height
    row["width"] = width

    return pd.concat([target_data_df, pd.DataFrame([row])], ignore_index=True)

In [None]:
# Display the target frame before it is filled.
target_data_df

In [None]:
# Shuffle the positive studies reproducibly (fixed random_state) and export
# the slices of each one together with a paired negative study.
positive_records_df = single_reg_df[single_reg_df.diagnosis == "POSITIVE"]
rows_to_iterate = positive_records_df.sample(random_state=DatasetConfig.SEED_VALUE + 123, frac=1)

records = process_positive_element(rows_to_iterate, negative_df, DatasetConfig.DATA_ROOT_TARGET)

# Start from an empty frame so re-running this cell does not append the same
# records a second time.
target_data_df = single_reg_df.iloc[0:0].copy()
for study_uid, shape, selected_slice, img_path in records:
    # Pass the configured plane so totalSlices refers to the axis that was cut.
    target_data_df = add_record_to_target_df(
        study_uid, shape, selected_slice, img_path, cut_plane=DatasetConfig.CUT_PLANE
    )

In [None]:
# Display the filled target frame.
target_data_df

In [None]:
# Save the slice descriptions next to the generated images.
# The DataFrame index is written as the first (unnamed) CSV column.
target_data_df.to_csv(f"{DatasetConfig.DATA_ROOT_TARGET}/data_description.csv")

In [None]:
# Number of exported slices per subject.
subject_id_counts = target_data_df['Subject ID'].value_counts()
print(subject_id_counts)