In [1]:
!pip install nibabel nilearn opencv-python

Looking in indexes: https://pypi.org/simple, https://pypi.ngc.nvidia.com


In [2]:
import os
import pathlib
import pandas as pd
import random
import numpy as np
import matplotlib.pyplot as plt
from dataclasses import dataclass
import nibabel as nib
from nilearn import plotting
import cv2

In [3]:
@dataclass(frozen=True)
class DatasetConfig:
    """Immutable settings shared by every cell of this notebook.

    The path fields are derived from one another at class-definition time,
    so changing CUT_PLANE or a root path here updates every dependent path.

    NOTE(review): NUM_CLASSES, IMG_HEIGHT, IMG_WIDTH, CHANNELS and the
    Train/Valid/Test paths are not referenced by the cells visible in this
    notebook - confirm whether a later stage still needs them.
    """

    NUM_CLASSES: int = 43
    IMG_HEIGHT: int = 224
    IMG_WIDTH: int = 224
    CHANNELS: int = 3
    SEED_VALUE: int = 41
    # Plane along which the volumes are sliced. The helper functions compare
    # against the exact strings "coronal" and "sagital" (sic); any other value
    # selects the third axis.
    # for this experiment, only sagital and coronal were applied
    CUT_PLANE: str = "coronal"

    # Source dataset (NIfTI volumes plus metadata CSV)
    DATA_ROOT_SOURCE_PATH: str = '/home/jovyan/data/auto-pet-iii/2024-05-10_Autopet_v1.1'
    MAIN_DATA_CSV_PATH: str = f'{DATA_ROOT_SOURCE_PATH}/fdg_metadata.csv'

    # Destination of the exported PNG slices, one folder per cut plane
    DATA_ROOT_TARGET: str = '/home/jovyan/data/segmentation/autopet-full-' + CUT_PLANE
    DATA_ROOT_TRAIN: str = f'{DATA_ROOT_TARGET}/Train'
    DATA_ROOT_VALID: str = f'{DATA_ROOT_TARGET}/Valid'
    DATA_ROOT_TEST: str = f'{DATA_ROOT_TARGET}/Test'
    DATA_TEST_GT: str = f'{DATA_ROOT_TARGET}/Test.csv'

In [4]:
# Seed both the stdlib and the NumPy global RNGs with the configured value
# so that random operations in later cells are repeatable.
random.seed(DatasetConfig.SEED_VALUE)
np.random.seed(DatasetConfig.SEED_VALUE)

In [5]:
# Load the dataset metadata CSV; every later table in this notebook is derived from it.
fdg_data = pd.read_csv(DatasetConfig.MAIN_DATA_CSV_PATH)
fdg_data

Unnamed: 0,Series UID,Collection,3rd Party Analysis,Data Description URI,Subject ID,Study UID,Study Description,Study Date,Series Description,Manufacturer,Modality,SOP Class Name,SOP Class UID,Number of Images,File Size,File Location,Download Timestamp,diagnosis,age,sex
0,1.3.6.1.4.1.14519.5.2.1.4219.6651.201203836389...,FDG-PET-CT-Lesions,NO,,PETCT_0011f3deaf,1.3.6.1.4.1.14519.5.2.1.4219.6651.389860614478...,PET-CT Ganzkoerper primaer mit KM,3/23/2003,Segmentation,QIICR,SEG,Segmentation Storage,1.2.840.10008.5.1.4.1.1.66.4,1,6.75 MB,./FDG-PET-CT-Lesions/PETCT_0011f3deaf/03-23-20...,2022-03-17T12:53:16.75,MELANOMA,063Y,F
1,1.3.6.1.4.1.14519.5.2.1.4219.6651.685145825998...,FDG-PET-CT-Lesions,NO,,PETCT_0011f3deaf,1.3.6.1.4.1.14519.5.2.1.4219.6651.389860614478...,PET-CT Ganzkoerper primaer mit KM,3/23/2003,PET corr.,SIEMENS,PT,Positron Emission Tomography Image Storage,1.2.840.10008.5.1.4.1.1.128,326,105.82 MB,./FDG-PET-CT-Lesions/PETCT_0011f3deaf/03-23-20...,2022-03-17T12:53:44.865,MELANOMA,063Y,F
2,1.3.6.1.4.1.14519.5.2.1.4219.6651.151532511484...,FDG-PET-CT-Lesions,NO,,PETCT_01140d52d8,1.3.6.1.4.1.14519.5.2.1.4219.6651.316964892168...,PET-CT Ganzkoerper primaer mit KM,8/13/2005,PET corr.,SIEMENS,PT,Positron Emission Tomography Image Storage,1.2.840.10008.5.1.4.1.1.128,305,99.01 MB,./FDG-PET-CT-Lesions/PETCT_01140d52d8/08-13-20...,2022-03-17T12:53:45.761,MELANOMA,061Y,M
3,1.3.6.1.4.1.14519.5.2.1.4219.6651.313152023416...,FDG-PET-CT-Lesions,NO,,PETCT_01140d52d8,1.3.6.1.4.1.14519.5.2.1.4219.6651.316964892168...,PET-CT Ganzkoerper primaer mit KM,8/13/2005,Segmentation,QIICR,SEG,Segmentation Storage,1.2.840.10008.5.1.4.1.1.66.4,1,6.31 MB,./FDG-PET-CT-Lesions/PETCT_01140d52d8/08-13-20...,2022-03-17T12:53:47.839,MELANOMA,061Y,M
4,1.3.6.1.4.1.14519.5.2.1.4219.6651.331726561677...,FDG-PET-CT-Lesions,NO,,PETCT_0011f3deaf,1.3.6.1.4.1.14519.5.2.1.4219.6651.389860614478...,PET-CT Ganzkoerper primaer mit KM,3/23/2003,GK p.v.3,SIEMENS,CT,CT Image Storage,1.2.840.10008.5.1.4.1.1.2,391,206.52 MB,./FDG-PET-CT-Lesions/PETCT_0011f3deaf/03-23-20...,2022-03-17T12:53:53.415,MELANOMA,063Y,F
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3037,1.3.6.1.4.1.14519.5.2.1.4219.6651.268371672619...,FDG-PET-CT-Lesions,NO,,PETCT_ff1451316e,1.3.6.1.4.1.14519.5.2.1.4219.6651.172967091722...,PET-CT Ganzkoerper primaer mit KM,3/31/2003,GK p.v.3,SIEMENS,CT,CT Image Storage,1.2.840.10008.5.1.4.1.1.2,391,206.53 MB,./FDG-PET-CT-Lesions/PETCT_ff1451316e/03-31-20...,2022-03-17T21:50:46.509,NEGATIVE,073Y,M
3038,1.3.6.1.4.1.14519.5.2.1.4219.6651.236102552014...,FDG-PET-CT-Lesions,NO,,PETCT_fe705ea1cc,1.3.6.1.4.1.14519.5.2.1.4219.6651.319090403244...,Unspecified CT ABDOMEN,12/29/2002,GK p.v.1 WF,SIEMENS,CT,CT Image Storage,1.2.840.10008.5.1.4.1.1.2,1214,641.06 MB,./FDG-PET-CT-Lesions/PETCT_fe705ea1cc/12-29-20...,2022-03-17T21:51:00.132,LYMPHOMA,066Y,F
3039,1.3.6.1.4.1.14519.5.2.1.4219.6651.330497593503...,FDG-PET-CT-Lesions,NO,,PETCT_ff39795341,1.3.6.1.4.1.14519.5.2.1.4219.6651.253687934564...,PET-CT Ganzkoerper primaer mit KM,9/22/2005,Segmentation,QIICR,SEG,Segmentation Storage,1.2.840.10008.5.1.4.1.1.66.4,1,11.13 MB,./FDG-PET-CT-Lesions/PETCT_ff39795341/09-22-20...,2022-03-17T21:51:04.996,NEGATIVE,073Y,F
3040,1.3.6.1.4.1.14519.5.2.1.4219.6651.117635362952...,FDG-PET-CT-Lesions,NO,,PETCT_ff39795341,1.3.6.1.4.1.14519.5.2.1.4219.6651.253687934564...,PET-CT Ganzkoerper primaer mit KM,9/22/2005,GK p.v.3 s,SIEMENS,CT,CT Image Storage,1.2.840.10008.5.1.4.1.1.2,645,340.68 MB,./FDG-PET-CT-Lesions/PETCT_ff39795341/09-22-20...,2022-03-17T21:51:26.506,NEGATIVE,073Y,F


In [6]:
# Summary statistics of the "Number of Images" column of the metadata table.
fdg_data["Number of Images"].describe()

count    3042.000000
mean      301.432281
std       297.575955
min         1.000000
25%         1.000000
50%       326.000000
75%       391.000000
max      2651.000000
Name: Number of Images, dtype: float64

In [7]:
def get_image_path(record):
    """Build the absolute path of the NIfTI file that belongs to a metadata row.

    Args:
        record: mapping (e.g. a DataFrame row) providing the keys
            "Subject ID", "File Location" and "Modality".

    Returns:
        str: ``<DATA_ROOT_SOURCE_PATH>/<folder>/fdg_<id>_<study><suffix>.nii.gz``
        where ``<id>`` is the last "_"-separated token of "Subject ID" and
        ``<study>`` is the second-to-last "/"-separated component of
        "File Location".
    """
    subject_token = record["Subject ID"].split("_")[-1]
    study_folder = record["File Location"].split("/")[-2]
    modality = record["Modality"]

    # Segmentations are stored under labelsTr, every other modality under imagesTr
    subdir = "imagesTr"
    if modality == "SEG":
        subdir = "labelsTr"

    # CT and PT files carry a channel suffix; any other modality has none
    channel_suffix = {"CT": "_0000", "PT": "_0001"}.get(modality, "")

    return DatasetConfig.DATA_ROOT_SOURCE_PATH + f"/{subdir}/fdg_{subject_token}_{study_folder}{channel_suffix}.nii.gz"

Target Folder Tree:
- 0
  - subject_id-study_uid-image_index.png
- 1
  - subject_id-study_uid-image_index.png

In [8]:
# Show the raw metadata table again before deriving the simplified frames.
fdg_data

Unnamed: 0,Series UID,Collection,3rd Party Analysis,Data Description URI,Subject ID,Study UID,Study Description,Study Date,Series Description,Manufacturer,Modality,SOP Class Name,SOP Class UID,Number of Images,File Size,File Location,Download Timestamp,diagnosis,age,sex
0,1.3.6.1.4.1.14519.5.2.1.4219.6651.201203836389...,FDG-PET-CT-Lesions,NO,,PETCT_0011f3deaf,1.3.6.1.4.1.14519.5.2.1.4219.6651.389860614478...,PET-CT Ganzkoerper primaer mit KM,3/23/2003,Segmentation,QIICR,SEG,Segmentation Storage,1.2.840.10008.5.1.4.1.1.66.4,1,6.75 MB,./FDG-PET-CT-Lesions/PETCT_0011f3deaf/03-23-20...,2022-03-17T12:53:16.75,MELANOMA,063Y,F
1,1.3.6.1.4.1.14519.5.2.1.4219.6651.685145825998...,FDG-PET-CT-Lesions,NO,,PETCT_0011f3deaf,1.3.6.1.4.1.14519.5.2.1.4219.6651.389860614478...,PET-CT Ganzkoerper primaer mit KM,3/23/2003,PET corr.,SIEMENS,PT,Positron Emission Tomography Image Storage,1.2.840.10008.5.1.4.1.1.128,326,105.82 MB,./FDG-PET-CT-Lesions/PETCT_0011f3deaf/03-23-20...,2022-03-17T12:53:44.865,MELANOMA,063Y,F
2,1.3.6.1.4.1.14519.5.2.1.4219.6651.151532511484...,FDG-PET-CT-Lesions,NO,,PETCT_01140d52d8,1.3.6.1.4.1.14519.5.2.1.4219.6651.316964892168...,PET-CT Ganzkoerper primaer mit KM,8/13/2005,PET corr.,SIEMENS,PT,Positron Emission Tomography Image Storage,1.2.840.10008.5.1.4.1.1.128,305,99.01 MB,./FDG-PET-CT-Lesions/PETCT_01140d52d8/08-13-20...,2022-03-17T12:53:45.761,MELANOMA,061Y,M
3,1.3.6.1.4.1.14519.5.2.1.4219.6651.313152023416...,FDG-PET-CT-Lesions,NO,,PETCT_01140d52d8,1.3.6.1.4.1.14519.5.2.1.4219.6651.316964892168...,PET-CT Ganzkoerper primaer mit KM,8/13/2005,Segmentation,QIICR,SEG,Segmentation Storage,1.2.840.10008.5.1.4.1.1.66.4,1,6.31 MB,./FDG-PET-CT-Lesions/PETCT_01140d52d8/08-13-20...,2022-03-17T12:53:47.839,MELANOMA,061Y,M
4,1.3.6.1.4.1.14519.5.2.1.4219.6651.331726561677...,FDG-PET-CT-Lesions,NO,,PETCT_0011f3deaf,1.3.6.1.4.1.14519.5.2.1.4219.6651.389860614478...,PET-CT Ganzkoerper primaer mit KM,3/23/2003,GK p.v.3,SIEMENS,CT,CT Image Storage,1.2.840.10008.5.1.4.1.1.2,391,206.52 MB,./FDG-PET-CT-Lesions/PETCT_0011f3deaf/03-23-20...,2022-03-17T12:53:53.415,MELANOMA,063Y,F
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3037,1.3.6.1.4.1.14519.5.2.1.4219.6651.268371672619...,FDG-PET-CT-Lesions,NO,,PETCT_ff1451316e,1.3.6.1.4.1.14519.5.2.1.4219.6651.172967091722...,PET-CT Ganzkoerper primaer mit KM,3/31/2003,GK p.v.3,SIEMENS,CT,CT Image Storage,1.2.840.10008.5.1.4.1.1.2,391,206.53 MB,./FDG-PET-CT-Lesions/PETCT_ff1451316e/03-31-20...,2022-03-17T21:50:46.509,NEGATIVE,073Y,M
3038,1.3.6.1.4.1.14519.5.2.1.4219.6651.236102552014...,FDG-PET-CT-Lesions,NO,,PETCT_fe705ea1cc,1.3.6.1.4.1.14519.5.2.1.4219.6651.319090403244...,Unspecified CT ABDOMEN,12/29/2002,GK p.v.1 WF,SIEMENS,CT,CT Image Storage,1.2.840.10008.5.1.4.1.1.2,1214,641.06 MB,./FDG-PET-CT-Lesions/PETCT_fe705ea1cc/12-29-20...,2022-03-17T21:51:00.132,LYMPHOMA,066Y,F
3039,1.3.6.1.4.1.14519.5.2.1.4219.6651.330497593503...,FDG-PET-CT-Lesions,NO,,PETCT_ff39795341,1.3.6.1.4.1.14519.5.2.1.4219.6651.253687934564...,PET-CT Ganzkoerper primaer mit KM,9/22/2005,Segmentation,QIICR,SEG,Segmentation Storage,1.2.840.10008.5.1.4.1.1.66.4,1,11.13 MB,./FDG-PET-CT-Lesions/PETCT_ff39795341/09-22-20...,2022-03-17T21:51:04.996,NEGATIVE,073Y,F
3040,1.3.6.1.4.1.14519.5.2.1.4219.6651.117635362952...,FDG-PET-CT-Lesions,NO,,PETCT_ff39795341,1.3.6.1.4.1.14519.5.2.1.4219.6651.253687934564...,PET-CT Ganzkoerper primaer mit KM,9/22/2005,GK p.v.3 s,SIEMENS,CT,CT Image Storage,1.2.840.10008.5.1.4.1.1.2,645,340.68 MB,./FDG-PET-CT-Lesions/PETCT_ff39795341/09-22-20...,2022-03-17T21:51:26.506,NEGATIVE,073Y,F


In [9]:
# Work on a copy so the raw metadata in fdg_data is left untouched.
pre_processed_df = fdg_data.copy()

In [10]:
# Derive both label columns from the untouched fdg_data, so this cell gives the
# same result no matter how often it is re-run. (Mutating pre_processed_df in
# place overwrote "condition" with the already-binarised labels on a second run.)
#   condition: original diagnosis string (NEGATIVE or the disease name)
#   diagnosis: binary label, everything that is not "NEGATIVE" becomes "POSITIVE"
pre_processed_df = fdg_data.assign(
    condition=fdg_data["diagnosis"],
    diagnosis=fdg_data["diagnosis"].apply(lambda x: "POSITIVE" if x != "NEGATIVE" else x),
)[["Subject ID","Study UID","Modality","Number of Images","File Location","diagnosis","condition"]]
pre_processed_df

Unnamed: 0,Subject ID,Study UID,Modality,Number of Images,File Location,diagnosis,condition
0,PETCT_0011f3deaf,1.3.6.1.4.1.14519.5.2.1.4219.6651.389860614478...,SEG,1,./FDG-PET-CT-Lesions/PETCT_0011f3deaf/03-23-20...,POSITIVE,MELANOMA
1,PETCT_0011f3deaf,1.3.6.1.4.1.14519.5.2.1.4219.6651.389860614478...,PT,326,./FDG-PET-CT-Lesions/PETCT_0011f3deaf/03-23-20...,POSITIVE,MELANOMA
2,PETCT_01140d52d8,1.3.6.1.4.1.14519.5.2.1.4219.6651.316964892168...,PT,305,./FDG-PET-CT-Lesions/PETCT_01140d52d8/08-13-20...,POSITIVE,MELANOMA
3,PETCT_01140d52d8,1.3.6.1.4.1.14519.5.2.1.4219.6651.316964892168...,SEG,1,./FDG-PET-CT-Lesions/PETCT_01140d52d8/08-13-20...,POSITIVE,MELANOMA
4,PETCT_0011f3deaf,1.3.6.1.4.1.14519.5.2.1.4219.6651.389860614478...,CT,391,./FDG-PET-CT-Lesions/PETCT_0011f3deaf/03-23-20...,POSITIVE,MELANOMA
...,...,...,...,...,...,...,...
3037,PETCT_ff1451316e,1.3.6.1.4.1.14519.5.2.1.4219.6651.172967091722...,CT,391,./FDG-PET-CT-Lesions/PETCT_ff1451316e/03-31-20...,NEGATIVE,NEGATIVE
3038,PETCT_fe705ea1cc,1.3.6.1.4.1.14519.5.2.1.4219.6651.319090403244...,CT,1214,./FDG-PET-CT-Lesions/PETCT_fe705ea1cc/12-29-20...,POSITIVE,LYMPHOMA
3039,PETCT_ff39795341,1.3.6.1.4.1.14519.5.2.1.4219.6651.253687934564...,SEG,1,./FDG-PET-CT-Lesions/PETCT_ff39795341/09-22-20...,NEGATIVE,NEGATIVE
3040,PETCT_ff39795341,1.3.6.1.4.1.14519.5.2.1.4219.6651.253687934564...,CT,645,./FDG-PET-CT-Lesions/PETCT_ff39795341/09-22-20...,NEGATIVE,NEGATIVE


In [11]:
# Print one complete Study UID (the table display above truncates the value).
pre_processed_df["Study UID"].iloc[0]

'1.3.6.1.4.1.14519.5.2.1.4219.6651.389860614478618826979455610445'

In [12]:
# One row per distinct (subject, study, diagnosis, condition) combination
single_reg_df = pre_processed_df[["Subject ID", "Study UID", "diagnosis","condition"]].copy().drop_duplicates()

# Placeholder columns, initialised to None; they are filled in for every
# exported slice when the records of the target table are built.
for placeholder_column in ("sliceNum", "totalSlices", "imagePath", "segPath", "height", "width"):
    single_reg_df[placeholder_column] = None

single_reg_df

Unnamed: 0,Subject ID,Study UID,diagnosis,condition,sliceNum,totalSlices,imagePath,segPath,height,width
0,PETCT_0011f3deaf,1.3.6.1.4.1.14519.5.2.1.4219.6651.389860614478...,POSITIVE,MELANOMA,,,,,,
2,PETCT_01140d52d8,1.3.6.1.4.1.14519.5.2.1.4219.6651.316964892168...,POSITIVE,MELANOMA,,,,,,
5,PETCT_0117d7f11f,1.3.6.1.4.1.14519.5.2.1.4219.6651.316799646612...,POSITIVE,LUNG_CANCER,,,,,,
8,PETCT_0143bab87a,1.3.6.1.4.1.14519.5.2.1.4219.6651.335264930457...,NEGATIVE,NEGATIVE,,,,,,
11,PETCT_01682f60c3,1.3.6.1.4.1.14519.5.2.1.4219.6651.628573049819...,POSITIVE,LUNG_CANCER,,,,,,
...,...,...,...,...,...,...,...,...,...,...
3027,PETCT_fde79b6aa9,1.3.6.1.4.1.14519.5.2.1.4219.6651.960887139192...,POSITIVE,MELANOMA,,,,,,
3029,PETCT_fe705ea1cc,1.3.6.1.4.1.14519.5.2.1.4219.6651.319090403244...,POSITIVE,LYMPHOMA,,,,,,
3031,PETCT_fe705ea1cc,1.3.6.1.4.1.14519.5.2.1.4219.6651.972577895642...,NEGATIVE,NEGATIVE,,,,,,
3035,PETCT_ff1451316e,1.3.6.1.4.1.14519.5.2.1.4219.6651.172967091722...,NEGATIVE,NEGATIVE,,,,,,


In [13]:
# Number of de-duplicated rows per binary diagnosis label.
single_reg_df.diagnosis.value_counts()

diagnosis
NEGATIVE    513
POSITIVE    501
Name: count, dtype: int64

In [14]:
# Number of de-duplicated rows per original condition.
single_reg_df.condition.value_counts()

condition
NEGATIVE       513
MELANOMA       188
LUNG_CANCER    168
LYMPHOMA       145
Name: count, dtype: int64

In [15]:
# Empty frame with the same columns as single_reg_df; it receives one row per exported slice.
target_data_df = single_reg_df.iloc[0:0].copy()
target_data_df

Unnamed: 0,Subject ID,Study UID,diagnosis,condition,sliceNum,totalSlices,imagePath,segPath,height,width


In [16]:
def get_modalities_from_study_uid(study_uid):
    """Return the CT, PET and segmentation metadata rows of one study.

    Args:
        study_uid: value matched against the "Study UID" column of
            ``pre_processed_df``.

    Returns:
        tuple: ``(row_CT, row_PET, row_SEG)`` - the first row of the study whose
        "Modality" is "CT", "PT" and "SEG" respectively.

    Raises:
        IndexError: if the study has no row for one of the three modalities.
    """
    # Filter the study once instead of re-scanning the whole frame per modality
    study_rows = pre_processed_df[pre_processed_df["Study UID"] == study_uid]

    def _first_row(modality):
        matches = study_rows[study_rows["Modality"] == modality]
        if matches.empty:
            # Same exception type .iloc[0] raises on an empty frame, but with a
            # message that names the study and the missing modality.
            raise IndexError(
                f"Study UID {study_uid!r} has no row with Modality {modality!r}"
            )
        return matches.iloc[0]

    row_CT = _first_row("CT")
    row_PET = _first_row("PT")
    row_SEG = _first_row("SEG")
    return row_CT, row_PET, row_SEG

In [17]:
def get_highest_activation_seg(seg_img, slices_num, cut_plane="coronal"):
    """Return the index of the slice holding the most non-zero voxels.

    Args:
        seg_img: 3-D array indexed as ``seg_img[x, y, z]``.
        slices_num: number of slices to inspect, starting at index 0.
        cut_plane: "coronal" walks axis 1, "sagital" (spelling used throughout
            this notebook) walks axis 0, any other value walks axis 2.

    Returns:
        int: index of the first slice with the highest non-zero count, or 0
        when no inspected slice contains a non-zero value (including
        ``slices_num == 0``).
    """
    if cut_plane == "coronal":
        axis = 1
    elif cut_plane == "sagital":
        axis = 0
    else:
        axis = 2

    selected_slice = 0
    curr_max_pixels = 0
    for i in range(slices_num):
        # Equivalent to seg_img[:, i, :] / seg_img[i, :, :] / seg_img[:, :, i]
        index = [slice(None)] * 3
        index[axis] = i
        # Count in place rather than materialising a masked copy of the slice
        num_of_non_null_pixels = np.count_nonzero(seg_img[tuple(index)])
        # Strict comparison: on ties the earliest slice is kept
        if num_of_non_null_pixels > curr_max_pixels:
            curr_max_pixels = num_of_non_null_pixels
            selected_slice = i

    return selected_slice

In [18]:
def get_all_activation_segments(seg_img, slices_num, cut_plane="coronal"):
    """List every slice index whose segmentation slice has a non-zero value.

    Args:
        seg_img: 3-D array indexed as ``seg_img[x, y, z]``.
        slices_num: number of slices to inspect, starting at index 0.
        cut_plane: "coronal" slices along axis 1, "sagital" along axis 0,
            any other value along axis 2.

    Returns:
        list[int]: ascending indices of the slices with at least one
        non-zero element (empty list if there are none).
    """
    def _slice_at(position):
        if cut_plane == "coronal":
            return seg_img[:, position, :]
        if cut_plane == "sagital":
            return seg_img[position, :, :]
        return seg_img[:, :, position]

    return [
        position
        for position in range(slices_num)
        if np.count_nonzero(_slice_at(position)) > 0
    ]

In [19]:
def show_all_modalities(seg_img, pet_img, ct_img):
    """Plot a segmentation slice and a PET slice as two separate figures.

    Args:
        seg_img: 2-D segmentation slice; shown rotated by 90 degrees
            (``np.rot90`` with ``k=1``) using the "hot" colormap.
        pet_img: 2-D PET slice; shown transposed with the origin at the
            bottom, using the "gist_yarg" colormap.
        ct_img: currently unused - the CT plot below is commented out.
    """
    # plt.imshow(seg_img, cmap="hot")
    rotated_seg_map = np.rot90(seg_img, k=1, axes = (0,1))
    plt.imshow(rotated_seg_map, cmap="hot")
    plt.show()

    # Alternative PET orientation that was tried and disabled:
    # rotated_pet_image = np.rot90(pet_img.T, k=2, axes = (0,1))
    # rotated_pet_image = np.flip(rotated_pet_image)
    rotated_pet_image = pet_img.T
    plt.imshow(rotated_pet_image, cmap="gist_yarg", origin='lower')
    plt.show()

    # CT display is disabled:
    # plt.imshow(ct_img.T, cmap="gray", origin='lower')
    # plt.show()

In [20]:
def get_plane(seg, pet, ct, slice_index, plane):
    """Cut the same 2-D slice out of the segmentation, PET and CT volumes.

    Args:
        seg, pet, ct: 3-D arrays to slice.
        slice_index: position of the slice along the cut axis.
        plane: 'coronal' fixes axis 1, 'sagital' fixes axis 0, any other
            value fixes axis 2.

    Returns:
        tuple: ``(seg_slice, pet_slice, ct_slice)``.
    """
    everything = slice(None)
    if plane == 'coronal':
        selector = (everything, slice_index, everything)
    elif plane == 'sagital':
        selector = (slice_index, everything, everything)
    else:
        selector = (everything, everything, slice_index)

    return tuple(volume[selector] for volume in (seg, pet, ct))

In [21]:
def get_full_image_from_slices(pet, ct):
    """Stack a CT and a PET slice into one 3-channel image, rotated by 90 degrees.

    Channel 0 is the CT slice, channel 1 the PET slice and channel 2 is an
    all-zero filler channel.

    Args:
        pet: 2-D PET slice.
        ct: 2-D CT slice; its row and column counts size the filler channel.

    Returns:
        numpy.ndarray: the stacked image after ``np.rot90`` with ``k=1``
        over the first two axes.
    """
    row_count = len(ct)
    column_count = len(ct[0])
    # Empty third image channel, filled with zeros
    empty_channel = np.zeros((row_count, column_count))
    stacked = np.dstack((ct, pet, empty_channel))
    return np.rot90(stacked, k=1, axes=(0, 1))

In [22]:
def get_target_file_name(row, cut_index, is_segmentation=False):
    """Build the output PNG path of one exported slice.

    Args:
        row: mapping providing the keys 'Subject ID' and 'Study UID'.
        cut_index: slice index appended to the file name.
        is_segmentation: when True, '-SEG' is inserted before the slice index.

    Returns:
        str: ``<DATA_ROOT_TARGET>/<Subject ID>-<Study UID>[-SEG]-<cut_index>.png``
    """
    target_root = DatasetConfig.DATA_ROOT_TARGET
    subject_id = row['Subject ID']
    study_uid = row['Study UID']
    seg_marker = '-SEG' if is_segmentation else ''
    return f"{target_root}/{subject_id}-{study_uid}{seg_marker}-{cut_index}.png"

In [23]:
def get_num_slices_from_cut_plane(shape, cut_plane="coronal"):
    """Return how many slices a volume has along the given cut plane.

    Args:
        shape: sequence with the three axis lengths of the volume.
        cut_plane: "coronal" reads axis 1, "sagital" reads axis 0, any other
            value reads axis 2.

    Returns:
        The length of the axis that corresponds to ``cut_plane``.
    """
    axis_by_plane = {"coronal": 1, "sagital": 0}
    # Unknown plane names fall back to the third axis
    return shape[axis_by_plane.get(cut_plane, 2)]

In [24]:
def get_equivalent_slice(origin_selected, origin_total, target_total):
    """Map a slice index from one volume onto a volume with another slice count.

    Args:
        origin_selected: slice index in the origin volume.
        origin_total: number of slices of the origin volume.
        target_total: number of slices of the target volume.

    Returns:
        ``target_total * origin_selected`` floor-divided by ``origin_total``.
    """
    scaled_position = target_total * origin_selected
    return scaled_position // origin_total

In [25]:
def _write_png_or_fail(path, image):
    """Write ``image`` to ``path`` with OpenCV and fail loudly if it was not saved."""
    # cv2.imwrite reports failure through its return value instead of raising
    if not cv2.imwrite(path, image):
        raise IOError(f"cv2.imwrite could not write {path}")


def process_positive_element_segmentation(study_uid, cut_plane="coronal"):
    """Export every lesion-containing slice of one study as PNG files.

    For each slice (along ``cut_plane``) whose segmentation has at least one
    non-zero value, the PET slice and the segmentation slice are rotated by
    90 degrees (``np.rot90`` with ``k=1``) and written to the target folder.
    Both file names are derived from the PET metadata row.

    Args:
        study_uid: "Study UID" of the study to export.
        cut_plane: plane name forwarded to the slicing helpers.

    Returns:
        tuple: ``(study_uid, pet_shape, all_images)`` where ``pet_shape`` is
        the shape of the PET volume and ``all_images`` is a list with one
        dict per exported slice holding the keys "pos_selected_slice",
        "pos_img_path" and "pos_seg_path".

    Raises:
        IOError: if one of the PNG files could not be written.
    """
    positive_CT, positive_PET, positive_SEG = get_modalities_from_study_uid(study_uid)

    seg_nib = nib.load(get_image_path(positive_SEG))
    seg_img = seg_nib.get_fdata()

    slices_num = get_num_slices_from_cut_plane(seg_nib.shape, cut_plane)
    selected_slices = get_all_activation_segments(seg_img, slices_num, cut_plane)

    pet_nib = nib.load(get_image_path(positive_PET))
    pet_img = pet_nib.get_fdata()

    # NOTE(review): the CT volume is loaded and sliced but its slice is never
    # saved below - confirm whether it is still needed here.
    ct_nib = nib.load(get_image_path(positive_CT))
    ct_img = ct_nib.get_fdata()

    # cv2.imwrite does not create missing directories, so make sure the
    # target folder exists before the first write.
    os.makedirs(DatasetConfig.DATA_ROOT_TARGET, exist_ok=True)

    all_images = []
    for selected_slice in selected_slices:
        pos_seg_cut, pos_pet_cut, pos_ct_cut = get_plane(seg_img, pet_img, ct_img, selected_slice, cut_plane)

        pos_img_path = get_target_file_name(positive_PET, selected_slice)
        pos_seg_path = get_target_file_name(positive_PET, selected_slice, True)

        # Rotate and save images.
        # NOTE(review): the arrays come straight from get_fdata() without any
        # rescaling - confirm the intensity range stored in the PNG is the
        # intended one.
        _write_png_or_fail(pos_img_path, np.rot90(pos_pet_cut, k=1, axes=(0, 1)))
        _write_png_or_fail(pos_seg_path, np.rot90(pos_seg_cut, k=1, axes=(0, 1)))

        all_images.append({
            "pos_selected_slice": selected_slice,
            "pos_img_path": pos_img_path,
            "pos_seg_path": pos_seg_path
        })

    return study_uid, pet_nib.shape, all_images

In [26]:
# Preview the rows labelled POSITIVE, i.e. the ones the export loop iterates over.
single_reg_df[single_reg_df.diagnosis == "POSITIVE"]

Unnamed: 0,Subject ID,Study UID,diagnosis,condition,sliceNum,totalSlices,imagePath,segPath,height,width
0,PETCT_0011f3deaf,1.3.6.1.4.1.14519.5.2.1.4219.6651.389860614478...,POSITIVE,MELANOMA,,,,,,
2,PETCT_01140d52d8,1.3.6.1.4.1.14519.5.2.1.4219.6651.316964892168...,POSITIVE,MELANOMA,,,,,,
5,PETCT_0117d7f11f,1.3.6.1.4.1.14519.5.2.1.4219.6651.316799646612...,POSITIVE,LUNG_CANCER,,,,,,
11,PETCT_01682f60c3,1.3.6.1.4.1.14519.5.2.1.4219.6651.628573049819...,POSITIVE,LUNG_CANCER,,,,,,
14,PETCT_0168f65af8,1.3.6.1.4.1.14519.5.2.1.4219.6651.304086288813...,POSITIVE,LUNG_CANCER,,,,,,
...,...,...,...,...,...,...,...,...,...,...
3009,PETCT_fbd907a179,1.3.6.1.4.1.14519.5.2.1.4219.6651.114084559043...,POSITIVE,LYMPHOMA,,,,,,
3015,PETCT_fcdbe15200,1.3.6.1.4.1.14519.5.2.1.4219.6651.365313945021...,POSITIVE,MELANOMA,,,,,,
3016,PETCT_fde66dd53a,1.3.6.1.4.1.14519.5.2.1.4219.6651.194294292917...,POSITIVE,MELANOMA,,,,,,
3027,PETCT_fde79b6aa9,1.3.6.1.4.1.14519.5.2.1.4219.6651.960887139192...,POSITIVE,MELANOMA,,,,,,


In [27]:
def add_record_to_target_df(study_uid, shape, selected_slice, image_path, seg_path, cut_plane="coronal"):
    """Return ``target_data_df`` extended by the record of one exported slice.

    The row starts as a copy of the study's entry in ``single_reg_df`` and is
    completed with the slice information and the size of the written image.
    The global ``target_data_df`` is not modified; the caller re-assigns it.

    Args:
        study_uid: "Study UID" used to look the study up in ``single_reg_df``.
        shape: shape of the volume the slice was taken from.
        selected_slice: index of the exported slice.
        image_path: absolute path of the exported image PNG.
        seg_path: absolute path of the exported segmentation PNG.
        cut_plane: plane the slice was cut along; selects which axis of
            ``shape`` is stored as "totalSlices".

    Returns:
        pandas.DataFrame: ``target_data_df`` plus the new row, re-indexed.

    Raises:
        FileNotFoundError: if the image at ``image_path`` cannot be read.
    """
    row = single_reg_df[single_reg_df["Study UID"] == study_uid].to_dict(orient='records')[0]
    # Use the axis that matches the cut plane (previously hard-coded to
    # shape[1], which is only correct for coronal cuts).
    row["totalSlices"] = get_num_slices_from_cut_plane(shape, cut_plane)
    row["sliceNum"] = selected_slice

    # removing root absolute path, leaving only relative path
    row["imagePath"] = image_path.replace(DatasetConfig.DATA_ROOT_TARGET, '')
    row["segPath"] = seg_path.replace(DatasetConfig.DATA_ROOT_TARGET, '')

    # cv2.imread returns None instead of raising when the file is missing or unreadable
    img = cv2.imread(image_path)
    if img is None:
        raise FileNotFoundError(f"Could not read exported image: {image_path}")
    height, width = img.shape[:2]

    row["height"] = height
    row["width"] = width

    return pd.concat([target_data_df, pd.DataFrame([row])], ignore_index=True)

In [28]:
# The target table is still empty at this point.
target_data_df

Unnamed: 0,Subject ID,Study UID,diagnosis,condition,sliceNum,totalSlices,imagePath,segPath,height,width


In [29]:
from tqdm import tqdm

In [30]:
positive_records_df = single_reg_df[single_reg_df.diagnosis == "POSITIVE"]
# Shuffle the positive studies deterministically
rows_to_iterate = positive_records_df.sample(random_state=DatasetConfig.SEED_VALUE +123, frac=1)

# Start from an empty table so that re-running this cell does not append the
# same records a second time.
target_data_df = single_reg_df.iloc[0:0].copy()

# add_record_to_target_df returns target_data_df plus one row. Because
# target_data_df stays empty while the loop runs, every call yields a one-row
# frame; they are collected and concatenated once at the end instead of
# re-copying the growing table for every slice.
record_frames = []
for index, row in tqdm(rows_to_iterate.iterrows(), total=len(rows_to_iterate)):
    pos_study_uid, pos_shape, all_images = process_positive_element_segmentation(row["Study UID"], DatasetConfig.CUT_PLANE)
    for image_data in all_images:
        record_frames.append(
            add_record_to_target_df(
                pos_study_uid,
                pos_shape,
                image_data["pos_selected_slice"],
                image_data["pos_img_path"],
                image_data["pos_seg_path"],
                DatasetConfig.CUT_PLANE,
            )
        )

if record_frames:
    target_data_df = pd.concat(record_frames, ignore_index=True)

100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 501/501 [20:11<00:00,  2.42s/it]


In [31]:
# One row per exported slice after the export loop has run.
target_data_df

Unnamed: 0,Subject ID,Study UID,diagnosis,condition,sliceNum,totalSlices,imagePath,segPath,height,width
0,PETCT_f21755a99b,1.3.6.1.4.1.14519.5.2.1.4219.6651.631135993785...,POSITIVE,LYMPHOMA,157,400,/PETCT_f21755a99b-1.3.6.1.4.1.14519.5.2.1.4219...,/PETCT_f21755a99b-1.3.6.1.4.1.14519.5.2.1.4219...,284,400
1,PETCT_f21755a99b,1.3.6.1.4.1.14519.5.2.1.4219.6651.631135993785...,POSITIVE,LYMPHOMA,158,400,/PETCT_f21755a99b-1.3.6.1.4.1.14519.5.2.1.4219...,/PETCT_f21755a99b-1.3.6.1.4.1.14519.5.2.1.4219...,284,400
2,PETCT_f21755a99b,1.3.6.1.4.1.14519.5.2.1.4219.6651.631135993785...,POSITIVE,LYMPHOMA,159,400,/PETCT_f21755a99b-1.3.6.1.4.1.14519.5.2.1.4219...,/PETCT_f21755a99b-1.3.6.1.4.1.14519.5.2.1.4219...,284,400
3,PETCT_f21755a99b,1.3.6.1.4.1.14519.5.2.1.4219.6651.631135993785...,POSITIVE,LYMPHOMA,160,400,/PETCT_f21755a99b-1.3.6.1.4.1.14519.5.2.1.4219...,/PETCT_f21755a99b-1.3.6.1.4.1.14519.5.2.1.4219...,284,400
4,PETCT_f21755a99b,1.3.6.1.4.1.14519.5.2.1.4219.6651.631135993785...,POSITIVE,LYMPHOMA,161,400,/PETCT_f21755a99b-1.3.6.1.4.1.14519.5.2.1.4219...,/PETCT_f21755a99b-1.3.6.1.4.1.14519.5.2.1.4219...,284,400
...,...,...,...,...,...,...,...,...,...,...
25965,PETCT_3ba0277c0c,1.3.6.1.4.1.14519.5.2.1.4219.6651.146051428537...,POSITIVE,MELANOMA,211,400,/PETCT_3ba0277c0c-1.3.6.1.4.1.14519.5.2.1.4219...,/PETCT_3ba0277c0c-1.3.6.1.4.1.14519.5.2.1.4219...,308,400
25966,PETCT_3ba0277c0c,1.3.6.1.4.1.14519.5.2.1.4219.6651.146051428537...,POSITIVE,MELANOMA,212,400,/PETCT_3ba0277c0c-1.3.6.1.4.1.14519.5.2.1.4219...,/PETCT_3ba0277c0c-1.3.6.1.4.1.14519.5.2.1.4219...,308,400
25967,PETCT_3ba0277c0c,1.3.6.1.4.1.14519.5.2.1.4219.6651.146051428537...,POSITIVE,MELANOMA,213,400,/PETCT_3ba0277c0c-1.3.6.1.4.1.14519.5.2.1.4219...,/PETCT_3ba0277c0c-1.3.6.1.4.1.14519.5.2.1.4219...,308,400
25968,PETCT_3ba0277c0c,1.3.6.1.4.1.14519.5.2.1.4219.6651.146051428537...,POSITIVE,MELANOMA,214,400,/PETCT_3ba0277c0c-1.3.6.1.4.1.14519.5.2.1.4219...,/PETCT_3ba0277c0c-1.3.6.1.4.1.14519.5.2.1.4219...,308,400


In [32]:
# Persist the slice table next to the exported images.
# NOTE(review): the DataFrame index is written as an extra unnamed first column - confirm downstream readers expect it.
target_data_df.to_csv(f"{DatasetConfig.DATA_ROOT_TARGET}/data_description.csv")