In [888]:
!pip install nibabel nilearn opencv-python

Looking in indexes: https://pypi.org/simple, https://pypi.ngc.nvidia.com


In [889]:
import os
import pathlib
import pandas as pd
import random
import numpy as np
import matplotlib.pyplot as plt
from dataclasses import dataclass
import nibabel as nib
from nilearn import plotting
import cv2

In [890]:
@dataclass(frozen=True)
class DatasetConfig:
    """Read-only settings for this notebook, accessed as class attributes.

    The path attributes are built once, while the class body executes, from the
    defaults declared above them (e.g. DATA_ROOT_TARGET embeds the default
    CUT_PLANE).
    """

    # Image / sampling settings.
    NUM_CLASSES: int = 43
    IMG_HEIGHT: int = 224
    IMG_WIDTH: int = 224
    CHANNELS: int = 3
    SEED_VALUE: int = 41
    CUT_PLANE: str = "coronal"

    # Source dataset location and its metadata table.
    DATA_ROOT_SOURCE_PATH: str = "/home/jovyan/data/auto-pet-iii/2024-05-10_Autopet_v1.1"
    MAIN_DATA_CSV_PATH: str = f"{DATA_ROOT_SOURCE_PATH}/fdg_metadata.csv"

    # Output location; the folder name carries the cut plane.
    DATA_ROOT_TARGET: str = f"/home/jovyan/data/pre-processed-autopet-min-{CUT_PLANE}"
    DATA_ROOT_TRAIN: str = f"{DATA_ROOT_TARGET}/Train"
    DATA_ROOT_VALID: str = f"{DATA_ROOT_TARGET}/Valid"
    DATA_ROOT_TEST: str = f"{DATA_ROOT_TARGET}/Test"
    DATA_TEST_GT: str = f"{DATA_ROOT_TARGET}/Test.csv"

In [891]:
# Seed both global random number generators (NumPy and the stdlib) with the configured value.
np.random.seed(DatasetConfig.SEED_VALUE)
random.seed(DatasetConfig.SEED_VALUE)

In [892]:
# Read the metadata table from the configured CSV path and display it.
fdg_data = pd.read_csv(filepath_or_buffer=DatasetConfig.MAIN_DATA_CSV_PATH)
fdg_data

Unnamed: 0,Series UID,Collection,3rd Party Analysis,Data Description URI,Subject ID,Study UID,Study Description,Study Date,Series Description,Manufacturer,Modality,SOP Class Name,SOP Class UID,Number of Images,File Size,File Location,Download Timestamp,diagnosis,age,sex
0,1.3.6.1.4.1.14519.5.2.1.4219.6651.201203836389...,FDG-PET-CT-Lesions,NO,,PETCT_0011f3deaf,1.3.6.1.4.1.14519.5.2.1.4219.6651.389860614478...,PET-CT Ganzkoerper primaer mit KM,3/23/2003,Segmentation,QIICR,SEG,Segmentation Storage,1.2.840.10008.5.1.4.1.1.66.4,1,6.75 MB,./FDG-PET-CT-Lesions/PETCT_0011f3deaf/03-23-20...,2022-03-17T12:53:16.75,MELANOMA,063Y,F
1,1.3.6.1.4.1.14519.5.2.1.4219.6651.685145825998...,FDG-PET-CT-Lesions,NO,,PETCT_0011f3deaf,1.3.6.1.4.1.14519.5.2.1.4219.6651.389860614478...,PET-CT Ganzkoerper primaer mit KM,3/23/2003,PET corr.,SIEMENS,PT,Positron Emission Tomography Image Storage,1.2.840.10008.5.1.4.1.1.128,326,105.82 MB,./FDG-PET-CT-Lesions/PETCT_0011f3deaf/03-23-20...,2022-03-17T12:53:44.865,MELANOMA,063Y,F
2,1.3.6.1.4.1.14519.5.2.1.4219.6651.151532511484...,FDG-PET-CT-Lesions,NO,,PETCT_01140d52d8,1.3.6.1.4.1.14519.5.2.1.4219.6651.316964892168...,PET-CT Ganzkoerper primaer mit KM,8/13/2005,PET corr.,SIEMENS,PT,Positron Emission Tomography Image Storage,1.2.840.10008.5.1.4.1.1.128,305,99.01 MB,./FDG-PET-CT-Lesions/PETCT_01140d52d8/08-13-20...,2022-03-17T12:53:45.761,MELANOMA,061Y,M
3,1.3.6.1.4.1.14519.5.2.1.4219.6651.313152023416...,FDG-PET-CT-Lesions,NO,,PETCT_01140d52d8,1.3.6.1.4.1.14519.5.2.1.4219.6651.316964892168...,PET-CT Ganzkoerper primaer mit KM,8/13/2005,Segmentation,QIICR,SEG,Segmentation Storage,1.2.840.10008.5.1.4.1.1.66.4,1,6.31 MB,./FDG-PET-CT-Lesions/PETCT_01140d52d8/08-13-20...,2022-03-17T12:53:47.839,MELANOMA,061Y,M
4,1.3.6.1.4.1.14519.5.2.1.4219.6651.331726561677...,FDG-PET-CT-Lesions,NO,,PETCT_0011f3deaf,1.3.6.1.4.1.14519.5.2.1.4219.6651.389860614478...,PET-CT Ganzkoerper primaer mit KM,3/23/2003,GK p.v.3,SIEMENS,CT,CT Image Storage,1.2.840.10008.5.1.4.1.1.2,391,206.52 MB,./FDG-PET-CT-Lesions/PETCT_0011f3deaf/03-23-20...,2022-03-17T12:53:53.415,MELANOMA,063Y,F
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3037,1.3.6.1.4.1.14519.5.2.1.4219.6651.268371672619...,FDG-PET-CT-Lesions,NO,,PETCT_ff1451316e,1.3.6.1.4.1.14519.5.2.1.4219.6651.172967091722...,PET-CT Ganzkoerper primaer mit KM,3/31/2003,GK p.v.3,SIEMENS,CT,CT Image Storage,1.2.840.10008.5.1.4.1.1.2,391,206.53 MB,./FDG-PET-CT-Lesions/PETCT_ff1451316e/03-31-20...,2022-03-17T21:50:46.509,NEGATIVE,073Y,M
3038,1.3.6.1.4.1.14519.5.2.1.4219.6651.236102552014...,FDG-PET-CT-Lesions,NO,,PETCT_fe705ea1cc,1.3.6.1.4.1.14519.5.2.1.4219.6651.319090403244...,Unspecified CT ABDOMEN,12/29/2002,GK p.v.1 WF,SIEMENS,CT,CT Image Storage,1.2.840.10008.5.1.4.1.1.2,1214,641.06 MB,./FDG-PET-CT-Lesions/PETCT_fe705ea1cc/12-29-20...,2022-03-17T21:51:00.132,LYMPHOMA,066Y,F
3039,1.3.6.1.4.1.14519.5.2.1.4219.6651.330497593503...,FDG-PET-CT-Lesions,NO,,PETCT_ff39795341,1.3.6.1.4.1.14519.5.2.1.4219.6651.253687934564...,PET-CT Ganzkoerper primaer mit KM,9/22/2005,Segmentation,QIICR,SEG,Segmentation Storage,1.2.840.10008.5.1.4.1.1.66.4,1,11.13 MB,./FDG-PET-CT-Lesions/PETCT_ff39795341/09-22-20...,2022-03-17T21:51:04.996,NEGATIVE,073Y,F
3040,1.3.6.1.4.1.14519.5.2.1.4219.6651.117635362952...,FDG-PET-CT-Lesions,NO,,PETCT_ff39795341,1.3.6.1.4.1.14519.5.2.1.4219.6651.253687934564...,PET-CT Ganzkoerper primaer mit KM,9/22/2005,GK p.v.3 s,SIEMENS,CT,CT Image Storage,1.2.840.10008.5.1.4.1.1.2,645,340.68 MB,./FDG-PET-CT-Lesions/PETCT_ff39795341/09-22-20...,2022-03-17T21:51:26.506,NEGATIVE,073Y,F


In [893]:
# Summary statistics of the per-series image count.
fdg_data.loc[:, "Number of Images"].describe()

count    3042.000000
mean      301.432281
std       297.575955
min         1.000000
25%         1.000000
50%       326.000000
75%       391.000000
max      2651.000000
Name: Number of Images, dtype: float64

In [894]:
def get_image_path(record):
    """Build the path of the .nii.gz file that belongs to one metadata row.

    Args:
        record: mapping-like row providing "Subject ID", "File Location" and
            "Modality".

    Returns:
        Path under DatasetConfig.DATA_ROOT_SOURCE_PATH. "SEG" rows resolve to
        the ``labelsTr`` folder, every other modality to ``imagesTr``; "CT"
        rows get the ``_0000`` suffix, "PT" rows ``_0001`` and other
        modalities no suffix.
    """
    # Last "_"-separated token of the subject id and second-to-last path component of the file location.
    subject_token = record["Subject ID"].split("_")[-1]
    study_folder = record["File Location"].split("/")[-2]
    modality = record["Modality"]

    subdir = "labelsTr" if modality == "SEG" else "imagesTr"
    channel_suffix = {"CT": "_0000", "PT": "_0001"}.get(modality, "")

    file_name = f"fdg_{subject_token}_{study_folder}{channel_suffix}.nii.gz"
    return DatasetConfig.DATA_ROOT_SOURCE_PATH + f"/{subdir}/" + file_name

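Target folder tree:
- 0
  - subject_id-study_uid-image_index.png
- 1
  - subject_id-study_uid-image_index.png

Note: `get_target_file_name` in this notebook writes each image directly into the target root folder (`<root>/subject_id-study_uid-image_index.png`); it does not create the `0` / `1` sub-folders shown above.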

In [895]:
# Display the metadata table again for reference before it is copied and reduced.
fdg_data

Unnamed: 0,Series UID,Collection,3rd Party Analysis,Data Description URI,Subject ID,Study UID,Study Description,Study Date,Series Description,Manufacturer,Modality,SOP Class Name,SOP Class UID,Number of Images,File Size,File Location,Download Timestamp,diagnosis,age,sex
0,1.3.6.1.4.1.14519.5.2.1.4219.6651.201203836389...,FDG-PET-CT-Lesions,NO,,PETCT_0011f3deaf,1.3.6.1.4.1.14519.5.2.1.4219.6651.389860614478...,PET-CT Ganzkoerper primaer mit KM,3/23/2003,Segmentation,QIICR,SEG,Segmentation Storage,1.2.840.10008.5.1.4.1.1.66.4,1,6.75 MB,./FDG-PET-CT-Lesions/PETCT_0011f3deaf/03-23-20...,2022-03-17T12:53:16.75,MELANOMA,063Y,F
1,1.3.6.1.4.1.14519.5.2.1.4219.6651.685145825998...,FDG-PET-CT-Lesions,NO,,PETCT_0011f3deaf,1.3.6.1.4.1.14519.5.2.1.4219.6651.389860614478...,PET-CT Ganzkoerper primaer mit KM,3/23/2003,PET corr.,SIEMENS,PT,Positron Emission Tomography Image Storage,1.2.840.10008.5.1.4.1.1.128,326,105.82 MB,./FDG-PET-CT-Lesions/PETCT_0011f3deaf/03-23-20...,2022-03-17T12:53:44.865,MELANOMA,063Y,F
2,1.3.6.1.4.1.14519.5.2.1.4219.6651.151532511484...,FDG-PET-CT-Lesions,NO,,PETCT_01140d52d8,1.3.6.1.4.1.14519.5.2.1.4219.6651.316964892168...,PET-CT Ganzkoerper primaer mit KM,8/13/2005,PET corr.,SIEMENS,PT,Positron Emission Tomography Image Storage,1.2.840.10008.5.1.4.1.1.128,305,99.01 MB,./FDG-PET-CT-Lesions/PETCT_01140d52d8/08-13-20...,2022-03-17T12:53:45.761,MELANOMA,061Y,M
3,1.3.6.1.4.1.14519.5.2.1.4219.6651.313152023416...,FDG-PET-CT-Lesions,NO,,PETCT_01140d52d8,1.3.6.1.4.1.14519.5.2.1.4219.6651.316964892168...,PET-CT Ganzkoerper primaer mit KM,8/13/2005,Segmentation,QIICR,SEG,Segmentation Storage,1.2.840.10008.5.1.4.1.1.66.4,1,6.31 MB,./FDG-PET-CT-Lesions/PETCT_01140d52d8/08-13-20...,2022-03-17T12:53:47.839,MELANOMA,061Y,M
4,1.3.6.1.4.1.14519.5.2.1.4219.6651.331726561677...,FDG-PET-CT-Lesions,NO,,PETCT_0011f3deaf,1.3.6.1.4.1.14519.5.2.1.4219.6651.389860614478...,PET-CT Ganzkoerper primaer mit KM,3/23/2003,GK p.v.3,SIEMENS,CT,CT Image Storage,1.2.840.10008.5.1.4.1.1.2,391,206.52 MB,./FDG-PET-CT-Lesions/PETCT_0011f3deaf/03-23-20...,2022-03-17T12:53:53.415,MELANOMA,063Y,F
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3037,1.3.6.1.4.1.14519.5.2.1.4219.6651.268371672619...,FDG-PET-CT-Lesions,NO,,PETCT_ff1451316e,1.3.6.1.4.1.14519.5.2.1.4219.6651.172967091722...,PET-CT Ganzkoerper primaer mit KM,3/31/2003,GK p.v.3,SIEMENS,CT,CT Image Storage,1.2.840.10008.5.1.4.1.1.2,391,206.53 MB,./FDG-PET-CT-Lesions/PETCT_ff1451316e/03-31-20...,2022-03-17T21:50:46.509,NEGATIVE,073Y,M
3038,1.3.6.1.4.1.14519.5.2.1.4219.6651.236102552014...,FDG-PET-CT-Lesions,NO,,PETCT_fe705ea1cc,1.3.6.1.4.1.14519.5.2.1.4219.6651.319090403244...,Unspecified CT ABDOMEN,12/29/2002,GK p.v.1 WF,SIEMENS,CT,CT Image Storage,1.2.840.10008.5.1.4.1.1.2,1214,641.06 MB,./FDG-PET-CT-Lesions/PETCT_fe705ea1cc/12-29-20...,2022-03-17T21:51:00.132,LYMPHOMA,066Y,F
3039,1.3.6.1.4.1.14519.5.2.1.4219.6651.330497593503...,FDG-PET-CT-Lesions,NO,,PETCT_ff39795341,1.3.6.1.4.1.14519.5.2.1.4219.6651.253687934564...,PET-CT Ganzkoerper primaer mit KM,9/22/2005,Segmentation,QIICR,SEG,Segmentation Storage,1.2.840.10008.5.1.4.1.1.66.4,1,11.13 MB,./FDG-PET-CT-Lesions/PETCT_ff39795341/09-22-20...,2022-03-17T21:51:04.996,NEGATIVE,073Y,F
3040,1.3.6.1.4.1.14519.5.2.1.4219.6651.117635362952...,FDG-PET-CT-Lesions,NO,,PETCT_ff39795341,1.3.6.1.4.1.14519.5.2.1.4219.6651.253687934564...,PET-CT Ganzkoerper primaer mit KM,9/22/2005,GK p.v.3 s,SIEMENS,CT,CT Image Storage,1.2.840.10008.5.1.4.1.1.2,645,340.68 MB,./FDG-PET-CT-Lesions/PETCT_ff39795341/09-22-20...,2022-03-17T21:51:26.506,NEGATIVE,073Y,F


In [896]:
# Work on an independent copy so the loaded metadata table stays untouched.
pre_processed_df = fdg_data.copy(deep=True)

In [897]:
# Collapse every diagnosis other than "NEGATIVE" into a single "POSITIVE" class.
# The metadata has no `label` column, so reading `pre_processed_df.label` raised
# AttributeError on a fresh kernel. The binary value is written to `diagnosis`
# (later cells filter on `diagnosis == "POSITIVE"`) and mirrored into `label`
# (selected below and in later cells). Re-running this cell gives the same result.
pre_processed_df["diagnosis"] = pre_processed_df["diagnosis"].apply(
    lambda x: "POSITIVE" if x != "NEGATIVE" else x
)
pre_processed_df["label"] = pre_processed_df["diagnosis"]
pre_processed_df = pre_processed_df[["Subject ID","Study UID","Modality","Number of Images","File Location","diagnosis","label"]]
pre_processed_df

Unnamed: 0,Subject ID,Study UID,Modality,Number of Images,File Location,diagnosis
0,PETCT_0011f3deaf,1.3.6.1.4.1.14519.5.2.1.4219.6651.389860614478...,SEG,1,./FDG-PET-CT-Lesions/PETCT_0011f3deaf/03-23-20...,POSITIVE
1,PETCT_0011f3deaf,1.3.6.1.4.1.14519.5.2.1.4219.6651.389860614478...,PT,326,./FDG-PET-CT-Lesions/PETCT_0011f3deaf/03-23-20...,POSITIVE
2,PETCT_01140d52d8,1.3.6.1.4.1.14519.5.2.1.4219.6651.316964892168...,PT,305,./FDG-PET-CT-Lesions/PETCT_01140d52d8/08-13-20...,POSITIVE
3,PETCT_01140d52d8,1.3.6.1.4.1.14519.5.2.1.4219.6651.316964892168...,SEG,1,./FDG-PET-CT-Lesions/PETCT_01140d52d8/08-13-20...,POSITIVE
4,PETCT_0011f3deaf,1.3.6.1.4.1.14519.5.2.1.4219.6651.389860614478...,CT,391,./FDG-PET-CT-Lesions/PETCT_0011f3deaf/03-23-20...,POSITIVE
...,...,...,...,...,...,...
3037,PETCT_ff1451316e,1.3.6.1.4.1.14519.5.2.1.4219.6651.172967091722...,CT,391,./FDG-PET-CT-Lesions/PETCT_ff1451316e/03-31-20...,NEGATIVE
3038,PETCT_fe705ea1cc,1.3.6.1.4.1.14519.5.2.1.4219.6651.319090403244...,CT,1214,./FDG-PET-CT-Lesions/PETCT_fe705ea1cc/12-29-20...,POSITIVE
3039,PETCT_ff39795341,1.3.6.1.4.1.14519.5.2.1.4219.6651.253687934564...,SEG,1,./FDG-PET-CT-Lesions/PETCT_ff39795341/09-22-20...,NEGATIVE
3040,PETCT_ff39795341,1.3.6.1.4.1.14519.5.2.1.4219.6651.253687934564...,CT,645,./FDG-PET-CT-Lesions/PETCT_ff39795341/09-22-20...,NEGATIVE


In [898]:
# Show one full Study UID (the table display truncates it).
pre_processed_df["Study UID"].iat[0]

'1.3.6.1.4.1.14519.5.2.1.4219.6651.389860614478618826979455610445'

In [899]:
# Keep the study-identifying columns, drop duplicate rows, and append empty
# placeholder columns that are filled in when a slice is exported.
single_reg_df = (
    pre_processed_df[["Subject ID", "Study UID", "diagnosis", "label"]]
    .drop_duplicates()
    .assign(
        sliceNum=None,
        totalSlices=None,
        filePath=None,
        height=None,
        width=None,
    )
)
single_reg_df

Unnamed: 0,Subject ID,Study UID,diagnosis,sliceNum,totalSlices,filePath,height,width
0,PETCT_0011f3deaf,1.3.6.1.4.1.14519.5.2.1.4219.6651.389860614478...,POSITIVE,,,,,
2,PETCT_01140d52d8,1.3.6.1.4.1.14519.5.2.1.4219.6651.316964892168...,POSITIVE,,,,,
5,PETCT_0117d7f11f,1.3.6.1.4.1.14519.5.2.1.4219.6651.316799646612...,POSITIVE,,,,,
8,PETCT_0143bab87a,1.3.6.1.4.1.14519.5.2.1.4219.6651.335264930457...,NEGATIVE,,,,,
11,PETCT_01682f60c3,1.3.6.1.4.1.14519.5.2.1.4219.6651.628573049819...,POSITIVE,,,,,
...,...,...,...,...,...,...,...,...
3027,PETCT_fde79b6aa9,1.3.6.1.4.1.14519.5.2.1.4219.6651.960887139192...,POSITIVE,,,,,
3029,PETCT_fe705ea1cc,1.3.6.1.4.1.14519.5.2.1.4219.6651.319090403244...,POSITIVE,,,,,
3031,PETCT_fe705ea1cc,1.3.6.1.4.1.14519.5.2.1.4219.6651.972577895642...,NEGATIVE,,,,,
3035,PETCT_ff1451316e,1.3.6.1.4.1.14519.5.2.1.4219.6651.172967091722...,NEGATIVE,,,,,


In [900]:
# Number of rows per diagnosis value.
single_reg_df["diagnosis"].value_counts()

diagnosis
NEGATIVE    513
POSITIVE    501
Name: count, dtype: int64

In [901]:
# Empty frame with the same columns as single_reg_df; exported records are appended to it later.
target_data_df = single_reg_df.head(0).copy()
target_data_df

Unnamed: 0,Subject ID,Study UID,diagnosis,sliceNum,totalSlices,filePath,height,width


In [902]:
def get_modalities_from_study_uid(study_uid):
    """Return the first CT, PT and SEG metadata rows of one study.

    Args:
        study_uid: value matched against the "Study UID" column of
            ``pre_processed_df``.

    Returns:
        Tuple ``(row_CT, row_PET, row_SEG)``; each is the first row of the
        study whose "Modality" is "CT", "PT" and "SEG" respectively.

    Raises:
        IndexError: if the study has no row for one of the three modalities.
    """
    # Select the study once instead of re-scanning the whole table per modality.
    study_rows = pre_processed_df[pre_processed_df["Study UID"] == study_uid]

    first_rows = []
    for modality in ("CT", "PT", "SEG"):
        matches = study_rows[study_rows["Modality"] == modality]
        if matches.empty:
            # Same exception type as `.iloc[0]` on an empty frame, with a message that names the cause.
            raise IndexError(f"Study {study_uid!r} has no row with modality {modality!r}")
        first_rows.append(matches.iloc[0])

    row_CT, row_PET, row_SEG = first_rows
    return row_CT, row_PET, row_SEG

In [903]:
def get_highest_activation_seg(seg_img, slices_num, cut_plane="coronal"):
    """Return the index of the slice holding the most non-zero segmentation voxels.

    Args:
        seg_img: segmentation volume (array-like, indexed as a NumPy array).
        slices_num: number of leading slices to consider along the plane axis.
        cut_plane: "coronal" scans axis 1, "sagital" scans axis 0, any other
            value scans axis 2 (same mapping as ``get_plane``). The default
            keeps the previous behaviour, which always scanned axis 1.

    Returns:
        int: index of the first slice with the highest non-zero count, or 0
        when no slice contains a non-zero voxel (or ``slices_num`` is 0).
        Slices beyond the end of the axis are ignored.
    """
    axis = 1 if cut_plane == "coronal" else (0 if cut_plane == "sagital" else 2)

    volume = np.asarray(seg_img)
    # Count non-zero voxels of every slice in one pass by reducing over all other axes.
    other_axes = tuple(a for a in range(volume.ndim) if a != axis)
    counts = np.count_nonzero(volume, axis=other_axes)[:slices_num]

    if counts.size == 0:
        return 0
    # argmax returns the first maximum, i.e. index 0 when every count is zero.
    return int(np.argmax(counts))

In [904]:
def show_all_modalities(seg_img, pet_img, ct_img):
    """Plot the segmentation, PET and CT slices one after another.

    All three slices are drawn transposed with the origin at the bottom, so the
    segmentation has the same on-screen orientation as the PET and CT images
    (it was previously drawn un-transposed with the default origin).

    Args:
        seg_img: 2-D segmentation slice.
        pet_img: 2-D PET slice.
        ct_img: 2-D CT slice.
    """
    panels = (
        ("Segmentation", seg_img, "hot"),
        ("PET", pet_img, "gist_yarg"),
        ("CT", ct_img, "gray"),
    )
    for title, image, cmap in panels:
        plt.imshow(image.T, cmap=cmap, origin='lower')
        plt.title(title)
        plt.show()

In [905]:
def get_plane(seg, pet, ct, slice_index, plane):
    """Cut the same 2-D slice out of the segmentation, PET and CT volumes.

    Args:
        seg, pet, ct: volumes indexed with three axes.
        slice_index: position of the slice along the plane's axis.
        plane: 'coronal' fixes axis 1, 'sagital' fixes axis 0, any other value
            fixes axis 2.

    Returns:
        Tuple ``(seg_slice, pet_slice, ct_slice)``.
    """
    if plane == 'coronal':
        fixed_axis = 1
    elif plane == 'sagital':
        fixed_axis = 0
    else:
        fixed_axis = 2

    # Full slices on every axis except the fixed one, which takes the slice index.
    selector = [slice(None), slice(None), slice(None)]
    selector[fixed_axis] = slice_index
    selector = tuple(selector)

    return seg[selector], pet[selector], ct[selector]

In [906]:
def get_full_image_from_slices(pet, ct):
    """Combine a CT slice and a PET slice into one 3-channel image.

    Channel 0 holds the CT slice, channel 1 the PET slice and channel 2 is
    filled with zeros. The stacked image is rotated by 90 degrees in the plane
    of its first two axes before being returned.

    Args:
        pet: 2-D PET slice.
        ct: 2-D CT slice of the same shape.

    Returns:
        Array of shape (columns, rows, 3) relative to the input slices.
    """
    rows, cols = len(ct), len(ct[0])
    # Third image channel is empty, filled with zeros.
    empty_channel = np.zeros((rows, cols))
    stacked = np.dstack([ct, pet, empty_channel])
    return np.rot90(stacked)

In [907]:
def get_target_file_name(row, cut_index):
    """Return the output PNG path for one exported slice.

    The file is placed directly under DatasetConfig.DATA_ROOT_TARGET and named
    ``<Subject ID>-<Study UID>-<cut_index>.png``.
    """
    root = DatasetConfig.DATA_ROOT_TARGET
    file_name = "-".join([f"{row['Subject ID']}", f"{row['Study UID']}", f"{cut_index}"]) + ".png"
    return f"{root}/{file_name}"

In [908]:
def get_num_slices_from_cut_plane(shape, cut_plane="coronal"):
    """Return how many slices a volume of the given shape has along a cut plane.

    "coronal" reads ``shape[1]``, "sagital" reads ``shape[0]`` and any other
    value reads ``shape[2]``.
    """
    axis = 1 if cut_plane == "coronal" else (0 if cut_plane == "sagital" else 2)
    return shape[axis]

In [909]:
def get_equivalent_slice(origin_selected, origin_total, target_total):
    """Map a slice index of one volume to the proportional index in another.

    The previous implementation divided by ``target_total`` and therefore
    always returned ``origin_selected`` unchanged; the relative position must
    be scaled by the number of slices of the origin volume.

    Args:
        origin_selected: slice index chosen in the origin volume.
        origin_total: number of slices in the origin volume.
        target_total: number of slices in the target volume.

    Returns:
        int: ``floor(origin_selected / origin_total * target_total)``.

    Raises:
        ValueError: if ``origin_total`` is not positive.
    """
    if origin_total <= 0:
        raise ValueError(f"origin_total must be positive, got {origin_total}")
    return (target_total * origin_selected) // origin_total

In [910]:
def _write_slice_image(image, image_path):
    """Save `image` at `image_path`, creating the folder first and raising if the write fails."""
    target_dir = os.path.dirname(image_path)
    if target_dir:
        os.makedirs(target_dir, exist_ok=True)
    # cv2.imwrite reports failure through its boolean return value rather than an exception.
    if not cv2.imwrite(image_path, image):
        raise OSError(f"Could not write image file: {image_path}")


def process_positive_element(study_uid, cut_plane="coronal"):
    """Export one positive study and one matching, not-yet-used negative study.

    For the positive study, the slice with the most non-zero segmentation
    voxels along `cut_plane` is selected and the CT/PET slices at that index
    are written as one image. A NEGATIVE study whose "Study UID" is not yet in
    ``target_data_df`` is then sampled and the proportionally equivalent slice
    of that study is written as well.

    Args:
        study_uid: "Study UID" of the positive study.
        cut_plane: "coronal", "sagital" or any other value for the third axis.

    Returns:
        Tuple ``(study_uid, positive CT shape, positive slice index, positive
        image path, negative study uid, negative CT shape, negative slice
        index, negative image path)``.

    Raises:
        ValueError: if no unused NEGATIVE study is left.
        OSError: if an image cannot be written.
    """
    # ---- positive sample ----
    positive_CT, positive_PET, positive_SEG = get_modalities_from_study_uid(study_uid)

    seg_nib = nib.load(get_image_path(positive_SEG))
    seg_img = seg_nib.get_fdata()

    slices_num = get_num_slices_from_cut_plane(seg_nib.shape, cut_plane)

    # get_highest_activation_seg scans along axis 1 when given only these two
    # arguments, so the axis of the requested plane is moved there first
    # (a no-op for the coronal plane).
    plane_axis = 1 if cut_plane == "coronal" else (0 if cut_plane == "sagital" else 2)
    pos_selected_slice = get_highest_activation_seg(np.moveaxis(seg_img, plane_axis, 1), slices_num)

    pet_nib = nib.load(get_image_path(positive_PET))
    pet_img = pet_nib.get_fdata()

    ct_nib = nib.load(get_image_path(positive_CT))
    ct_img = ct_nib.get_fdata()

    pos_seg_cut, pos_pet_cut, pos_ct_cut = get_plane(seg_img, pet_img, ct_img, pos_selected_slice, cut_plane)

    # show_all_modalities(pos_seg_cut, pos_pet_cut, pos_ct_cut) # plots each modality of image

    combined_positive_image = get_full_image_from_slices(pos_pet_cut, pos_ct_cut)
    pos_img_path = get_target_file_name(positive_CT, pos_selected_slice)
    _write_slice_image(combined_positive_image, pos_img_path)

    # ---- negative sample: a NEGATIVE study that has not been exported yet ----
    unused_negatives = single_reg_df[
        (single_reg_df.diagnosis == "NEGATIVE")
        & (~single_reg_df["Study UID"].isin(target_data_df["Study UID"]))
    ]
    if unused_negatives.empty:
        raise ValueError(f"No unused NEGATIVE study left to pair with study {study_uid!r}")
    random_neg_sample = unused_negatives.sample(random_state=DatasetConfig.SEED_VALUE).iloc[0]

    neg_CT, neg_PET, neg_SEG = get_modalities_from_study_uid(random_neg_sample["Study UID"])

    neg_seg_nib = nib.load(get_image_path(neg_SEG))
    neg_seg_img = neg_seg_nib.get_fdata()

    neg_pet_nib = nib.load(get_image_path(neg_PET))
    neg_pet_img = neg_pet_nib.get_fdata()

    neg_ct_nib = nib.load(get_image_path(neg_CT))
    neg_ct_img = neg_ct_nib.get_fdata()

    slices_num_neg = get_num_slices_from_cut_plane(neg_ct_nib.shape, cut_plane)

    neg_selected_slice = get_equivalent_slice(pos_selected_slice, slices_num, slices_num_neg)
    # Never index past the last slice of the negative volume.
    neg_selected_slice = min(neg_selected_slice, slices_num_neg - 1)

    neg_seg_cut, neg_pet_cut, neg_ct_cut = get_plane(neg_seg_img, neg_pet_img, neg_ct_img, neg_selected_slice, cut_plane)

    # show_all_modalities(neg_seg_cut, neg_pet_cut, neg_ct_cut) # plots each modality of image

    combined_negative_image = get_full_image_from_slices(neg_pet_cut, neg_ct_cut)
    neg_img_path = get_target_file_name(neg_CT, neg_selected_slice)
    _write_slice_image(combined_negative_image, neg_img_path)

    return (
        study_uid,
        ct_nib.shape,
        pos_selected_slice,
        pos_img_path,
        random_neg_sample["Study UID"],
        neg_ct_nib.shape,
        neg_selected_slice,
        neg_img_path,
    )

In [None]:
def process_only_positive_pet_element_seg(study_uid, cut_plane="coronal"):
    """Export the most lesion-rich slice of one positive study.

    The slice with the most non-zero segmentation voxels along `cut_plane` is
    selected, and the CT/PET slices at that index are combined and written as
    one image.

    Args:
        study_uid: "Study UID" of the positive study.
        cut_plane: "coronal", "sagital" or any other value for the third axis.

    Returns:
        Tuple ``(study_uid, CT volume shape, selected slice index, image path)``.

    Raises:
        OSError: if the image cannot be written.
    """
    positive_CT, positive_PET, positive_SEG = get_modalities_from_study_uid(study_uid)

    seg_nib = nib.load(get_image_path(positive_SEG))
    seg_img = seg_nib.get_fdata()

    slices_num = get_num_slices_from_cut_plane(seg_nib.shape, cut_plane)

    # get_highest_activation_seg scans along axis 1 when given only these two
    # arguments, so the axis of the requested plane is moved there first
    # (a no-op for the coronal plane).
    plane_axis = 1 if cut_plane == "coronal" else (0 if cut_plane == "sagital" else 2)
    pos_selected_slice = get_highest_activation_seg(np.moveaxis(seg_img, plane_axis, 1), slices_num)

    pet_nib = nib.load(get_image_path(positive_PET))
    pet_img = pet_nib.get_fdata()

    ct_nib = nib.load(get_image_path(positive_CT))
    ct_img = ct_nib.get_fdata()

    pos_seg_cut, pos_pet_cut, pos_ct_cut = get_plane(seg_img, pet_img, ct_img, pos_selected_slice, cut_plane)

    # show_all_modalities(pos_seg_cut, pos_pet_cut, pos_ct_cut) # plots each modality of image

    combined_positive_image = get_full_image_from_slices(pos_pet_cut, pos_ct_cut)
    pos_img_path = get_target_file_name(positive_CT, pos_selected_slice)

    # Create the output folder if needed; cv2.imwrite reports failure through
    # its boolean return value rather than an exception.
    target_dir = os.path.dirname(pos_img_path)
    if target_dir:
        os.makedirs(target_dir, exist_ok=True)
    if not cv2.imwrite(pos_img_path, combined_positive_image):
        raise OSError(f"Could not write image file: {pos_img_path}")

    return study_uid, ct_nib.shape, pos_selected_slice, pos_img_path

In [911]:
# Rows whose diagnosis is "POSITIVE".
single_reg_df.loc[single_reg_df["diagnosis"] == "POSITIVE"]

Unnamed: 0,Subject ID,Study UID,diagnosis,sliceNum,totalSlices,filePath,height,width
0,PETCT_0011f3deaf,1.3.6.1.4.1.14519.5.2.1.4219.6651.389860614478...,POSITIVE,,,,,
2,PETCT_01140d52d8,1.3.6.1.4.1.14519.5.2.1.4219.6651.316964892168...,POSITIVE,,,,,
5,PETCT_0117d7f11f,1.3.6.1.4.1.14519.5.2.1.4219.6651.316799646612...,POSITIVE,,,,,
11,PETCT_01682f60c3,1.3.6.1.4.1.14519.5.2.1.4219.6651.628573049819...,POSITIVE,,,,,
14,PETCT_0168f65af8,1.3.6.1.4.1.14519.5.2.1.4219.6651.304086288813...,POSITIVE,,,,,
...,...,...,...,...,...,...,...,...
3009,PETCT_fbd907a179,1.3.6.1.4.1.14519.5.2.1.4219.6651.114084559043...,POSITIVE,,,,,
3015,PETCT_fcdbe15200,1.3.6.1.4.1.14519.5.2.1.4219.6651.365313945021...,POSITIVE,,,,,
3016,PETCT_fde66dd53a,1.3.6.1.4.1.14519.5.2.1.4219.6651.194294292917...,POSITIVE,,,,,
3027,PETCT_fde79b6aa9,1.3.6.1.4.1.14519.5.2.1.4219.6651.960887139192...,POSITIVE,,,,,


In [912]:
def add_record_to_target_df(study_uid, shape, selected_slice, image_path, cut_plane="coronal"):
    """Return ``target_data_df`` with one record appended for an exported slice.

    Args:
        study_uid: "Study UID" used to look the study up in ``single_reg_df``.
        shape: shape of the volume the slice was cut from.
        selected_slice: index of the exported slice.
        image_path: absolute path of the written image.
        cut_plane: plane the slice was cut along; selects which entry of
            `shape` is stored as ``totalSlices`` (previously always
            ``shape[1]``, which is only right for the coronal plane).

    Returns:
        New DataFrame: the current ``target_data_df`` plus the new row. The
        global itself is not modified; callers assign the result back.

    Raises:
        IndexError: if `study_uid` is not present in ``single_reg_df``.
        FileNotFoundError: if the image at `image_path` cannot be read.
    """
    row = single_reg_df[single_reg_df["Study UID"] == study_uid].to_dict(orient='records')[0]

    row["totalSlices"] = get_num_slices_from_cut_plane(shape, cut_plane)
    row["sliceNum"] = selected_slice
    row["filePath"] = image_path.replace(DatasetConfig.DATA_ROOT_TARGET, '') # removing root absolute path, leaving only relative path

    # cv2.imread returns None instead of raising when the file is missing or unreadable.
    img = cv2.imread(image_path)
    if img is None:
        raise FileNotFoundError(f"Could not read exported image: {image_path}")
    height, width = img.shape[:2]

    row["height"] = height
    row["width"] = width

    return pd.concat([target_data_df, pd.DataFrame([row])], ignore_index=True)

In [913]:
# Display the target table before the export loop runs.
target_data_df

Unnamed: 0,Subject ID,Study UID,diagnosis,sliceNum,totalSlices,filePath,height,width


In [914]:
from tqdm import tqdm

In [915]:
positive_records_df = single_reg_df[single_reg_df.diagnosis == "POSITIVE"]
# Shuffle the positive studies deterministically.
rows_to_iterate = positive_records_df.sample(random_state=DatasetConfig.SEED_VALUE + 123, frac=1)

# Start from an empty table so re-running this cell does not append duplicate
# records or exhaust the pool of unused negative studies.
target_data_df = single_reg_df.iloc[0:0].copy()

for _, row in tqdm(rows_to_iterate.iterrows(), total=len(rows_to_iterate)):
    (
        pos_study_uid, pos_shape, pos_selected_slice, pos_img_path,
        neg_study_uid, neg_shape, neg_selected_slice, neg_img_path,
    ) = process_positive_element(row["Study UID"], DatasetConfig.CUT_PLANE)

    # target_data_df is reassigned on every iteration because process_positive_element
    # reads it to skip negative studies that were already exported.
    target_data_df = add_record_to_target_df(
        pos_study_uid, pos_shape, pos_selected_slice, pos_img_path, DatasetConfig.CUT_PLANE
    )
    target_data_df = add_record_to_target_df(
        neg_study_uid, neg_shape, neg_selected_slice, neg_img_path, DatasetConfig.CUT_PLANE
    )

100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 501/501 [29:24<00:00,  3.52s/it]


In [916]:
# Inspect the records collected by the export loop.
target_data_df

Unnamed: 0,Subject ID,Study UID,diagnosis,sliceNum,totalSlices,filePath,height,width
0,PETCT_f21755a99b,1.3.6.1.4.1.14519.5.2.1.4219.6651.631135993785...,POSITIVE,196,400,/PETCT_f21755a99b-1.3.6.1.4.1.14519.5.2.1.4219...,284,400
1,PETCT_176bdc5388,1.3.6.1.4.1.14519.5.2.1.4219.6651.908703633623...,NEGATIVE,196,400,/PETCT_176bdc5388-1.3.6.1.4.1.14519.5.2.1.4219...,274,400
2,PETCT_605369e88d,1.3.6.1.4.1.14519.5.2.1.4219.6651.161111150372...,POSITIVE,207,400,/PETCT_605369e88d-1.3.6.1.4.1.14519.5.2.1.4219...,326,400
3,PETCT_846c1af245,1.3.6.1.4.1.14519.5.2.1.4219.6651.158562682786...,NEGATIVE,207,400,/PETCT_846c1af245-1.3.6.1.4.1.14519.5.2.1.4219...,326,400
4,PETCT_7ce196485f,1.3.6.1.4.1.14519.5.2.1.4219.6651.226902065774...,POSITIVE,215,400,/PETCT_7ce196485f-1.3.6.1.4.1.14519.5.2.1.4219...,558,400
...,...,...,...,...,...,...,...,...
997,PETCT_193dea6ac7,1.3.6.1.4.1.14519.5.2.1.4219.6651.224241512383...,NEGATIVE,191,400,/PETCT_193dea6ac7-1.3.6.1.4.1.14519.5.2.1.4219...,242,400
998,PETCT_80ccbdadf9,1.3.6.1.4.1.14519.5.2.1.4219.6651.478619815683...,POSITIVE,181,400,/PETCT_80ccbdadf9-1.3.6.1.4.1.14519.5.2.1.4219...,326,400
999,PETCT_3bce0eb7aa,1.3.6.1.4.1.14519.5.2.1.4219.6651.222796109751...,NEGATIVE,181,400,/PETCT_3bce0eb7aa-1.3.6.1.4.1.14519.5.2.1.4219...,303,400
1000,PETCT_3ba0277c0c,1.3.6.1.4.1.14519.5.2.1.4219.6651.146051428537...,POSITIVE,212,400,/PETCT_3ba0277c0c-1.3.6.1.4.1.14519.5.2.1.4219...,308,400


In [917]:
# Persist the table of exported slices next to the images.
target_data_df.to_csv(DatasetConfig.DATA_ROOT_TARGET + "/data_description.csv")

In [918]:
!ls /home/jovyan/work/data/pre-processed-autopet-min-coronal/