In [4]:
%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [5]:
# Paths & URLs

import os

# Enable CUDA stacktrace reporting for debugging
os.environ['CUDA_LAUNCH_BLOCKING'] = "1"
os.environ['TORCH_USE_CUDA_DSA'] = "1"
os.environ['PYTORCH_NO_CUDA_MEMORY_CACHING'] = "1"
os.environ['PYTORCH_CUDA_ALLOC_CONF'] = 'expandable_segments:True'

# Directorio base
#PATH_BASE = '/content/drive/MyDrive/proximity'
#PATH_BASE = 'C:\\Users\\User\\Documents\\Proyecto Proximity'
PATH_BASE = 'E:\\files\\Documents\\Proyecto Proximity'

# Data release actual
DR70_PATH = os.path.join(PATH_BASE, 'DR70')
DR70_CT_PATH = os.path.join(DR70_PATH, 'datalake_sorted')
DR70_LABELS_PATH = os.path.join(DR70_PATH, 'labels.csv')

# Data release 176
DR176_PATH = os.path.join(PATH_BASE, 'DR176')
DR176_CT_PATH = os.path.join(DR176_PATH, 'DR176_studies')
DR176_LABELS_PATH = os.path.join(PATH_BASE, 'reports_with_label.csv')


# Data release 217
DR217_PATH = os.path.join(PATH_BASE, 'DR217')
DR217_CT_PATH = os.path.join(DR217_PATH, 'DR217_studies')
DR217_LABELS_PATH = os.path.join(DR217_PATH, 'DR217_labels.csv')
# Used for normalization of the intensity values for each CT
DR217_CT_HISTOGRAM_LANDMARKS_PATH = os.path.join(DR217_PATH, 'DR217_histogram_landmarks.npy')
# Embeddings ResNet18 DR217
DR217_RESNET_EMBEDDINGS_PATH = os.path.join(DR217_PATH, 'DR217_resnet18_embeddings')

# Synth dataset v1
SYNTH_DATASET_V1_PATH = os.path.join(PATH_BASE, 'Synth dataset v1', 'v1')

# CTs in Nibabel format
CT_NIBABEL_PATH = os.path.join(PATH_BASE, 'DR70', 'CTs')

# Embeddings visuales de CTs
#CT_EMBEDDINGS_PATH = DATA_RELEASE_PATH + '/visual_embeddings'


# Etiquetas de los CTs del data release actual
#CT_LABELS_CSV_PATH = DATA_RELEASE_PATH + '/labels.csv'

# Data release (CTs + etiquetas) organizados en un DataFrame
#CT_DATASET_DF_HDF_PATH = os.path.join(PATH_BASE, 'dataset_df.h5')
#CT_DATASET_DF_PICKLE_PATH = os.path.join(PATH_BASE, 'dataset_df.pickle')

# URLs de modelos visuales
#RESNET18_URL = 'microsoft/resnet-18'

# Path que contiene los resnet50 embeddings de CTs del data release actual
#CT_RESNET18_EMBEDDINGS_PATH = os.path.join(DR70_PATH, 'visual_embeddings', 'resnet18')
#CT_RESNET18_EMBEDDINGS_PATH = os.path.join(DR70_PATH, 'visual_embeddings', 'resnet18', 'reshaped_averaged')

# Path de modelos entrenados en base a tripletas
TRIPLET_MODELS_PATH = os.path.join(PATH_BASE, 'retrieval_models', 'triplets')
TRIPLET_CHECKPOINTS_PATH = os.path.join(PATH_BASE, 'retrieval_models', 'triplets', 'checkpoints')

# Path de modelos entrenados con clasificacion
CLASSIFICATION_MODELS_PATH = os.path.join(PATH_BASE, 'retrieval_models', 'clasification')
CLASSIFICATION_CHECKPOINTS_PATH = os.path.join(CLASSIFICATION_MODELS_PATH, 'checkpoints')

In [6]:
import torch
from torchvision.transforms import v2
from torchvision.io import read_video
from tqdm.auto import tqdm
import numpy as np
from pathlib import Path

cuda = torch.cuda.is_available()

volumes_path_list = sorted(Path(os.path.join(SYNTH_DATASET_V1_PATH, 'mp4_in')).glob('*.mp4'))

transforms_composite = v2.Compose([
    v2.Grayscale(num_output_channels=1),
    v2.Resize((100, 100)),
    v2.ToDtype(torch.float32, scale=True), # with scale=True, scales pixel values from [0, 255] to [0.0, 1.0]
    v2.Normalize(mean=[0.449], std=[0.226])
])
samples_path_list = [None]*len(volumes_path_list)
labels_list = [None]*len(volumes_path_list)

for i, p in tqdm(enumerate(volumes_path_list)):
    _, fname = os.path.split(p)
    info = fname.split('.')[0:-1]
    info = ''.join(info)
    info = info.split('_')
    fid = int(info[0])
    cube = int(info[1])
    sphere = int(info[2])
    tetrahedron = int(info[3])
    vol, _, _ = read_video(str(p), output_format="TCHW")
    vol = transforms_composite(vol).numpy()
    label = np.array([cube, sphere, tetrahedron], dtype=np.uint8)
    np.savez_compressed(os.path.join(os.path.join(SYNTH_DATASET_V1_PATH, 'npz'), f'{fid}_{cube}_{sphere}_{tetrahedron}.npz'), volume=vol, labels=label)
    print(f'Saved {fid}_{cube}_{sphere}_{tetrahedron}.npz')


0it [00:00, ?it/s]



Saved 100_0_0_0.npz
Saved 10_0_0_0.npz
Saved 11_0_0_0.npz
Saved 12_0_0_0.npz
Saved 13_0_0_0.npz
Saved 14_0_0_0.npz
Saved 15_0_0_0.npz
Saved 16_0_0_0.npz
Saved 17_0_0_0.npz
Saved 18_0_0_0.npz
Saved 19_0_0_0.npz
Saved 1_0_0_0.npz
Saved 20_0_0_0.npz
Saved 21_0_0_0.npz
Saved 22_0_0_0.npz
Saved 23_0_0_0.npz
Saved 24_0_0_0.npz
Saved 25_0_0_0.npz
Saved 26_0_0_0.npz
Saved 27_0_0_0.npz
Saved 28_0_0_0.npz
Saved 29_0_0_0.npz
Saved 2_0_0_0.npz
Saved 30_0_0_0.npz
Saved 31_0_0_0.npz
Saved 32_0_0_0.npz
Saved 33_0_0_0.npz
Saved 34_0_0_0.npz
Saved 35_0_0_0.npz
Saved 36_0_0_0.npz
Saved 37_0_0_0.npz
Saved 38_0_0_0.npz
Saved 39_0_0_0.npz
Saved 3_0_0_0.npz
Saved 40_0_0_0.npz
Saved 41_0_0_0.npz
Saved 42_0_0_0.npz
Saved 43_0_0_0.npz
Saved 44_0_0_0.npz
Saved 45_0_0_0.npz
Saved 46_0_0_0.npz
Saved 47_0_0_0.npz
Saved 48_0_0_0.npz
Saved 49_0_0_0.npz
Saved 4_0_0_0.npz
Saved 50_0_0_0.npz
Saved 51_0_0_0.npz
Saved 52_0_0_0.npz
Saved 53_0_0_0.npz
Saved 54_0_0_0.npz
Saved 55_0_0_0.npz
Saved 56_0_0_0.npz
Saved 57_0_0_0.