First, we'll import PyTorch and check whether a GPU is available.

In [194]:
import os
import random
from pathlib import Path

import numpy as np
import pandas as pd
import torch
from torch.utils.data import Dataset

# Report whether PyTorch can see a CUDA device in this environment.
print('GPU available' if torch.cuda.is_available() else 'CPU only')

CPU only


Next, define the path to the data we're using.

In [150]:
# Root directory that holds the experiment datasets. Set the QPAI_DATA_DIR
# environment variable to point the notebook at a different location; when it
# is unset the original hard-coded path is used, so existing runs are unchanged.
data_path = Path(os.environ.get(
    "QPAI_DATA_DIR",
    'C:\\Users\\dohert01\\PycharmProjects\\qPAI_cINN_uncertainty_estimation\\datasets',
))
# Name of the experiment; used as a sub-directory of data_path when building file paths.
experiment_name = "FlowPhantom_insilico_complicated"

Let's have a look at the data. First of all, borrow some normalisation functions...

In [151]:
def spectrum_normalisation(spectrum):
    """Applies z-score scaling to the initial pressure spectrum.

    Args:
        spectrum: Sequence or array of numeric values (a list is accepted).

    Returns:
        numpy array of the same length holding ``(spectrum - mean) / std``,
        where ``std`` is the population standard deviation (``np.std``).
        If the input has no spread (a single value, or all values equal up to
        floating-point rounding) the z-score is undefined, so an array of
        zeros is returned instead of NaN/inf. An empty input gives an empty
        array.
    """
    values = np.asarray(spectrum)
    if values.size == 0:
        # np.mean / np.max are undefined on empty input; nothing to scale.
        return values.astype(float)

    centred = values - np.mean(values)
    std = np.std(values)

    # A constant spectrum can still yield a tiny non-zero std because the mean
    # is itself rounded, so compare against a tolerance scaled to the data
    # rather than testing for exact zero.
    tolerance = 10 * np.finfo(centred.dtype).eps * np.max(np.abs(values))
    if std <= tolerance:
        return np.zeros_like(centred)

    return centred / std

def spectrum_processing(spectrum, allowed_datapoints):
    """Returns a normalised initial pressure spectrum with some of the values zeroed out.

    A number of datapoints to keep is drawn with ``random.choice`` from
    ``allowed_datapoints``; that many positions are selected at random (via
    ``np.random.shuffle``), the values at those positions are z-score
    normalised together with ``spectrum_normalisation``, and every other
    position is set to 0.

    Args:
        spectrum: 1-D sequence of numeric values.
        allowed_datapoints: Non-empty sequence of candidate counts of
            datapoints to keep.

    Returns:
        1-D float numpy array with the same length as ``spectrum``.
    """
    values = np.asarray(spectrum, dtype=float)
    num_non_zero_datapoints = random.choice(allowed_datapoints)

    # Build the random keep-mask: the first k entries are set, then shuffled.
    keep = np.zeros(len(values))
    keep[:num_non_zero_datapoints] = 1
    np.random.shuffle(keep)
    keep = keep.astype(bool)

    # Index with the mask itself instead of recovering the kept positions from
    # the non-zero entries of the masked spectrum: a kept sample whose value is
    # exactly 0 would otherwise be mistaken for a masked one and left out of
    # the normalisation.
    normalised_incomplete_spectrum = np.zeros(len(values))
    normalised_incomplete_spectrum[keep] = spectrum_normalisation(values[keep])

    return normalised_incomplete_spectrum

def batch_spectrum_processing(batch, allowed_datapoints):
    """Applies ``spectrum_processing`` to every spectrum in ``batch``.

    Each element obtained by iterating over ``batch`` is processed
    independently with the same ``allowed_datapoints``; the results are
    stacked into a single numpy array and returned as a torch tensor.
    """
    stacked = np.array(
        [spectrum_processing(row, allowed_datapoints) for row in batch]
    )
    return torch.tensor(stacked)

Let's load the data from file

In [152]:
# Every tensor file for this experiment sits in the same directory.
experiment_dir = data_path / experiment_name

# File locations, grouped by data split.
training_spectra_file = experiment_dir / "training_spectra.pt"
training_oxygenations_file = experiment_dir / "training_oxygenations.pt"

validation_spectra_file = experiment_dir / "validation_spectra.pt"
validation_oxygenations_file = experiment_dir / "validation_oxygenations.pt"

test_spectra_file = experiment_dir / "test_spectra.pt"
test_oxygenations_file = experiment_dir / "test_oxygenations.pt"

# NOTE: torch.load relies on pickle-based deserialisation, so only point these
# paths at files from a trusted source.
train_spectra_original = torch.load(training_spectra_file)
train_oxygenations_original = torch.load(training_oxygenations_file)

validation_spectra_original = torch.load(validation_spectra_file)
validation_oxygenations_original = torch.load(validation_oxygenations_file)

test_spectra_original = torch.load(test_spectra_file)
test_oxygenations_original = torch.load(test_oxygenations_file)

Now let's look at the dimensions

In [197]:
# Shapes of the training inputs and targets, followed by one example spectrum.
print(train_spectra_original.shape)
print(train_oxygenations_original.shape)
first_training_spectrum = train_spectra_original[0]
print(first_training_spectrum)

torch.Size([134624, 41])
torch.Size([134624])
tensor([634.9278, 600.2585, 600.2339, 587.4062, 580.4452, 573.9892, 582.9027,
        597.7095, 601.8840, 641.6681, 655.6356, 704.5982, 730.0311, 739.1377,
        762.7631, 768.2003, 789.5642, 808.6349, 811.7870, 835.5294, 866.5328,
        886.8488, 918.7031, 905.1712, 913.7165, 913.7761, 913.4937, 919.7126,
        915.4688, 919.2101, 887.4873, 870.8792, 905.5049, 883.7628, 876.9416,
        888.4904, 881.3424, 888.5063, 892.4427, 879.3855, 869.1013],
       dtype=torch.float64)


In [196]:
# Randomly zero out part of every spectrum and z-score the values that remain.
# NOTE: the masking draws from the global `random` and `np.random` state and no
# seed is set in the cells shown here, so results differ from run to run.
allowed_datapoints = [10]  # candidate counts of datapoints kept per spectrum

train_spectra = batch_spectrum_processing(
    batch=train_spectra_original, allowed_datapoints=allowed_datapoints
)
validation_spectra = batch_spectrum_processing(
    batch=validation_spectra_original, allowed_datapoints=allowed_datapoints
)
test_spectra = batch_spectrum_processing(
    batch=test_spectra_original, allowed_datapoints=allowed_datapoints
)

RuntimeError: Numpy is not available

In [193]:
# Reshaping initial pressure spectra to fit LSTM input size
train_spectra = torch.reshape(train_spectra, (len(train_spectra), len(train_spectra[0]), 1))
validation_spectra = torch.reshape(validation_spectra, (len(validation_spectra), len(validation_spectra[0]), 1))
test_spectra = torch.reshape(test_spectra, (len(test_spectra), len(test_spectra[0]), 1))

train_oxygenations = torch.reshape(train_oxygenations,(len(train_oxygenations),1))
validation_oxygenations = torch.reshape(validation_oxygenations,(len(validation_oxygenations),1))
test_oxygenations = torch.tensor(np.float32(test_oxygenations))
test_oxygenations = torch.reshape(test_oxygenations,(len(test_oxygenations),1))

NameError: name 'train_spectra' is not defined

In [None]:
class MultiSpectralPressureO2Dataset(Dataset):
    """Dataset that pairs files listed in a CSV with their labels.

    The CSV at ``annotations_file`` is read with ``pd.read_csv``. For item
    ``idx``, column 0 of that row is joined onto ``img_dir`` to form a file
    path, the file is read with ``read_image``, and column 1 is used as the
    label.

    NOTE(review): ``read_image`` is not defined or imported in the visible
    cells (presumably ``torchvision.io.read_image`` - confirm and import it).
    NOTE(review): the body is file/image oriented and does not use the spectra
    or oxygenation tensors prepared earlier in this notebook; confirm whether
    it still needs adapting.

    Args:
        annotations_file: Path to the CSV file listing file names and labels.
        img_dir: Directory that the file names in column 0 are relative to.
        transform: Optional callable applied to each loaded item.
        target_transform: Optional callable applied to each label.
    """

    def __init__(self, annotations_file, img_dir, transform=None, target_transform=None):
        self.img_labels = pd.read_csv(annotations_file)
        self.img_dir = img_dir
        self.transform = transform
        self.target_transform = target_transform

    def __len__(self):
        """Returns the number of rows in the annotations table."""
        return len(self.img_labels)

    def __getitem__(self, idx):
        """Returns the ``(image, label)`` pair for row ``idx``, after any transforms."""
        img_path = os.path.join(self.img_dir, self.img_labels.iloc[idx, 0])
        image = read_image(img_path)
        label = self.img_labels.iloc[idx, 1]
        # Compare against None rather than relying on truthiness: a callable
        # that defines __len__ or __bool__ (e.g. an empty container of
        # transforms) can evaluate falsy and would otherwise be skipped.
        if self.transform is not None:
            image = self.transform(image)
        if self.target_transform is not None:
            label = self.target_transform(label)
        return image, label