In [1]:
import os
import torch
from dataset_creation.utils import get_n_random_noises, create_noisy_traces
from snr.calc_snr import CalcSNR, SnrCalcStrategy

In [2]:
DATASETS_ORIGINS = ['ethz', 'geofon']
dataset_origin = 'ethz'
assert dataset_origin in DATASETS_ORIGINS, f'Expected dataset one of {DATASETS_ORIGINS}. Got {dataset_origin}.'

In [3]:
NUM_SHIFTS=6
NUM_SAMPLES=6000
DATASET_PATH=f'/home/moshe/datasets/GFZ/noisy_datasets/{dataset_origin}_{NUM_SAMPLES}_sample_joachim_noises_energy_ratio_snr/'
NOISES_PATH='/home/moshe/datasets/GFZ/Noises'
NUM_NOISY_VERSIONS_PER_TRACE=8
DESIRED_SNR=10
SAMPLING_RATE=100
AUGMENTED_WINDOW_SIZE=NUM_SAMPLES+SAMPLING_RATE*NUM_SHIFTS

In [4]:
dataset_traces_path = os.path.join(DATASET_PATH, 'original_dataset.pt')
dataset_labels_path = os.path.join(DATASET_PATH, 'original_labels.pt')

assert os.path.exists(DATASET_PATH), f'Directory {DATASET_PATH} does not exist'
assert os.path.exists(dataset_traces_path), f'file {dataset_traces_path} does not exist'
assert os.path.exists(dataset_labels_path), f'file {dataset_labels_path} does not exist'

## Load Dataset

In [5]:
dataset=torch.load(dataset_traces_path)
labels=torch.load(dataset_labels_path)

num_traces = dataset.shape[0]
num_labels = labels.shape[0]
num_samples = dataset.shape[-1]

assert num_labels==num_traces, f'Expected traces equal num labels.Got {num_traces} traces and {num_labels} labels'
assert num_samples==NUM_SAMPLES, f'Expected {NUM_SAMPLES} in each trace. Got {num_samples}.'

print(f'Loaded {num_traces} traces and corresponding labels.')

Loaded 1856 traces and corresponding labels.


## Create a Noisy Dataset

In [6]:
def create_single_noisy_version():
    # prepare full noises traces
    full_noise_traces: torch.tensor = get_n_random_noises(num_noises=num_traces, desired_window_size=AUGMENTED_WINDOW_SIZE, noises_path=NOISES_PATH,force_resample=True, filename='aaa', sampling_rate=SAMPLING_RATE, silent_exception_prints=True).squeeze()
    print(f'Loaded {full_noise_traces.shape[0]} noise traces. shape {full_noise_traces.shape}')
    # Trim the noise traces. Adjust to dataset num samples.
    noise_traces: torch.tensor = full_noise_traces[:,:,:NUM_SAMPLES].clone()

    print(f'Trimmed {noise_traces.shape[0]} noise traces to shape {noise_traces.shape}')
    # Create the noisy traces and get the ones that did not succeed
    version_noised_traces_list, version_noise_factors, version_not_included_indices = create_noisy_traces(dataset=dataset,desired_snr=DESIRED_SNR, labels=labels, noise_traces=noise_traces, calc_snr=CalcSNR(SnrCalcStrategy.ENERGY_RATIO))
    print(f'Created {len(version_noised_traces_list)} noisy traces')
    print(f'The following indices are not included {version_not_included_indices}')
    # Remove the corresponding indices from the full noises list and the label list
    included_indices_list = [i for i in range(num_traces) if i not in version_not_included_indices]
    version_labels = labels[included_indices_list].clone()
    version_full_noise_traces = full_noise_traces[included_indices_list].clone()
    return torch.stack(version_noised_traces_list, dim=0), version_labels, version_full_noise_traces, torch.tensor(version_noise_factors)

In [7]:
# all_noised_traces, all_labels, all_full_noise_traces, all_factors =  create_single_noisy_version()

In [8]:
for i in range(NUM_NOISY_VERSIONS_PER_TRACE):
    print('i =', i, 'create_single_noisy_version')
    version_noised_traces, version_labels, version_full_noise_traces, version_factors =  create_single_noisy_version()
    print('version shapes')
    print(version_noised_traces.shape, version_labels.shape, version_full_noise_traces.shape, version_factors.shape)
    if i > 0:
        print('all shapes')
        print(all_noised_traces.shape, all_labels.shape, all_full_noise_traces.shape, all_factors.shape)
    (all_noised_traces, all_labels, all_full_noise_traces, all_factors) = (version_noised_traces, version_labels, version_full_noise_traces, version_factors) if i == 0 else (torch.vstack((all_noised_traces, version_noised_traces)), torch.vstack((all_labels.unsqueeze(dim=1), version_labels.unsqueeze(dim=1))).squeeze(), torch.vstack((all_full_noise_traces, version_full_noise_traces)), torch.vstack((all_factors.unsqueeze(dim=1), version_factors.unsqueeze(dim=1))).squeeze())


i = 0 create_single_noisy_version


100%|██████████| 1856/1856 [00:10<00:00, 181.53it/s]


Created a list of 1856 random noises of shape torch.Size([3, 6600])
stack to tensor
Stacked to tensor of shape torch.Size([1856, 3, 6600])
Loaded 1856 noise traces. shape torch.Size([1856, 3, 6600])
Trimmed 1856 noise traces to shape torch.Size([1856, 3, 6000])


100%|██████████| 1856/1856 [00:05<00:00, 354.48it/s]


Created 1856 noisy traces
The following indices are not included []
version shapes
torch.Size([1856, 3, 6000]) torch.Size([1856]) torch.Size([1856, 3, 6600]) torch.Size([1856])
i = 1 create_single_noisy_version


100%|██████████| 1856/1856 [00:09<00:00, 194.16it/s]


Created a list of 1856 random noises of shape torch.Size([3, 6600])
stack to tensor
Stacked to tensor of shape torch.Size([1856, 3, 6600])
Loaded 1856 noise traces. shape torch.Size([1856, 3, 6600])
Trimmed 1856 noise traces to shape torch.Size([1856, 3, 6000])


100%|██████████| 1856/1856 [00:05<00:00, 362.42it/s]


Created 1856 noisy traces
The following indices are not included []
version shapes
torch.Size([1856, 3, 6000]) torch.Size([1856]) torch.Size([1856, 3, 6600]) torch.Size([1856])
all shapes
torch.Size([1856, 3, 6000]) torch.Size([1856]) torch.Size([1856, 3, 6600]) torch.Size([1856])
i = 2 create_single_noisy_version


100%|██████████| 1856/1856 [00:08<00:00, 217.63it/s]


Created a list of 1856 random noises of shape torch.Size([3, 6600])
stack to tensor
Stacked to tensor of shape torch.Size([1856, 3, 6600])
Loaded 1856 noise traces. shape torch.Size([1856, 3, 6600])
Trimmed 1856 noise traces to shape torch.Size([1856, 3, 6000])


100%|██████████| 1856/1856 [00:04<00:00, 374.08it/s]


Created 1856 noisy traces
The following indices are not included []
version shapes
torch.Size([1856, 3, 6000]) torch.Size([1856]) torch.Size([1856, 3, 6600]) torch.Size([1856])
all shapes
torch.Size([3712, 3, 6000]) torch.Size([3712]) torch.Size([3712, 3, 6600]) torch.Size([3712])
i = 3 create_single_noisy_version


100%|██████████| 1856/1856 [00:08<00:00, 206.54it/s]


Created a list of 1856 random noises of shape torch.Size([3, 6600])
stack to tensor
Stacked to tensor of shape torch.Size([1856, 3, 6600])
Loaded 1856 noise traces. shape torch.Size([1856, 3, 6600])
Trimmed 1856 noise traces to shape torch.Size([1856, 3, 6000])


100%|██████████| 1856/1856 [00:04<00:00, 373.76it/s]


Created 1856 noisy traces
The following indices are not included []
version shapes
torch.Size([1856, 3, 6000]) torch.Size([1856]) torch.Size([1856, 3, 6600]) torch.Size([1856])
all shapes
torch.Size([5568, 3, 6000]) torch.Size([5568]) torch.Size([5568, 3, 6600]) torch.Size([5568])
i = 4 create_single_noisy_version


100%|██████████| 1856/1856 [00:08<00:00, 212.69it/s]


Created a list of 1856 random noises of shape torch.Size([3, 6600])
stack to tensor
Stacked to tensor of shape torch.Size([1856, 3, 6600])
Loaded 1856 noise traces. shape torch.Size([1856, 3, 6600])
Trimmed 1856 noise traces to shape torch.Size([1856, 3, 6000])


100%|██████████| 1856/1856 [00:04<00:00, 375.87it/s]


Created 1856 noisy traces
The following indices are not included []
version shapes
torch.Size([1856, 3, 6000]) torch.Size([1856]) torch.Size([1856, 3, 6600]) torch.Size([1856])
all shapes
torch.Size([7424, 3, 6000]) torch.Size([7424]) torch.Size([7424, 3, 6600]) torch.Size([7424])
i = 5 create_single_noisy_version


100%|██████████| 1856/1856 [00:08<00:00, 212.03it/s]


Created a list of 1856 random noises of shape torch.Size([3, 6600])
stack to tensor
Stacked to tensor of shape torch.Size([1856, 3, 6600])
Loaded 1856 noise traces. shape torch.Size([1856, 3, 6600])
Trimmed 1856 noise traces to shape torch.Size([1856, 3, 6000])


100%|██████████| 1856/1856 [00:05<00:00, 327.16it/s]


Created 1856 noisy traces
The following indices are not included []
version shapes
torch.Size([1856, 3, 6000]) torch.Size([1856]) torch.Size([1856, 3, 6600]) torch.Size([1856])
all shapes
torch.Size([9280, 3, 6000]) torch.Size([9280]) torch.Size([9280, 3, 6600]) torch.Size([9280])
i = 6 create_single_noisy_version


100%|██████████| 1856/1856 [00:09<00:00, 196.81it/s]


Created a list of 1856 random noises of shape torch.Size([3, 6600])
stack to tensor
Stacked to tensor of shape torch.Size([1856, 3, 6600])
Loaded 1856 noise traces. shape torch.Size([1856, 3, 6600])
Trimmed 1856 noise traces to shape torch.Size([1856, 3, 6000])


100%|██████████| 1856/1856 [00:05<00:00, 362.60it/s]


Created 1856 noisy traces
The following indices are not included []
version shapes
torch.Size([1856, 3, 6000]) torch.Size([1856]) torch.Size([1856, 3, 6600]) torch.Size([1856])
all shapes
torch.Size([11136, 3, 6000]) torch.Size([11136]) torch.Size([11136, 3, 6600]) torch.Size([11136])
i = 7 create_single_noisy_version


100%|██████████| 1856/1856 [00:09<00:00, 200.36it/s]


Created a list of 1856 random noises of shape torch.Size([3, 6600])
stack to tensor
Stacked to tensor of shape torch.Size([1856, 3, 6600])
Loaded 1856 noise traces. shape torch.Size([1856, 3, 6600])
Trimmed 1856 noise traces to shape torch.Size([1856, 3, 6000])


100%|██████████| 1856/1856 [00:05<00:00, 366.96it/s]


Created 1856 noisy traces
The following indices are not included []
version shapes
torch.Size([1856, 3, 6000]) torch.Size([1856]) torch.Size([1856, 3, 6600]) torch.Size([1856])
all shapes
torch.Size([12992, 3, 6000]) torch.Size([12992]) torch.Size([12992, 3, 6600]) torch.Size([12992])


In [9]:
# del full_noise_traces, noised_traces_list
all_noised_traces.shape, all_labels.shape, all_full_noise_traces.shape, all_factors.shape

(torch.Size([14848, 3, 6000]),
 torch.Size([14848]),
 torch.Size([14848, 3, 6600]),
 torch.Size([14848]))

In [10]:
noisy_dataset_path = os.path.join(DATASET_PATH,f'noisy_dataset_snr_{DESIRED_SNR}')
noisy_dataset_path

'/home/moshe/datasets/GFZ/noisy_datasets/ethz_6000_sample_joachim_noises_energy_ratio_snr/noisy_dataset_snr_10'

In [12]:
assert os.path.exists(noisy_dataset_path), f'{noisy_dataset_path} does not exist'

In [13]:
torch.save(all_noised_traces, os.path.join(noisy_dataset_path, 'traces.pt'))
torch.save(all_full_noise_traces, os.path.join(noisy_dataset_path, 'full_noise_traces.pt'))
torch.save(all_labels,os.path.join(noisy_dataset_path, 'labels.pt'))
torch.save(all_factors, os.path.join(noisy_dataset_path, 'factors.pt'))