# Imports

In [1]:
import numpy as np
import progressbar as pb

# Configuration

In [2]:
# directory to store intermediate and final results of the experiment
# NOTE: the value ends with "/" and the path templates in the cells below
# insert another "/" after it, so the resulting paths contain "//".
EXPERIMENT_DIR      = "/home/schindlera/experiments/ismir2020_reviews/"

# common file-name prefix of the per-partition audio archives; the cells below
# append "_<partition>.npz" when reading and "_norm_<partition>.npz" when
# writing the normalized data
AUDIO_FILENAME_STEM = "melspec_128_10seconds_2ch"

# Normalize Data

## Calculate Means from Train Partition

Calculate means and standard deviations over all tracks and over time for each Mel-band and channel

In [3]:
# Path of the raw (not yet normalized) train archive.
audio_filename_train = "%s/%s_train.npz" % (EXPERIMENT_DIR, AUDIO_FILENAME_STEM)

# NOTE(review): allow_pickle=True lets np.load unpickle object arrays, which can
# execute arbitrary code -- only use it on trusted files. Presumably required
# because "track_ids" is stored as an object array; confirm.
with np.load(audio_filename_train, allow_pickle=True) as train_archive:
    # Both arrays are read while the archive is still open.
    data, trackids = train_archive["data"], train_archive["track_ids"]

CPU times: user 4min 5s, sys: 1min 46s, total: 5min 51s
Wall time: 9min 9s


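If you have already stored these values (see the `np.savez` cell below), you can load them from `normalization_parameters.npz` instead of recomputing them with the following cell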

In [6]:
# Statistics of the train partition, reduced over axis 0 and axis 2.
# keepdims=True keeps the reduced axes with length 1, so both results
# broadcast against slices of `data` in the normalization loops.
means = data.mean(axis=(0, 2), keepdims=True)
stds  = data.std(axis=(0, 2), keepdims=True)

# Guard against constant features: a standard deviation of exactly zero would
# turn the later `/= stds` into a division by zero (NaN/inf in the normalized
# data). Using a unit scale instead maps such features to 0 after centering.
stds[stds == 0] = 1.0

In [None]:
# Persist the normalization statistics inside the experiment directory so they
# can be reused without recomputing them.
normalization_param_filename = "%s/normalization_parameters.npz" % EXPERIMENT_DIR

# The archive keys are "means" and "stds".
normalization_params = {"means": means, "stds": stds}
np.savez(normalization_param_filename, **normalization_params)

## Normalize all Experiment Partitions

First, normalize the train partition, because the data is already loaded in memory

In [9]:
# Normalize the train partition in place, 100 entries of axis 0 at a time, so
# the progress bar advances once per batch.
for start in pb.ProgressBar()(range(0, data.shape[0], 100)):
    stop = start + 100  # slicing clamps this to the array length on the last batch

    # Basic slicing returns a view, so the in-place operators below write
    # straight through to `data`.
    batch = data[start:stop, :, :, :]
    batch -= means
    batch /= stds

100% (2475 of 2475) |####################| Elapsed Time: 0:04:28 Time:  0:04:28


Store normalized audio data

In [19]:
# From here on `audio_filename_train` points at the normalized train archive.
audio_filename_train = "%s/%s_norm_train.npz" % (EXPERIMENT_DIR, AUDIO_FILENAME_STEM)

# Write the normalized array and the track ids under the same keys
# ("data", "track_ids") that the raw archive is read with.
np.savez(audio_filename_train, data=data, track_ids=trackids)

CPU times: user 9 µs, sys: 0 ns, total: 9 µs
Wall time: 16.5 µs


In [21]:
# Drop the references to the train arrays before the next partitions are loaded.
del data
del trackids

In [10]:
# Apply the statistics computed on the train partition (`means`, `stds`) to the
# remaining partitions and store each result as "<stem>_norm_<partition>.npz".
for par in ["val", "test"]:

    # load audio data
    audio_filename_par = "%s/%s_%s.npz" % (EXPERIMENT_DIR, AUDIO_FILENAME_STEM, par)

    # NOTE(review): allow_pickle=True lets np.load unpickle object arrays, which
    # can execute arbitrary code -- only use it on trusted files.
    with np.load(audio_filename_par, allow_pickle=True) as npz:
        data     = npz["data"]
        trackids = npz["track_ids"]

    # normalize audio data in place, 100 entries of axis 0 per progress-bar step
    for start in pb.ProgressBar()(range(0, data.shape[0], 100)):

        stop = start + 100  # slicing clamps this on the last batch

        data[start:stop,:,:,:] -= means
        data[start:stop,:,:,:] /= stds

    # store normalized audio data
    # (kept in its own variable so the train path in `audio_filename_train`
    # is no longer overwritten by the val/test output paths)
    audio_filename_par_norm = "%s/%s_norm_%s.npz" % (EXPERIMENT_DIR, AUDIO_FILENAME_STEM, par)

    np.savez(audio_filename_par_norm,
             data      = data,
             track_ids = trackids)

    # drop the references before the next partition is loaded
    del data, trackids