### 0. Import the packages

In [5]:
import os,sys,shutil,mmap
import pickle as pickle
import numpy as np                                       # fast vectors and matrices

import time

sys.path.insert(0,'lib/')
import config

from resampy import resample

from intervaltree import Interval, IntervalTree

### 1. Resample the data
Resample the recording and labels of the 331 recordings from the data base from 44100 Hz to 11025 Hz.

#### 1.1 Resample the labels of the data set.
The 331 resampled labels are saved in a dictionary whose
- keys are integers (the ids of the recordings)
- values are intervaltrees (for the different intervals of the recording it gives the played notes between 1 and 128)

In [7]:
def resample_label(file_in, file_out, frame_rate, frame_rate_out):
    ratio = frame_rate_out / float(frame_rate)
    print('.. resampling {} ({}Hz) into {} ({}Hz)'.format(
        file_in, frame_rate, file_out, frame_rate_out))
    print('.. sampling with ratio {}'.format(ratio))
    resampled_data = {}
    with open(file_in, 'rb') as f_in:
        labels = pickle.load(f_in)
        
        for key, value in labels.items() :
            resampled_intervals = []
            for interval in value:
                resampled_begin = int(interval.begin * ratio)
                resampled_end = int(interval.end * ratio)
                resampled_interval = Interval(
                    resampled_begin, resampled_end, interval.data)
                resampled_intervals.append(resampled_interval) 
            data = IntervalTree(resampled_intervals)
            resampled_data[key] = data   
            
        print('.. saving output')
        with open(file_out, 'wb') as f_out:
            pickle.dump(resampled_data, f_out)

In [8]:
resample_label(config.labels_path, config.labels_path_11, 44100, 11025)

with open(config.labels_path_11, 'rb') as f:
    labels = pickle.load(f)

.. resampling data/labels_mirex.pckl (44100Hz) into data/labels_mirex_11.pckl (11025Hz)
.. sampling with ratio 0.25
.. saving output


In [9]:
print(len(labels), 'labels')

331 labels


In [10]:
for key, value in labels.items() :
    if key == 2239:
        # id of the recording
        print (key)
        # intervaltree containing for each note intervals in which it is played
        print (type(value))

2239
<class 'intervaltree.intervaltree.IntervalTree'>


#### 1.2 Resample recordings

Create a dictionary containing the resampled data (recordings). Use Memory-mapped file objects.
For each recording data[rec_id] is a tuple:
1. data[rec_id][0] is the memory-mapped file object to the recording. It can be accesed by 
~~~~~~
np.frombuffer(data[rec_id][0], dtype=np.float32).copy()
~~~~~~
2. data[rec_id][1] is its length (float)

In [25]:
sz_float = 4

In [26]:
def resample_music(file_in, file_out, frame_rate, frame_rate_out):
    ratio = frame_rate_out / float(frame_rate)
    print('.. resampling {} ({}Hz) into {} ({}Hz)'.format(
        file_in, frame_rate, file_out, frame_rate_out))
    print('.. sampling with ratio {}'.format(ratio))
 
    fd = os.open(file_in, os.O_RDONLY)
    buff = mmap.mmap(fd, 0, mmap.MAP_SHARED, mmap.PROT_READ)
    data_in = np.frombuffer(buff, dtype=np.float32).copy()
    
    data_out = resample(data_in, frame_rate, frame_rate_out)
    with open(file_out, 'wb') as f_out:
        np.save(f_out, data_out)
        
    os.close(fd)


In [27]:
data = dict()
for record in os.listdir(config.records_path):
    resample_music(config.records_path + record, config.records_path_11 + record, 44100, 11025)
    fd = os.open(config.records_path_11 + record, os.O_RDONLY)
    buff = mmap.mmap(fd, 0, mmap.MAP_SHARED, mmap.PROT_READ)
    data[int(record[:-4])] = (buff, len(buff)/sz_float)
    os.close(fd)

.. resampling data/records/2218.npy (44100Hz) into data/records_11/2218.npy (11025Hz)
.. sampling with ratio 0.25
.. resampling data/records/2512.npy (44100Hz) into data/records_11/2512.npy (11025Hz)
.. sampling with ratio 0.25
.. resampling data/records/2388.npy (44100Hz) into data/records_11/2388.npy (11025Hz)
.. sampling with ratio 0.25
.. resampling data/records/2678.npy (44100Hz) into data/records_11/2678.npy (11025Hz)
.. sampling with ratio 0.25
.. resampling data/records/2550.npy (44100Hz) into data/records_11/2550.npy (11025Hz)
.. sampling with ratio 0.25
.. resampling data/records/2213.npy (44100Hz) into data/records_11/2213.npy (11025Hz)
.. sampling with ratio 0.25
.. resampling data/records/1812.npy (44100Hz) into data/records_11/1812.npy (11025Hz)
.. sampling with ratio 0.25
.. resampling data/records/2405.npy (44100Hz) into data/records_11/2405.npy (11025Hz)
.. sampling with ratio 0.25
.. resampling data/records/1835.npy (44100Hz) into data/records_11/1835.npy (11025Hz)
..

.. resampling data/records/2557.npy (44100Hz) into data/records_11/2557.npy (11025Hz)
.. sampling with ratio 0.25
.. resampling data/records/1805.npy (44100Hz) into data/records_11/1805.npy (11025Hz)
.. sampling with ratio 0.25
.. resampling data/records/2313.npy (44100Hz) into data/records_11/2313.npy (11025Hz)
.. sampling with ratio 0.25
.. resampling data/records/2537.npy (44100Hz) into data/records_11/2537.npy (11025Hz)
.. sampling with ratio 0.25
.. resampling data/records/2295.npy (44100Hz) into data/records_11/2295.npy (11025Hz)
.. sampling with ratio 0.25
.. resampling data/records/2366.npy (44100Hz) into data/records_11/2366.npy (11025Hz)
.. sampling with ratio 0.25
.. resampling data/records/2211.npy (44100Hz) into data/records_11/2211.npy (11025Hz)
.. sampling with ratio 0.25
.. resampling data/records/1772.npy (44100Hz) into data/records_11/1772.npy (11025Hz)
.. sampling with ratio 0.25
.. resampling data/records/2201.npy (44100Hz) into data/records_11/2201.npy (11025Hz)
..

.. resampling data/records/2159.npy (44100Hz) into data/records_11/2159.npy (11025Hz)
.. sampling with ratio 0.25
.. resampling data/records/2531.npy (44100Hz) into data/records_11/2531.npy (11025Hz)
.. sampling with ratio 0.25
.. resampling data/records/1893.npy (44100Hz) into data/records_11/1893.npy (11025Hz)
.. sampling with ratio 0.25
.. resampling data/records/1789.npy (44100Hz) into data/records_11/1789.npy (11025Hz)
.. sampling with ratio 0.25
.. resampling data/records/2542.npy (44100Hz) into data/records_11/2542.npy (11025Hz)
.. sampling with ratio 0.25
.. resampling data/records/1923.npy (44100Hz) into data/records_11/1923.npy (11025Hz)
.. sampling with ratio 0.25
.. resampling data/records/2179.npy (44100Hz) into data/records_11/2179.npy (11025Hz)
.. sampling with ratio 0.25
.. resampling data/records/2310.npy (44100Hz) into data/records_11/2310.npy (11025Hz)
.. sampling with ratio 0.25
.. resampling data/records/2562.npy (44100Hz) into data/records_11/2562.npy (11025Hz)
..

.. resampling data/records/2432.npy (44100Hz) into data/records_11/2432.npy (11025Hz)
.. sampling with ratio 0.25
.. resampling data/records/2371.npy (44100Hz) into data/records_11/2371.npy (11025Hz)
.. sampling with ratio 0.25
.. resampling data/records/2596.npy (44100Hz) into data/records_11/2596.npy (11025Hz)
.. sampling with ratio 0.25
.. resampling data/records/2284.npy (44100Hz) into data/records_11/2284.npy (11025Hz)
.. sampling with ratio 0.25
.. resampling data/records/1776.npy (44100Hz) into data/records_11/1776.npy (11025Hz)
.. sampling with ratio 0.25
.. resampling data/records/2593.npy (44100Hz) into data/records_11/2593.npy (11025Hz)
.. sampling with ratio 0.25
.. resampling data/records/2283.npy (44100Hz) into data/records_11/2283.npy (11025Hz)
.. sampling with ratio 0.25
.. resampling data/records/2383.npy (44100Hz) into data/records_11/2383.npy (11025Hz)
.. sampling with ratio 0.25
.. resampling data/records/2322.npy (44100Hz) into data/records_11/2322.npy (11025Hz)
..

.. resampling data/records/2231.npy (44100Hz) into data/records_11/2231.npy (11025Hz)
.. sampling with ratio 0.25
.. resampling data/records/2196.npy (44100Hz) into data/records_11/2196.npy (11025Hz)
.. sampling with ratio 0.25
.. resampling data/records/2210.npy (44100Hz) into data/records_11/2210.npy (11025Hz)
.. sampling with ratio 0.25
.. resampling data/records/2248.npy (44100Hz) into data/records_11/2248.npy (11025Hz)
.. sampling with ratio 0.25
.. resampling data/records/2314.npy (44100Hz) into data/records_11/2314.npy (11025Hz)
.. sampling with ratio 0.25
.. resampling data/records/2575.npy (44100Hz) into data/records_11/2575.npy (11025Hz)
.. sampling with ratio 0.25
.. resampling data/records/2594.npy (44100Hz) into data/records_11/2594.npy (11025Hz)
.. sampling with ratio 0.25
.. resampling data/records/2502.npy (44100Hz) into data/records_11/2502.npy (11025Hz)
.. sampling with ratio 0.25
.. resampling data/records/2626.npy (44100Hz) into data/records_11/2626.npy (11025Hz)
..

In [28]:
for key, value in data.items() :
    if key == 2494:
        # id of the recording
        print ('Key:',key,', Format:',type(key))
        # its value is a tuple
        print ('Value:',value)
        # 1. value: reference to recording
        print ("1. Entry: Memory-mapped file objects ,", data[2494][0])
        # to access content use:
        x = np.frombuffer(data[2494][0], dtype=np.float32).copy()
        print('For example:')
        print(x[3005:3017])
        print ("2. Entry: length of recording",str(data[2494][1]),", Type:",type(data[2494][1]))
        #print(len(x))
        

Key: 2494 , Format: <class 'int'>
Value: (<mmap.mmap object at 0x7f9671fe3300>, 3275168.0)
1. Entry: Memory-mapped file objects , <mmap.mmap object at 0x7f9671fe3300>
For example:
[-4.7592088e-08  1.7693633e-07 -3.0272071e-07  4.2005357e-07
 -5.2369535e-07  6.0711841e-07 -6.6018526e-07 -6.9671851e-06
 -5.5540130e-07  8.6105437e-08  5.2294681e-06  4.2070492e-06]
2. Entry: length of recording 3275168.0 , Type: <class 'float'>
