### referenced by - https://github.com/kuleshov/audio-super-res
* 아래 audio-super-res로 명시된것은 위의 참고자료 모델을 의미함

## 1. 학습에 사용될 원본 wav파일들을 사용하여 학습용 h5포맷 데이터 생성하기 
* ./data/train 폴더에 학습용 wav파일 원본들을 저장한다
* ./data/train.txt에 학습용 wav파일의 목록을 저장한다
* 전처리 과정에서 low-res와 high-res버전의 데이터를 생성하고 이를 h5포맷으로 저장한다

In [1]:
import os, argparse
import numpy as np
import h5py
import librosa
from scipy import interpolate
from scipy.signal import decimate
from scipy.signal import butter, lfilter
args = {
    'interpolate'               : 0, # False
    'dimension'                 : 8192 * 4, # dimension of patches
    'stride'                    : 8192 * 2, # stride of patches
    'scale'                     : 6, 
    'batch_size'                : 1,
    'sr'                        : 16000,# sampling rate
    'sam'                       : 1,
    'train_out'                 : 'train.h5',
    'train_in_dir'              : './data/train/',
    'valid_out'                 : 'valid.h5',
    'valid_in_dir'              : './data/valid/',
    'train_file_list'           : './data/train.txt',
    'valid_file_list'           : './data/valid.txt',
    'temp_out'                  : 'temp.h5',
    'temp_in_dir'               : './data/temp/',
    'temp_file_list'            : './data/temp.txt'
}

In [2]:

def upsample(x_lr, r):
    x_lr = x_lr.flatten()
    x_hr_len = len(x_lr) * r
    x_sp = np.zeros(x_hr_len)

    i_lr = np.arange(x_hr_len, step=r)
    i_hr = np.arange(x_hr_len)

    f = interpolate.splrep(i_lr, x_lr)

    x_sp = interpolate.splev(i_hr, f)

    return x_sp

In [3]:
def add_data_preprocessed(h5_file, inputfiles, in_dir, args, save_examples=False):
    
    # 1) read original dataset
    
    file_list = []
    file_extensions = set(['.wav'])
    with open(inputfiles) as f:
        for line in f:
            filename = line.strip()
            ext = os.path.splitext(filename)[1]
            if ext in file_extensions:
                file_list.append(os.path.join(in_dir, filename))
                
    num_files = len(file_list)
    
    # 2) read wav file (we always use interpolate mode) 
    # 3) create low-res version
    # 4) upsample low-res version for same data size
    # 5) patch the data
    
    d, d_lr = args['dimension'], (args['dimension'])
    s, s_lr = args['stride'], (args['stride'])
    hr_patches = list()
    lr_patches = list()
    for j, file_path in enumerate(file_list):
        if j % 10 == 0: print('%d/%d' % (j, num_files))
        
        # load audio file
        x, fs = librosa.load(file_path, sr=args['sr']) # sr = sample rates
        
        # crop so that it works with scailing ratio
        x_len = len(x)
        x = x[ : x_len - (x_len % args['scale'])]
        
        # generate low-res version
        x_lr = decimate(x, args['scale'])
        
        #  upsample data(we will use preprocessed low-res data)
        #  EX. scale x4 on dimension
        #  data (low-res )2048 ---> [cubic-upscaling] --> 8192 ---> model input (8192)
        #  label (high-res)8192 -----------------------------------> model output(8192)
        x_lr = upsample(x_lr, args['scale'])
        
        assert len(x) % args['scale'] == 0
        assert len(x_lr) == (len(x))
        
        # Generate patches
        max_i = len(x) - d + 1 # d = dimension
        for i in range(0, max_i, s): # s = strides 
            # keep only a fraction of all the patches
            u = np.random.uniform()
            if u > args['sam']: continue
            
            i_lr = i
            
            hr_patch = np.array( x[i : i+d] )
            lr_patch = np.array( x_lr[i_lr : i_lr + d_lr] )    
            assert len(hr_patch) == d
            assert len(lr_patch) == d_lr
            
            hr_patches.append(hr_patch.reshape((d,1)))
            lr_patches.append(lr_patch.reshape((d_lr,1)))
    
    
    # 6) save as .h5 files    
    # crop # of patches so that it's a multiple of mini-batch size
    num_hr_patches = len(hr_patches)
    num_lr_patches = len(lr_patches)
    
    print('num_hr_patches:', num_hr_patches)
    print('num_lr_patches:', num_lr_patches)
    print('batch_size:', args['batch_size'])
    num_to_keep_hr = int(np.floor(num_hr_patches / args['batch_size']) * args['batch_size'])
    hr_patches = np.array(hr_patches[:num_to_keep_hr])
    
    num_to_keep_lr = int(np.floor(num_lr_patches / args['batch_size']) * args['batch_size'])
    lr_patches = np.array(lr_patches[:num_to_keep_lr])

    if save_examples:
        librosa.output.write_wav('example-hr.wav', hr_patches[40], fs, norm=False)
        #librosa.output.write_wav('example-lr.wav', lr_patches[40], int(fs / args['scale']), norm=False)
        librosa.output.write_wav('example-lr.wav', lr_patches[40], fs, norm=False)
        print (hr_patches[40].shape)
        print (lr_patches[40].shape)
        print (hr_patches[40][0][:10])
        print (lr_patches[40][0][:10])
        print ('two examples saved')

    print ('hr_patches shape:',hr_patches.shape)
    print ('lr_patches shape:',lr_patches.shape)

    # create the hdf5 file
    data_set = h5_file.create_dataset('data', lr_patches.shape, np.float32) # lr
    label_set = h5_file.create_dataset('label', hr_patches.shape, np.float32) # hr

    data_set[...] = lr_patches
    label_set[...] = hr_patches

In [4]:
# create train
with h5py.File(args['train_out'], 'w') as f:
    add_data_preprocessed(f, args['train_file_list'], args['train_in_dir'],args, save_examples=True)

0/231
10/231
20/231
30/231
40/231
50/231
60/231
70/231
80/231
90/231
100/231
110/231
120/231
130/231
140/231
150/231
160/231
170/231
180/231
190/231
200/231
210/231
220/231
230/231
num_hr_patches: 621
num_lr_patches: 621
batch_size: 1
(32768, 1)
(32768, 1)
[1.7370978e-05]
[1.78091372e-05]
two examples saved
hr_patches shape: (621, 32768, 1)
lr_patches shape: (621, 32768, 1)


## 2. 마찬가지로 검증 데이터셋도 h5파일로 구성한다
* ./data/valid 폴더에 검증용 원본 데이터를 저장한다
* ./data/valid.txt 파일에 검증용 데이터 목록을 저장한다
* 1번과 같은 방식으로 h5파일 포맷으로 데이터를 생성

In [5]:
# create validation
with h5py.File(args['valid_out'], 'w') as f:
    add_data_preprocessed(f, args['valid_file_list'], args['valid_in_dir'],args, save_examples=True)

0/125
10/125
20/125
30/125
40/125
50/125
60/125
70/125
80/125
90/125
100/125
110/125
120/125
num_hr_patches: 162
num_lr_patches: 162
batch_size: 1
(32768, 1)
(32768, 1)
[-0.00555102]
[-0.00548858]
two examples saved
hr_patches shape: (162, 32768, 1)
lr_patches shape: (162, 32768, 1)


## 3. 기타 다른 데이터를 사용하기
* ./data/temp 폴더에 원본 wav파일을 저장
* ./data/temp.txt에 파일 리스트를 작성
* 아래 코드를 실행하여 h5포맷으로 데이터를 생성

In [6]:
# create another set
with h5py.File(args['temp_out'], 'w') as f:
    add_data_preprocessed(f, args['temp_file_list'], args['temp_in_dir'], args, save_examples=False)

num_hr_patches: 0
num_lr_patches: 0
batch_size: 1
hr_patches shape: (0,)
lr_patches shape: (0,)
