### Feature Extraction
- pkl을 로드해서 사용하는 부분 next_batch에 추가하기 (given이 true이면 하고 아니면 원래대로 하는 것으로)
- 한 노래 돌때 피쳐 모두 뽑아서 각각 저장하기

In [None]:
import os
import numpy as np
import pandas as pd
import features
from scipy import stats
import librosa
from tqdm import *
import pickle

In [None]:
def test(feature_name, split_method):
    """Extract one feature for every track of a split and pickle the result.

    The metadata CSV at 'dataset/track_metadata.csv' is filtered to the rows
    whose 'set_split' equals ``split_method``; the (track_id, feature) pairs
    computed for those tracks are written to
    '<feature_name>_<split_method>.pkl' in the current directory.

    Args:
        feature_name: Name of the feature, forwarded to get_tid_feature_pairs.
        split_method: Value of the 'set_split' column selecting the rows.
    """
    output_path = '{}_{}.pkl'.format(feature_name, split_method)

    # The output file is opened before any work is done, so it is created
    # (or truncated) even when the extraction below fails.
    with open(output_path, 'wb') as f:
        metadata = build_metadata_df('dataset/track_metadata.csv')
        split_metadata = split_df(metadata, split_method)
        track_ids = get_track_ids(split_metadata)
        pairs = get_tid_feature_pairs(track_ids, feature_name)
        pickle.dump(pairs, f)

def build_metadata_df(filepath):
    """Read the track metadata CSV at ``filepath`` into a pandas DataFrame."""
    metadata_df = pd.read_csv(filepath)
    return metadata_df

def split_df(metadata_df, split_method):
    """Return the rows of ``metadata_df`` whose 'set_split' equals ``split_method``.

    The original index labels of the selected rows are kept.
    """
    in_split = metadata_df['set_split'] == split_method
    return metadata_df[in_split]

def get_track_ids(metadata_df):
    """Return the values of the 'track_id' column of ``metadata_df``."""
    track_id_column = metadata_df['track_id']
    return track_id_column.values

# Dump every (feature, split) combination to its own pickle file.
# Iteration order matches the explicit call order: for each feature,
# 'training' first and then 'validation'.
for feature_name in ('rmse', 'spectral_contrast'):
    for split_method in ('training', 'validation'):
        test(feature_name, split_method)

# TODO: would it be better to collect a {track_id: array} dictionary and
#       convert it to an array later? For now it is left as it is.
# TODO: rename to get_feature_by_tid

In [None]:
def get_tid_feature_pairs(tids, feature_name):
    """Compute ``feature_name`` for every track id and collect the results.

    Each track is loaded from the 'music/music_training' directory, truncated
    to a fixed number of samples, and turned into a feature array. Tracks for
    which any step raises are reported on stdout and skipped.

    Args:
        tids: Iterable of integer track ids.
        feature_name: Feature name understood by compute_feature.

    Returns:
        A list of ``(tid, numpy.ndarray)`` tuples, one per successful track.
    """
    pairs = []

    for tid in tqdm_notebook(tids):
        try:
            audio_path = get_audio_path('music/music_training', tid)
            # The sample rate returned by the loader is not used here.
            x, _ = load_audio_as_librosa_format(audio_path, DURATION=29.0)

            # 1278900 == 29 * 44100, i.e. 29 seconds of audio
            # (presumably at a 44.1 kHz sample rate -- TODO confirm).
            cutted_x = cut_x(x.tolist(), THRESHOLD=1278900)
            feature = compute_feature(cutted_x, feature_name)
            checked_feature = validate_feature(tid, feature)

            pairs.append((tid, np.array(checked_feature)))
        except Exception as e:
            # Best effort: report the failing track and carry on.
            print('{}: {}'.format(tid, repr(e)))

    return pairs

def get_audio_path(audio_dir, track_id):
    """Build the mp3 path of a track inside ``audio_dir``.

    The track id is zero-padded to six digits; the first three digits name
    the sub-directory and the full six digits name the file, e.g.
    ``<audio_dir>/001/001234.mp3`` for track id 1234.
    """
    padded_id = format(track_id, '06d')
    subdir = padded_id[:3]
    filename = padded_id + '.mp3'
    return os.path.join(audio_dir, subdir, filename)

def load_audio_as_librosa_format(filepath, DURATION):
    """Load an audio file with librosa and return whatever librosa.load returns.

    The file is loaded as mono, with ``sr=None``, and limited to ``DURATION``
    via librosa's ``duration`` argument.
    """
    # NOTE(review): the original carried a "kaiser_fast" remark here,
    # presumably a reminder about librosa's res_type option -- confirm.
    load_options = dict(sr=None, mono=True, duration=DURATION)
    return librosa.load(filepath, **load_options)

def cut_x(x, THRESHOLD):
    """Truncate ``x`` to its first ``THRESHOLD`` elements as a numpy array.

    Raises:
        ValueError: If ``x`` has fewer than ``THRESHOLD`` elements.
    """
    if len(x) < THRESHOLD:
        raise ValueError('song length is shorter than threshold')

    head = x[:THRESHOLD]
    return np.array(head)

def compute_feature(cutted_x, feature_name):
    """Compute the named feature for an already-truncated signal.

    Args:
        cutted_x: Audio samples, as returned by cut_x.
        feature_name: One of 'chroma_stft', 'rmse' or 'spectral_contrast'.

    Returns:
        The feature converted to nested Python lists via ``.tolist()``.

    Raises:
        ValueError: If ``feature_name`` is not one of the supported names.
            Without this check an unknown name would silently yield ``None``
            and end up in the output as an empty object array.
    """
    if feature_name == 'chroma_stft':
        return get_chroma_stft(cutted_x).tolist()
    if feature_name == 'rmse':
        return get_rmse(cutted_x).tolist()
    if feature_name == 'spectral_contrast':
        return get_spectral_contrast(cutted_x).tolist()

    raise ValueError('unsupported feature name: {!r}'.format(feature_name))
    
def validate_feature(tid, feature):
    """Return ``feature`` unchanged unless it compares equal to 0.

    Raises:
        ValueError: If ``feature == 0`` is true; the message names ``tid``.
    """
    # NOTE(review): a list never compares equal to 0, so this only triggers
    # for scalar values such as 0, 0.0 or False -- confirm the intended check.
    is_error_marker = feature == 0
    if is_error_marker:
        raise ValueError('tid {} has feature error'.format(tid))
    return feature

def get_chroma_stft(cutted_x):
    """Compute the 12-bin chroma feature of a truncated signal.

    The magnitude STFT is obtained through get_stft and handed to
    compute_chhroma_stft (the helper defined in this file; calling a
    non-existent name here would make every chroma extraction fail).

    Raises:
        ValueError: If computing or validating the STFT, or computing the
            chroma feature, raises; the underlying message is printed first.
    """
    try:
        return compute_chhroma_stft(
            get_stft(cutted_x)
            , N_CHROMA=12
        )
    except Exception as e:
        print(str(e))
        raise ValueError("validate_stft error")
        
def get_stft(cutted_x):
    """Return the validated magnitude STFT of ``cutted_x``.

    Uses an FFT size of 2048 and a hop length of 512 for both the
    computation and the subsequent shape validation.
    """
    n_fft, hop_length = 2048, 512

    stft = compute_stft(cutted_x, N_FFT=n_fft, HOP_LENGTH=hop_length)
    return validate_stft(stft, cutted_x, N_FFT=n_fft, HOP_LENGTH=hop_length)
    
def compute_stft(x, N_FFT, HOP_LENGTH):
    """Return the element-wise absolute value of librosa's STFT of ``x``."""
    complex_stft = librosa.stft(x, n_fft=N_FFT, hop_length=HOP_LENGTH)
    return np.abs(complex_stft)

def validate_stft(stft, x, N_FFT, HOP_LENGTH):
    """Check the shape of ``stft`` against the signal it came from.

    The first axis must have ``1 + N_FFT // 2`` entries, and the second axis
    must have between ``ceil(len(x) / HOP_LENGTH)`` and that value plus one
    entries (inclusive).

    Returns:
        ``stft`` itself, unchanged.

    Raises:
        AssertionError: If either shape check fails.
    """
    n_bins = stft.shape[0]
    assert n_bins == 1 + N_FFT // 2

    min_frames = np.ceil(len(x) / HOP_LENGTH)
    n_frames = stft.shape[1]
    assert min_frames <= n_frames <= min_frames + 1

    return stft

def compute_chhroma_stft(stft, N_CHROMA):
    """Return librosa's chroma feature computed from the squared ``stft``.

    ``stft`` is squared element-wise before being passed as ``S``;
    ``N_CHROMA`` is forwarded as librosa's ``n_chroma``.
    """
    squared_stft = stft ** 2
    return librosa.feature.chroma_stft(S=squared_stft, n_chroma=N_CHROMA)

def get_rmse(cutted_x):
    """Compute the RMS energy feature of a truncated signal.

    Raises:
        ValueError: If computing or validating the STFT, or computing the
            feature, raises; the underlying message is printed first.
    """
    try:
        stft = get_stft(cutted_x)
        return compute_rmse(stft)
    except Exception as err:
        print(str(err))
        raise ValueError("validate_stft error")
        
def compute_rmse(stft):
    """Return librosa's RMS energy computed from the spectrogram ``stft``.

    librosa renamed ``feature.rmse`` to ``feature.rms`` and later removed the
    old name, so the new name is preferred and the old one is used only when
    the installed librosa does not provide ``rms``.
    """
    rms_fn = getattr(librosa.feature, 'rms', None)
    if rms_fn is None:
        # Older librosa releases only expose the original name.
        rms_fn = librosa.feature.rmse
    return rms_fn(S=stft)

def get_spectral_contrast(cutted_x):
    """Compute the 6-band spectral contrast feature of a truncated signal.

    Raises:
        ValueError: If computing or validating the STFT, or computing the
            feature, raises; the underlying message is printed first.
    """
    try:
        stft = get_stft(cutted_x)
        return compute_spectral_contrast(stft, N_BANDS=6)
    except Exception as err:
        print(str(err))
        raise ValueError("validate_stft error")
        
def compute_spectral_contrast(stft, N_BANDS):
    """Return librosa's spectral contrast of ``stft`` using ``N_BANDS`` bands."""
    contrast = librosa.feature.spectral_contrast(S=stft, n_bands=N_BANDS)
    return contrast