In [1]:
import os
import subprocess
import warnings

import numpy as np
import pandas as pd
warnings.filterwarnings("ignore")

# Pré-Processamento dos Áudios

### Conversão para o formato .wav

Primeiro, é preciso converter os arquivos .au para o formato .wav.

In [42]:
# Convert every .au file found in genres/<genre>/ into sound_input/<genre>/<name>.wav
# by calling the external `sox` tool once per file.
for dir_ in os.listdir('genres'):
    src_dir = os.path.join('genres', dir_)
    if not os.path.isdir(src_dir):
        # stray files at the top level (e.g. .DS_Store) are not genre folders
        continue

    # sox does not create missing output folders, so make sure it exists
    dst_dir = os.path.join('sound_input', dir_)
    os.makedirs(dst_dir, exist_ok=True)

    for file_ in os.listdir(src_dir):
        # split on the real extension instead of searching for the substring 'au',
        # which mis-slices names that contain "au" elsewhere (or not at all)
        base, ext = os.path.splitext(file_)
        if ext.lower() != '.au':
            continue

        src_path = os.path.join(src_dir, file_)
        dst_path = os.path.join(dst_dir, base + '.wav')

        # argument list (no shell): file names with spaces or shell metacharacters are safe
        result = subprocess.run(['sox', src_path, dst_path])
        if result.returncode != 0:
            # report the failure instead of silently ignoring it, but keep converting
            print("sox failed ({}) for file: {}".format(result.returncode, src_path))

### Extração das Features

Após converter os áudios para um formato que possa ser manipulado pelo Python, iremos extrair informações importantes a respeito de cada som e montar nosso dataframe, que será usado como input dos modelos de aprendizado de máquina.

In [3]:
# import das bibliotecas usadas nessa etapa de extracao das features
from scipy.stats import kurtosis
from scipy.stats import skew
import librosa

In [4]:
# Map each genre name to an integer label (to make classification easier).
# The label of a genre is its position in the list below.
genres = {
    name: label
    for label, name in enumerate(
        ['metal', 'disco', 'classical', 'hiphop', 'jazz',
         'country', 'pop', 'blues', 'reggae', 'rock']
    )
}

In [7]:
def splitsongs(X, window = 0.1, overlap = 0.5):
    """
        Split a signal into equally sized, overlapping windows.

        :param: X - array-like with the audio samples (split along its first axis)
        :param: window - length of each window as a fraction of the signal length
        :param: overlap - fraction of a window that is shared with the next one

        :return: array with one row per window; every row has exactly
                 int(len(X) * window) samples (a trailing partial window is dropped)
        :raises ValueError: if window/overlap produce an empty window or a
                            non-positive step between windows
    """
    X = np.asarray(X)

    # Window length and step (in samples) derived from the signal size
    xshape = X.shape[0]
    chunk = int(xshape*window)
    offset = int(chunk*(1.-overlap))

    if chunk < 1 or offset < 1:
        raise ValueError(
            "window={} and overlap={} give chunk={} and offset={} for a signal of "
            "{} samples; both must be >= 1".format(window, overlap, chunk, offset, xshape))

    # Only start where a full window still fits: a shorter last window would make
    # the rows ragged, which np.array cannot stack into a regular 2-D array.
    starts = range(0, xshape - chunk + 1, offset)
    if len(starts) == 0:
        # window longer than the signal itself: no full window fits
        return np.empty((0, chunk) + X.shape[1:], dtype=X.dtype)

    return np.array([X[i:i+chunk] for i in starts])

As features que serão extraídas de cada uma das músicas são:
1. **Spectral Centroid**

> Indica onde o “centro de massa” de um som está localizado e é calculado como a média ponderada das frequências presentes no som.

2. **Spectral Rolloff**

> É uma medida da forma do sinal. Representa a frequência abaixo da qual uma percentagem especificada da energia espectral total (por exemplo, 85%) está situada.

3. **Zero Crossing Rate**

> O *Zero Crossing Rate* é a taxa de mudanças de sinal ao longo de um sinal, ou seja, a taxa na qual o sinal muda de positivo para negativo ou de volta.

4. **Root Mean Square**

> É uma média dos valores de amplitude ao longo do tempo, o que possibilita uma maneira mais precisa de medir o volume em relação à escala digital completa.

5. **Spectral Flux**

> É uma medida da rapidez com que o espectro de potência de um sinal está mudando, calculada pela comparação do espectro de potência de um quadro com o espectro de potência do quadro anterior.

6. **Chroma features**

> Os *Chroma features* são uma representação interessante e poderosa para o áudio de música, no qual todo o espectro é projetado em 12 faixas, representando os 12 semitons (ou cromas) distintos da oitava musical.


7. **Spectral Bandwidth**

> É o intervalo de comprimento de onda no qual uma quantidade espectral irradiada não é menor que a metade do seu valor máximo. É uma medida da extensão do espectro.

8. **Mel Frequency Cepstral Coefficients (MFCCs)**

> Os *Mel frequency cepstral coefficients (MFCCs)* de um sinal são um pequeno conjunto de características (geralmente em torno de 10–20) que descrevem a forma geral de um envelope espectral. Eles modelam as características da voz humana.


In [6]:
def get_features(y, sr, n_fft = 1024, hop_length = 512):
    """
        Compute frame-wise audio descriptors with librosa and summarise each one
        by its mean, standard deviation, kurtosis and skewness.

        :param: y - audio time series
        :param: sr - sampling rate of the time series
        :param: n_fft - FFT window size (also used as the frame length for ZCR and RMS)
        :param: hop_length - number of samples between successive frames

        :return: dict with the keys '<name>_mean', '<name>_std', '<name>_kurtosis'
                 and '<name>_skew' for every descriptor (centroid, roloff, flux, rmse,
                 zcr, chroma, bandwidth, mfcc_0 ... mfcc_12) plus the key 'tempo'
    """
    # Newer librosa releases renamed feature.rmse -> feature.rms and moved
    # beat.tempo -> feature.tempo; use the new name when present, else the old one.
    rms_fn = getattr(librosa.feature, 'rms', None) or librosa.feature.rmse
    tempo_fn = getattr(librosa.feature, 'tempo', None) or librosa.beat.tempo

    # The audio is always passed as y=... because newer librosa releases reject
    # it as a positional argument. Output key names ('roloff', 'rmse', ...) are kept.
    features = {
        'centroid': librosa.feature.spectral_centroid(y=y, sr=sr, n_fft=n_fft, hop_length=hop_length).ravel(),
        'roloff': librosa.feature.spectral_rolloff(y=y, sr=sr, n_fft=n_fft, hop_length=hop_length).ravel(),
        # NOTE(review): onset strength is computed with librosa's default framing,
        # not with n_fft/hop_length - kept as in the original to leave values unchanged.
        'flux': librosa.onset.onset_strength(y=y, sr=sr).ravel(),
        'rmse': rms_fn(y=y, frame_length=n_fft, hop_length=hop_length).ravel(),
        'zcr': librosa.feature.zero_crossing_rate(y=y, frame_length=n_fft, hop_length=hop_length).ravel(),
        # all chroma bins are flattened together, so the statistics span every bin
        'chroma': librosa.feature.chroma_stft(y=y, sr=sr, n_fft=n_fft, hop_length=hop_length).ravel(),
        'bandwidth': librosa.feature.spectral_bandwidth(y=y, sr=sr, n_fft=n_fft, hop_length=hop_length).ravel(),
    }

    # MFCCs get special treatment: each coefficient becomes its own descriptor.
    # sr is passed explicitly so the result does not depend on librosa's default rate.
    mfcc = librosa.feature.mfcc(y=y, sr=sr, n_fft=n_fft, hop_length=hop_length, n_mfcc=13)
    for idx, v_mfcc in enumerate(mfcc):
        features['mfcc_{}'.format(idx)] = v_mfcc.ravel()

    # summary statistics of every descriptor
    def get_moments(descriptors):
        result = {}
        for k, v in descriptors.items():
            result['{}_mean'.format(k)] = np.mean(v)
            result['{}_std'.format(k)] = np.std(v)
            result['{}_kurtosis'.format(k)] = kurtosis(v)
            result['{}_skew'.format(k)] = skew(v)
        return result

    dict_agg_features = get_moments(features)
    # tempo is a single estimate for the whole clip, so it is stored as-is
    dict_agg_features['tempo'] = tempo_fn(y=y, sr=sr)[0]

    return dict_agg_features

In [17]:
# Walk through sound_input/<genre>/<file>, split every audio file into windows and
# compute the features of each window; one dict per window is collected in arr_features.
arr_features = []
for dir_ in os.listdir('sound_input'):
    # macOS metadata entries (.DS_Store) are deleted rather than processed
    if '.DS_' in dir_:
        os.remove('/'.join(['sound_input', dir_]))
        continue

    for file_ in os.listdir('/'.join(['sound_input', dir_])):
        file_name = '/'.join(['sound_input', dir_, file_])
        if '.DS_' in file_name:
            os.remove(file_name)
            continue

        print("Reading file: {}".format(file_name))
        signal, sr = librosa.load(file_name)
        signals = splitsongs(signal)
        for sig in signals:
            # label every window with the numeric id of its genre folder
            features = get_features(sig, sr)
            features['genre'] = genres[dir_]
            arr_features.append(features)

Reading file: sound_input/pop/pop.00027.wav
Reading file: sound_input/pop/pop.00033.wav
Reading file: sound_input/pop/pop.00032.wav
Reading file: sound_input/pop/pop.00026.wav
Reading file: sound_input/pop/pop.00030.wav
Reading file: sound_input/pop/pop.00024.wav
Reading file: sound_input/pop/pop.00018.wav
Reading file: sound_input/pop/pop.00019.wav
Reading file: sound_input/pop/pop.00025.wav
Reading file: sound_input/pop/pop.00031.wav
Reading file: sound_input/pop/pop.00009.wav
Reading file: sound_input/pop/pop.00035.wav
Reading file: sound_input/pop/pop.00021.wav
Reading file: sound_input/pop/pop.00020.wav
Reading file: sound_input/pop/pop.00034.wav
Reading file: sound_input/pop/pop.00008.wav
Reading file: sound_input/pop/pop.00022.wav
Reading file: sound_input/pop/pop.00036.wav
Reading file: sound_input/pop/pop.00037.wav
Reading file: sound_input/pop/pop.00023.wav
Reading file: sound_input/pop/pop.00044.wav
Reading file: sound_input/pop/pop.00050.wav
Reading file: sound_input/pop/po

Reading file: sound_input/metal/metal.00003.wav
Reading file: sound_input/metal/metal.00017.wav
Reading file: sound_input/metal/metal.00016.wav
Reading file: sound_input/metal/metal.00002.wav
Reading file: sound_input/metal/metal.00028.wav
Reading file: sound_input/metal/metal.00014.wav
Reading file: sound_input/metal/metal.00000.wav
Reading file: sound_input/metal/metal.00001.wav
Reading file: sound_input/metal/metal.00015.wav
Reading file: sound_input/metal/metal.00029.wav
Reading file: sound_input/metal/metal.00011.wav
Reading file: sound_input/metal/metal.00005.wav
Reading file: sound_input/metal/metal.00039.wav
Reading file: sound_input/metal/metal.00038.wav
Reading file: sound_input/metal/metal.00004.wav
Reading file: sound_input/metal/metal.00010.wav
Reading file: sound_input/metal/metal.00006.wav
Reading file: sound_input/metal/metal.00012.wav
Reading file: sound_input/metal/metal.00013.wav
Reading file: sound_input/metal/metal.00007.wav
Reading file: sound_input/disco/disco.00

Reading file: sound_input/blues/blues.00008.wav
Reading file: sound_input/blues/blues.00036.wav
Reading file: sound_input/blues/blues.00022.wav
Reading file: sound_input/blues/blues.00023.wav
Reading file: sound_input/blues/blues.00037.wav
Reading file: sound_input/blues/blues.00012.wav
Reading file: sound_input/blues/blues.00006.wav
Reading file: sound_input/blues/blues.00007.wav
Reading file: sound_input/blues/blues.00013.wav
Reading file: sound_input/blues/blues.00005.wav
Reading file: sound_input/blues/blues.00011.wav
Reading file: sound_input/blues/blues.00039.wav
Reading file: sound_input/blues/blues.00038.wav
Reading file: sound_input/blues/blues.00010.wav
Reading file: sound_input/blues/blues.00004.wav
Reading file: sound_input/blues/blues.00028.wav
Reading file: sound_input/blues/blues.00000.wav
Reading file: sound_input/blues/blues.00014.wav
Reading file: sound_input/blues/blues.00015.wav
Reading file: sound_input/blues/blues.00001.wav
Reading file: sound_input/blues/blues.00

Reading file: sound_input/classical/classical.00047.wav
Reading file: sound_input/classical/classical.00053.wav
Reading file: sound_input/classical/classical.00057.wav
Reading file: sound_input/classical/classical.00043.wav
Reading file: sound_input/classical/classical.00094.wav
Reading file: sound_input/classical/classical.00080.wav
Reading file: sound_input/classical/classical.00081.wav
Reading file: sound_input/classical/classical.00095.wav
Reading file: sound_input/classical/classical.00042.wav
Reading file: sound_input/classical/classical.00056.wav
Reading file: sound_input/classical/classical.00040.wav
Reading file: sound_input/classical/classical.00054.wav
Reading file: sound_input/classical/classical.00068.wav
Reading file: sound_input/classical/classical.00083.wav
Reading file: sound_input/classical/classical.00097.wav
Reading file: sound_input/classical/classical.00096.wav
Reading file: sound_input/classical/classical.00082.wav
Reading file: sound_input/classical/classical.00

Reading file: sound_input/rock/rock.00043.wav
Reading file: sound_input/rock/rock.00057.wav
Reading file: sound_input/rock/rock.00080.wav
Reading file: sound_input/rock/rock.00094.wav
Reading file: sound_input/rock/rock.00030.wav
Reading file: sound_input/rock/rock.00024.wav
Reading file: sound_input/rock/rock.00018.wav
Reading file: sound_input/rock/rock.00019.wav
Reading file: sound_input/rock/rock.00025.wav
Reading file: sound_input/rock/rock.00031.wav
Reading file: sound_input/rock/rock.00027.wav
Reading file: sound_input/rock/rock.00033.wav
Reading file: sound_input/rock/rock.00032.wav
Reading file: sound_input/rock/rock.00026.wav
Reading file: sound_input/rock/rock.00022.wav
Reading file: sound_input/rock/rock.00036.wav
Reading file: sound_input/rock/rock.00037.wav
Reading file: sound_input/rock/rock.00023.wav
Reading file: sound_input/rock/rock.00009.wav
Reading file: sound_input/rock/rock.00035.wav
Reading file: sound_input/rock/rock.00021.wav
Reading file: sound_input/rock/roc

Reading file: sound_input/country/country.00038.wav
Reading file: sound_input/country/country.00010.wav
Reading file: sound_input/country/country.00004.wav
Reading file: sound_input/country/country.00009.wav
Reading file: sound_input/country/country.00021.wav
Reading file: sound_input/country/country.00035.wav
Reading file: sound_input/country/country.00034.wav
Reading file: sound_input/country/country.00020.wav
Reading file: sound_input/country/country.00008.wav
Reading file: sound_input/country/country.00036.wav
Reading file: sound_input/country/country.00022.wav
Reading file: sound_input/country/country.00023.wav
Reading file: sound_input/country/country.00037.wav
Reading file: sound_input/country/country.00033.wav
Reading file: sound_input/country/country.00027.wav
Reading file: sound_input/country/country.00026.wav
Reading file: sound_input/country/country.00032.wav
Reading file: sound_input/country/country.00024.wav
Reading file: sound_input/country/country.00030.wav
Reading file

In [18]:
# convert the list of feature dictionaries into a pandas DataFrame
df_features = pd.DataFrame(arr_features)
# preview the first rows (displayed as the cell output)
df_features.head()

Unnamed: 0,bandwidth_kurtosis,bandwidth_mean,bandwidth_skew,bandwidth_std,centroid_kurtosis,centroid_mean,centroid_skew,centroid_std,chroma_kurtosis,chroma_mean,...,rmse_std,roloff_kurtosis,roloff_mean,roloff_skew,roloff_std,tempo,zcr_kurtosis,zcr_mean,zcr_skew,zcr_std
0,0.792825,3072.275133,-1.201043,507.373242,-0.381658,2891.428242,-0.181103,927.581419,-0.996609,0.425177,...,0.062612,-0.297401,6602.908278,-0.86437,2199.022393,172.265625,1.592372,0.116526,1.00275,0.067756
1,-0.036886,3108.915592,-0.024222,474.607726,-0.275559,3004.068629,0.356506,926.389547,-0.967589,0.432395,...,0.059313,-0.708399,6694.672852,-0.429503,2034.134902,172.265625,1.310963,0.12032,0.941287,0.061194
2,-0.035471,2882.196422,0.507558,516.427608,1.138507,2707.77379,1.216916,1150.399104,-0.820446,0.411499,...,0.048298,-0.871476,5628.779297,0.228863,2278.133044,172.265625,6.739282,0.109788,2.212498,0.071455
3,-0.676834,2769.930595,-0.005707,505.898959,1.717588,2484.916583,1.29929,1124.511334,-0.655082,0.376489,...,0.049686,-0.925599,5121.589543,0.220824,2375.28141,172.265625,9.388941,0.101765,2.606648,0.07189
4,-0.415972,2986.59501,-0.510374,542.023823,-0.832082,2609.975944,0.112207,923.236585,-0.952736,0.410691,...,0.058376,-0.958447,5969.666466,-0.532559,2395.954406,172.265625,-0.38821,0.097341,0.692448,0.055044


In [20]:
# save the dataframe as a semicolon-separated .csv file, without the index column
df_features.to_csv('features_2.csv', sep=';', index=False)