In [1]:
import librosa 
import librosa.display as display

import pandas as pd 
import numpy as np 
import scipy as sp 
import matplotlib.pyplot as plt 
%matplotlib inline

import os 
import time

In [2]:
def read_captchas(path):
    wavs = [file for file in os.listdir(path) if os.path.isfile(os.path.join(path, file)) and file.endswith('.wav')]
    captchas = []
    for wav in wavs:
        signal, sampling_rate = librosa.load(os.path.join(path,wav), None) 
        label = wav.split('.wav')[0]
        captchas.append((signal,sampling_rate,label))
    return captchas

In [3]:
def split_characters(captchas):
    chars = [] 
    for captcha in  captchas:
        signal = captcha[0]
        sampling_rate=captcha[1]
        label=captcha[2]
        chars += [(signal[sampling_rate*2*i:sampling_rate*2*(i+1)],sampling_rate,label[i]) for i in range(4)]
    return chars

In [4]:
def extract_features(chars):
    data = pd.DataFrame()
    for j, char in enumerate(chars):
        signal = char[0]
        sampling_rate=char[1]
        label=char[2]
        row = pd.DataFrame()
        row['label'] = [label]
#         row['signal_mode'] = [np.mean(sp.stats.mode(signal))]
#         row['signal_min'] = [np.min(signal)]
#         row['signal_max'] = [np.max(signal)]
#         row['signal_mean'] = [np.mean(signal)]
#         row['signal_std'] = [np.std(signal)]
#         row['signal_median'] = [np.median(signal)]
#         row['signal_iqr'] = [sp.stats.iqr(signal)]
#         row['signal_kutosis'] = [sp.stats.kurtosis(signal)]
#         row['signal_skewness'] = [sp.stats.skew(signal)]   
        mfcc = librosa.feature.mfcc(signal,sampling_rate)
        for i,mfcc in enumerate(mfcc):
            row['mfcc_'+str(i)+'_mode'] = [np.mean(sp.stats.mode(mfcc))]
            row['mfcc_'+str(i)+'_min'] = [np.min(mfcc)]
            row['mfcc_'+str(i)+'_max'] = [np.max(mfcc)]
            row['mfcc_'+str(i)+'_mean'] = [np.mean(mfcc)]
            row['mfcc_'+str(i)+'_std'] = [np.std(mfcc)]
            row['mfcc_'+str(i)+'_median'] = [np.median(mfcc)]
            row['mfcc_'+str(i)+'_iqr'] = [sp.stats.iqr(mfcc)]
            row['mfcc_'+str(i)+'_kutosis'] = [sp.stats.kurtosis(mfcc)]
            row['mfcc_'+str(i)+'_skewness'] = [sp.stats.skew(mfcc)]
        data = data.append(row)
    return data.reset_index(drop=True)

In [5]:
start = time.time()
train_captchas = read_captchas('base_treinamento_I')
print('read captchas: ', len(train_captchas))
train_chars = split_characters(train_captchas)
print('splited chars: ', len(train_chars))
train_data = extract_features(train_chars)
end = time.time()
print(end-start)

print('final dataframe: ', train_data.shape)
train_data.to_csv('train_data_librosa.csv',index=False)

read captchas:  200
splited chars:  800
1506508678.3855455
final dataframe:  (800, 181)


In [7]:
start = time.time()
valid_captchas = read_captchas('base_validacao_I')
print('read captchas: ', len(valid_captchas))
valid_chars = split_characters(valid_captchas)
print('splited chars: ', len(valid_chars))
valid_data = extract_features(valid_chars)
end = time.time()
print(end-start)

print('final dataframe: ', valid_data.shape)
valid_data.to_csv('valid_data_librosa.csv',index=False)

read captchas:  147
splited chars:  588
80.27947902679443
final dataframe:  (588, 181)
