In [1]:
import librosa 
import librosa.display as display

import pandas as pd 
import numpy as np 
import scipy as sp 
import matplotlib.pyplot as plt 
%matplotlib inline

import os
import time

from sklearn.preprocessing import StandardScaler, RobustScaler
from sklearn.feature_selection import VarianceThreshold, mutual_info_classif, chi2, SelectKBest, SelectPercentile
from sklearn.model_selection import GridSearchCV, PredefinedSplit
from sklearn.metrics import accuracy_score, f1_score, precision_score, recall_score
from sklearn.naive_bayes import GaussianNB
from sklearn.neighbors import KNeighborsClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier

In [2]:
def read_captchas(path):
    wavs = [file for file in os.listdir(path) if os.path.isfile(os.path.join(path, file)) and file.endswith('.wav')]
    captchas = []
    for wav in wavs:
        signal, sampling_rate = librosa.load(os.path.join(path,wav), None) 
        label = wav.split('.wav')[0]
        captchas.append((signal,sampling_rate,label))
    return captchas

In [3]:
def split_characters(captcha):
    chars = [] 
    signal = captcha[0]
    sampling_rate=captcha[1]
    label=captcha[2]
    chars += [(signal[sampling_rate*2*i:sampling_rate*2*(i+1)],sampling_rate,label[i]) for i in range(4)]
    return chars

In [4]:
def extract_features(chars):
    data = pd.DataFrame()
    for j, char in enumerate(chars):
        signal = char[0]
        sampling_rate=char[1]
        label=char[2]
        row = pd.DataFrame()
        row['label'] = [label]
        mfcc = librosa.feature.mfcc(signal,sampling_rate)
        for i,mfcc in enumerate(mfcc):
            row['mfcc_'+str(i)+'_mode'] = [np.mean(sp.stats.mode(mfcc))]
            row['mfcc_'+str(i)+'_min'] = [np.min(mfcc)]
            row['mfcc_'+str(i)+'_max'] = [np.max(mfcc)]
            row['mfcc_'+str(i)+'_mean'] = [np.mean(mfcc)]
            row['mfcc_'+str(i)+'_std'] = [np.std(mfcc)]
            row['mfcc_'+str(i)+'_median'] = [np.median(mfcc)]
            row['mfcc_'+str(i)+'_iqr'] = [sp.stats.iqr(mfcc)]
            row['mfcc_'+str(i)+'_kutosis'] = [sp.stats.kurtosis(mfcc)]
            row['mfcc_'+str(i)+'_skewness'] = [sp.stats.skew(mfcc)]
        data = data.append(row)
    return data.reset_index(drop=True)

In [None]:
def train_model(train_path,valid_path):
    train_captchas = read_captchas(train_path)
    train_chars = []
    for captcha in train_captchas:
        train_chars += split_characters(captcha)
    train_data = extract_features(train_chars)

    valid_captchas = read_captchas(valid_path)
    valid_chars =[]
    for captcha in valid_captchas:
        valid_chars += split_characters(captcha)
    valid_data = extract_features(valid_chars)
    
    X_train = train_data.drop(['label'],axis=1)
    y_train = train_data['label']
    
    X_valid = valid_data.drop(['label'],axis=1)
    y_valid = valid_data['label']
        
    scaler = StandardScaler()
    X_train = pd.DataFrame(scaler.fit_transform(X_train),columns=X_train.columns)
    X_valid = pd.DataFrame(scaler.transform(X_valid),columns=X_valid.columns)
    
    estimators = [GaussianNB(),KNeighborsClassifier(),LogisticRegression(),
              DecisionTreeClassifier(),RandomForestClassifier()]
    
    param_grids = [{},
               {'n_neighbors':[2,5,7,10,12,15], 'weights':['uniform','distance'],'p':[1,2]},
               {'C':np.logspace(-4,4,9),'penalty':['l1','l2'],'class_weight':[None,'balanced']},
               {'max_depth':np.linspace(2,15,10),'class_weight':[None,'balanced']},
               {'max_depth':np.linspace(2,15,10),'n_estimators':[50,100],'class_weight':[None,'balanced']},
         ]

    X = np.concatenate((X_train.values,X_valid.values),axis=0)
    y = np.concatenate((y_train.values,y_valid.values),axis=0)
    test_fold = []
    for i in range(len(X_train)):
        test_fold.append(-1)
    for i in range(len(X_valid)):
        test_fold.append(0)
    cv = PredefinedSplit(test_fold=test_fold)
    
    results = {'estimator':[],'cv score':[]}
    best_estimator = None
    best_score = 0
    for estimator,param_grid in zip(estimators,param_grids):
        gridsearch = GridSearchCV(estimator,param_grid,scoring='accuracy',cv=cv)
        gridsearch.fit(X,y)
        results['estimator'].append(str(gridsearch.best_estimator_).split('(')[0])
        results['cv score'].append(gridsearch.best_score_)
        if gridsearch.best_score_ > best_score:
            best_score = gridsearch.best_score_
            best_estimator = gridsearch.best_estimator_
    best_estimator.fit(X_train,y_train)
    return scaler, best_estimator

In [10]:
def evaluation_per_character(scaler,estimator,path):
    captchas = read_captchas(path)
    chars = []
    for captcha in captchas:
        chars += split_characters(captcha)
    data = extract_features(chars)
    X = data.drop(['label'],axis=1)
    y = data['label']
    X = pd.DataFrame(scaler.transform(X),columns=X.columns)
    y_pred = best_estimator.predict(X)
    return accuracy_score(y, y_pred), f1_score(y,y_pred,average='macro')

In [11]:
def evaluation_per_captcha(scaler,estimator,path):
    accuracy = 0
    captchas = read_captchas(path)
    for captcha in captchas:
        chars = split_characters(captcha)
        data = extract_features(chars)
        X = data.drop(['label'],axis=1)
        y = data['label']
        X = pd.DataFrame(scaler.transform(X),columns=X.columns)
        y_pred = best_estimator.predict(X)
        if np.equal(y_pred,y).sum() == 4:
            accuracy+=1
    return accuracy/len(captchas)

In [12]:
scaler, best_estimator = train_model('base_treinamento_I','base_validacao_I')

(800, 180) (588, 180)


In [13]:
evaluation_per_character(scaler,best_estimator,'base_treinamento_I')

(0.95999999999999996, 0.95565157732719452)

In [14]:
evaluation_per_character(scaler,best_estimator,'base_validacao_I')

(0.78911564625850339, 0.77946396989285083)

In [15]:
evaluation_per_captcha(scaler,best_estimator,'base_treinamento_I')

0.855

In [18]:
evaluation_per_captcha(scaler,best_estimator,'base_validacao_I')

0.40816326530612246