In [1]:
import librosa
import pandas as pd
import numpy as np
import os
import csv
import IPython
import random
import matplotlib.pyplot as plt
import librosa.display
from random import randint
from random import sample

import warnings
warnings.filterwarnings('ignore')

In [2]:
# Machine Learning
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler, MinMaxScaler
from sklearn.feature_selection import SelectKBest, chi2
from sklearn.tree import ExtraTreeClassifier
from sklearn.metrics import accuracy_score

In [3]:
# Download the zipped audio dataset into the current working directory.
# NOTE(review): the output recorded below shows this URL answering "404 Not Found",
# so audios.zip is never created and the cells that read ./datasets/audios fail.
!wget https://github.com/sullyvan15/datasets/raw/master/audios.zip


--2020-12-05 03:06:58--  https://github.com/sullyvan15/datasets/raw/master/audios.zip
Resolving github.com (github.com)... 140.82.121.4
Connecting to github.com (github.com)|140.82.121.4|:443... connected.
HTTP request sent, awaiting response... 404 Not Found
2020-12-05 03:06:58 ERROR 404: Not Found.



In [4]:
!mkdir datasets ; mkdir datasets/audios
!mv audios.zip datasets/audios 
!cd datasets/audios ; unzip  audios.zip ; rm -rf audios.zip

mv: cannot stat 'audios.zip': No such file or directory
unzip:  cannot find or open audios.zip, audios.zip.zip or audios.zip.ZIP.


In [5]:
import IPython

# Path of one sample from the 'homem' folder of the dataset.
audio_file = './datasets/audios/homem/homem_(5).ogg'
# Render an inline audio player for the sample.
# NOTE(review): the recorded output is a ValueError — presumably because the file is
# missing after the failed download above; confirm once the dataset is available.
IPython.display.Audio(audio_file)

ValueError: ignored

In [None]:
import librosa

# Decode the sample selected above; keep both the signal and its sampling rate.
audio_data, sr = librosa.load(audio_file)

# Plot the decoded signal.
# NOTE(review): waveplot appears to have been replaced by waveshow in newer librosa
# releases — confirm which librosa version this notebook targets.
librosa.display.waveplot(audio_data)

## Comparar ondas sonoras


In [None]:
def load_samples(label, numbers):
    """Load the numbered ``.ogg`` samples of one class.

    Parameters
    ----------
    label : str
        Dataset sub-folder, which is also the file-name prefix
        ('homem' or 'mulher').
    numbers : iterable of int
        Sample numbers; each one maps to
        ``./datasets/audios/<label>/<label>_(<number>).ogg``.

    Returns
    -------
    list
        One decoded signal per requested number, in the order given.
        The sampling rate returned by ``librosa.load`` is discarded.
    """
    return [
        librosa.load('./datasets/audios/{0}/{0}_({1}).ogg'.format(label, number))[0]
        for number in numbers
    ]


# Three hand-picked samples per class, used by the comparison plots below.
homens = load_samples('homem', [1, 6, 13])
mulheres = load_samples('mulher', [11, 12, 16])

In [None]:
# 3x2 grid: one row per sample pair, 'homens' on the left and 'mulheres' on the right.
fig, axs = plt.subplots(3, 2, figsize=(15,10))

for row, (wave_homem, wave_mulher) in enumerate(zip(homens, mulheres)):
    librosa.display.waveplot(wave_homem, ax=axs[row, 0])
    # Orange distinguishes the right-hand column from the default-coloured left one.
    librosa.display.waveplot(wave_mulher, ax=axs[row, 1], color='orange')

## Espectrograma

In [None]:
# Sample from the 'mulher' folder used for the spectrogram cells that follow.
audio_file = './datasets/audios/mulher/mulher_(17).ogg'
# Decode it; audio_data and sr are read by the next cells.
audio_data, sr = librosa.load(audio_file)

In [None]:
# Short-time Fourier transform of the sample, then magnitude converted to dB.
X = librosa.stft(audio_data)
Xdb = librosa.amplitude_to_db(np.abs(X))

# Wide single-axes figure; frequency axis in Hz.
fig, ax = plt.subplots(figsize=(20, 5))
mesh = librosa.display.specshow(Xdb, sr=sr, x_axis='time', y_axis='hz', ax=ax)
# Colour scale for the dB values, attached to the same axes.
fig.colorbar(mesh, ax=ax)

In [None]:
# Same transform as the previous cell, recomputed so this cell runs on its own.
X = librosa.stft(audio_data)
Xdb = librosa.amplitude_to_db(np.abs(X))

# Wide single-axes figure; this time the frequency axis is logarithmic.
fig, ax = plt.subplots(figsize=(20, 5))
mesh = librosa.display.specshow(Xdb, sr=sr, x_axis='time', y_axis='log', ax=ax)
# Colour scale for the dB values, attached to the same axes.
fig.colorbar(mesh, ax=ax)

## Comparando Espectrogramas

In [None]:
# 3x2 grid: one row per sample, 'homens' on the left and 'mulheres' on the right.
fig, axs = plt.subplots(3, 2, figsize=(15,10))

def spec(audio_data, ax, sample_rate=22050):
    """Draw the log-frequency dB spectrogram of one signal on an existing axes.

    Parameters
    ----------
    audio_data : array
        Decoded signal, as returned by ``librosa.load``.
    ax : matplotlib axes
        Axes of the grid that receives the spectrogram.
    sample_rate : int, optional
        Sampling rate handed to ``specshow`` to scale the axes. The default
        of 22050 is assumed to match ``librosa.load``'s default resampling
        rate — pass the real rate if the audio was loaded differently.
    """
    X = librosa.stft(audio_data)
    Xdb = librosa.amplitude_to_db(np.abs(X))
    # Draw straight onto `ax`. No plt.figure() here: opening a figure per call
    # only produced an extra empty figure for every spectrogram drawn.
    librosa.display.specshow(Xdb, sr=sample_rate, x_axis='time', y_axis='log', ax=ax)

for i, audio_data in enumerate(homens):
    spec(audio_data, axs[i, 0], sr)

for i, audio_data in enumerate(mulheres):
    spec(audio_data, axs[i, 1], sr)

## Feature Extraction

In [None]:
# Parallel lists, one entry per audio file: path, flattened dB spectrogram, label.
files = list()
X = list()
y = list()


for label in ['homem', 'mulher']:
    # sorted(): os.listdir returns entries in arbitrary order, which would make the
    # row order — and therefore the seeded train/test split — vary between machines.
    for file in sorted(os.listdir('./datasets/audios/{}'.format(label))):
        audio_file = './datasets/audios/{}/{}'.format(label, file)
        # Use a fixed 1-second window starting 0.5 s into each recording.
        # NOTE(review): a recording shorter than 1.5 s would presumably yield a
        # shorter feature vector and a ragged X — confirm the dataset has none.
        audio_data, sr = librosa.load(audio_file, offset=0.5, duration=1)
        audio_stft = librosa.stft(audio_data)
        audio_db = librosa.amplitude_to_db(np.abs(audio_stft))
        files.append(audio_file)
        # Flatten the 2-D spectrogram into a single feature vector.
        X.append(audio_db.ravel())
        y.append(label)

In [None]:
# Rescale every feature to the [0, 1] range; chi2 requires non-negative feature
# values, which this scaling guarantees.
scaler = MinMaxScaler((0, 1))
X_scaler = scaler.fit_transform(X)
# Keep the 1000 features with the highest chi2 score against the labels.
# NOTE(review): scaler and selector are both fitted on all samples, before the
# train/test split in the next section, so the test rows influence the selected
# features — the accuracy reported there may be optimistic.
X_best = SelectKBest(chi2, k=1000).fit_transform(X_scaler, y)

# Classificação

In [None]:
# Hold out 30% of the samples for evaluation; the fixed seed keeps the split repeatable.
# The features are wrapped in a DataFrame so each row keeps its original position as index.
X_train, X_test, y_train, y_test = train_test_split(pd.DataFrame(X_best), y, test_size=0.3, random_state=42)

In [None]:
# Fit the classifier on the training rows; the seed fixes its random choices.
# NOTE(review): ExtraTreeClassifier (sklearn.tree) is a single randomized tree; if the
# ensemble was intended, that is ExtraTreesClassifier in sklearn.ensemble — confirm.
clf = ExtraTreeClassifier(random_state=42)
clf.fit(X_train, y_train)

In [None]:
# Predict the held-out rows and display the accuracy against the true labels.
y_pred = clf.predict(X_test)
accuracy_score(y_test, y_pred)

In [None]:
# Side-by-side check of predicted vs. real label for every held-out row.
# assign() returns a new frame, so X_test itself is left untouched.
conferencia = X_test.assign(y_pred=y_pred, y_real=y_test)
# X_test kept the original row positions as its index, so an index join against
# the full list of paths attaches the right file name to each row.
nomes_arquivos = pd.DataFrame({'filename': files})
conferencia.join(nomes_arquivos)[['y_pred', 'y_real', 'filename']]

In [None]:
# Spectrogram of one sample, restricted to the same 1-second window
# (offset 0.5 s) that feature extraction uses.
audio_file = './datasets/audios/{0}/{0}_({1}).ogg'.format('homem', 2)

audio_data, sr = librosa.load(audio_file, offset=0.5, duration=1)
# Dedicated names: binding the STFT to `X` would overwrite the feature list built
# during feature extraction and break a re-run of the scaling cell.
sample_stft = librosa.stft(audio_data)
sample_db = librosa.amplitude_to_db(np.abs(sample_stft))
plt.figure(figsize=(20, 5))
librosa.display.specshow(sample_db, sr=sr, x_axis='time', y_axis='log')
plt.colorbar()

# Estrutura Final

In [None]:
from sklearn.base import BaseEstimator, TransformerMixin

class AudioDecode(BaseEstimator, TransformerMixin):
    """Pipeline step that turns audio file paths into flattened dB spectrograms.

    Parameters
    ----------
    offset : float, default 0.5
        Passed to ``librosa.load`` as ``offset`` (start of the window read
        from each file).
    duration : float, default 1
        Passed to ``librosa.load`` as ``duration`` (length of that window).
    """

    def __init__(self, offset=0.5, duration=1):
        self.offset = offset
        self.duration = duration

    def fit(self, X, y=None):
        """Nothing is learned from the data; return ``self`` unchanged.

        The training paths are deliberately not kept on the instance, so they
        are not serialised together with the fitted pipeline.
        """
        return self

    def transform(self, X, y=None):
        """Decode every file and return one feature row per path.

        Parameters
        ----------
        X : DataFrame, Series or sequence of str
            File paths. For a DataFrame the paths are read from its first
            column, whatever that column is called.
        y : ignored

        Returns
        -------
        pandas.DataFrame
            One row per input path, in input order, with a fresh 0..n-1 index.
        """
        # Select the first column by position: label lookup with 0 only works
        # when the column happens to be named 0.
        paths = pd.DataFrame(X).iloc[:, 0]

        rows = []
        for path in paths:
            audio_data, _ = librosa.load(path, offset=self.offset, duration=self.duration)
            audio_db = librosa.amplitude_to_db(np.abs(librosa.stft(audio_data)))
            # Flatten the 2-D spectrogram into a single feature vector.
            rows.append(audio_db.ravel())

        return pd.DataFrame(rows)

In [None]:
from sklearn.pipeline import Pipeline

# End-to-end model: file paths -> spectrogram features -> [0, 1] scaling ->
# 1000 best chi2 features -> tree classifier.
pipe = Pipeline(steps=[
    ('pre_process', AudioDecode(offset=0.5, duration=1)),
    ('scaler', MinMaxScaler(feature_range=(0, 1))),
    ('feature_select', SelectKBest(score_func=chi2, k=1000)),
    ('classifier', ExtraTreeClassifier(random_state=42)),
])

In [None]:
# Split the file paths (not precomputed features) 70/30 with a fixed seed.
# From here on X_train / X_test hold one column of paths and replace the numeric
# feature frames that carried the same names in the classification section.
X_train, X_test, y_train, y_test = train_test_split(pd.DataFrame(files), y, test_size=0.3, random_state=12)
# Every pipeline step is fitted on the training paths only.
pipe.fit(X_train, y_train)

In [None]:
# Predict the held-out files through the whole pipeline and display the accuracy.
y_pred = pipe.predict(X_test)
accuracy_score(y_test, y_pred)

In [None]:
!mkdir modelos

In [None]:
from joblib import dump, load

# Serialise the fitted pipeline to disk.
# NOTE(review): the pipeline contains the notebook-defined AudioDecode class, so that
# class presumably has to be defined/importable wherever the file is loaded — confirm.
dump(pipe, 'modelos/sound_recognition.joblib') 

In [None]:
# Read the pipeline back from the file written above.
# NOTE(review): joblib files are pickle-based; only load files from a trusted source.
modelo  = load('modelos/sound_recognition.joblib')

In [None]:
# Run the reloaded pipeline on the held-out file paths and display the predictions.
modelo.predict(X_test)

In [None]:
# Classify a single file: the path is wrapped in a one-row DataFrame because
# AudioDecode.transform iterates over DataFrame rows; [0] takes the only prediction.
modelo.predict(pd.DataFrame(['./datasets/audios/mulher/mulher_(10).ogg']))[0]

In [None]:
# Display the held-out file paths that the predictions above refer to.
X_test