In [1]:
import glob
import pandas as pd
from pydub import AudioSegment
import numpy as np
from scipy.fftpack import fft

In [2]:
filelist = glob.glob("../outputs/selection/**.mp3")

In [3]:
filelist[:10]

['../outputs/selection/3080_4.mp3',
 '../outputs/selection/2033_10.mp3',
 '../outputs/selection/2033_38.mp3',
 '../outputs/selection/3764_12.mp3',
 '../outputs/selection/6432_36.mp3',
 '../outputs/selection/6432_22.mp3',
 '../outputs/selection/3005_4.mp3',
 '../outputs/selection/4294_12.mp3',
 '../outputs/selection/4294_13.mp3',
 '../outputs/selection/3005_5.mp3']

In [4]:
df = pd.DataFrame(filelist).rename(columns={0:'path'})

In [5]:
df.head()

Unnamed: 0,path
0,../outputs/selection/3080_4.mp3
1,../outputs/selection/2033_10.mp3
2,../outputs/selection/2033_38.mp3
3,../outputs/selection/3764_12.mp3
4,../outputs/selection/6432_36.mp3


#### Añado otras columnas que me irán haciendo falta:
- file
- label = identificador de la persona que habla en cada audio
- audio_num = número de audio para luego ordenar por él y poder generar datos balanceados de train, validation y test


In [None]:
df['file'] = df.path.apply(lambda x: (x.split("/")[3]))
df['label'] = df.file.apply(lambda x: (x.split("_")[0]))
df['audio_num'] = df.file.apply(lambda x: (x.split("_")[1].split(".")[0]))
df['fft'] = df.path.apply(lambda x: featuresFourier(x))

In [28]:
import librosa
import librosa.display

def mfcc(x):
    SAMPLE_RATE = 22050
    y, sr = librosa.load(x, sr=SAMPLE_RATE, duration = 20) # Chop audio at 5 secs... 
    mfcc = librosa.feature.mfcc(y=y, sr=SAMPLE_RATE, n_mfcc = 5) # 5 MFCC components
    return mfcc.tolist()

In [29]:
df['mfcc'] = df.path.apply(lambda x: mfcc(x))





















In [8]:
df.sort_values(by = ('audio_num'),inplace = True)

In [9]:
names = list(df.label.unique())
names

['3080',
 '4350',
 '3764',
 '2609',
 '8461',
 '4294',
 '2033',
 '6432',
 '3528',
 '3005']

In [10]:
name = {}
for i, n in enumerate(names):
    name[n] = i

name

{'3080': 0,
 '4350': 1,
 '3764': 2,
 '2609': 3,
 '8461': 4,
 '4294': 5,
 '2033': 6,
 '6432': 7,
 '3528': 8,
 '3005': 9}

In [11]:
df['label_num']=df.label.apply(lambda x: name[str(x)])

In [12]:
df.head()

Unnamed: 0,path,file,label,audio_num,fft,label_num
41,../outputs/selection/3080_0.mp3,3080_0.mp3,3080,0,"[203624.0, 127549.38680364558, 168847.45136232...",0
127,../outputs/selection/4350_0.mp3,4350_0.mp3,4350,0,"[119002.0, 157200.9488338087, 35995.0562783664...",1
153,../outputs/selection/3764_0.mp3,3764_0.mp3,3764,0,"[5660.0, 6146.923850902667, 3015.403158484773,...",2
88,../outputs/selection/2609_0.mp3,2609_0.mp3,2609,0,"[709086.0, 366258.75843936566, 365174.59353223...",3
107,../outputs/selection/8461_0.mp3,8461_0.mp3,8461,0,"[109705.0, 67920.98584428975, 45095.8354531856...",4


Separo de la muestra train (80%) y test (20%).

In [13]:
train_80 = int(df.shape[0]*0.8)
test_20 = int(df.shape[0]*0.2)

print(train_80, test_20)

160 40


In [14]:
df_train = df[:train_80]
df_test = df[-test_20:]

In [15]:
df_train['label_num'].value_counts(normalize=True)

9    0.1
8    0.1
7    0.1
6    0.1
5    0.1
4    0.1
3    0.1
2    0.1
1    0.1
0    0.1
Name: label_num, dtype: float64

In [16]:
df_test['label_num'].value_counts(normalize=True)

9    0.1
8    0.1
7    0.1
6    0.1
5    0.1
4    0.1
3    0.1
2    0.1
1    0.1
0    0.1
Name: label_num, dtype: float64

In [17]:
y = df['label_num'].to_numpy()
X = np.vstack(df['fft'])

print(y.shape, X.shape)

(200,) (200, 320000)


In [18]:
X_train, y_train, X_test, y_test = X[:train_80], y[:train_80], X[-test_20:], y[-test_20:]
print(X_train.shape, X_test.shape, y_train.shape, y_test.shape)

(160, 320000) (40, 320000) (160,) (40,)


## Neural Network

In [19]:
from keras import layers
from keras import models
from keras.layers.normalization import BatchNormalization
from keras.models import model_from_json
from sklearn import preprocessing
import json

Using TensorFlow backend.


In [22]:
number_classes = len(df.label_num.unique())

inshape=(X_train.shape[1],)
    
model = models.Sequential()
model.add(layers.Dense(512, activation='relu', input_shape=inshape))
model.add(layers.Dense(256, activation='relu'))

model.add(BatchNormalization())
model.add(layers.Dense(128, activation='relu'))
model.add(layers.Dense(128, activation='relu'))
model.add(layers.Dense(number_classes,activation='softmax'))
model.compile(optimizer='adam',
                loss='sparse_categorical_crossentropy',
                metrics=['accuracy', 'mse'])
model.fit(X_train,
            y_train,
            epochs=50,
            batch_size=50,
            validation_data=(X_test, y_test))


results = model.evaluate(X_test, y_test)
print("\n")
print("Resultados: ",results)
print("\n")

Train on 160 samples, validate on 40 samples
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


Resultados:  [3.664210557937622, 0.550000011920929, 27.681848526000977]




In [24]:
predictions = model.predict(X_test)
#print("Predicción: ", predictions)

print("\nModelo: ", model,"\n")
name='../models/10_epoch'

model_json = model.to_json()
with open(name+'.json', "w") as json_file:
    json.dump(model_json, json_file)

model.save_weights(name+'.h5')
print(f"Modelo {name}.h5 guardado")


Modelo:  <keras.engine.sequential.Sequential object at 0x13d31db10> 

Modelo ../models/10_epoch.h5 guardado


In [25]:
df_predict = pd.DataFrame()
df_predict['y_test'] = y_test
df_predict['y_predict'] = [list(e) for e in predictions]
df_predict['label_predict'] = df_predict['y_predict'].apply(lambda x: x.index(max(x)))
df_predict['diff'] = (df_predict.y_test == df_predict.label_predict)

df_predict.head()

Unnamed: 0,y_test,y_predict,label_predict,diff
0,9,"[8.08595e-06, 9.250745e-11, 6.952356e-11, 4.30...",9,True
1,0,"[0.88529265, 1.2027399e-06, 6.243277e-06, 0.01...",0,True
2,5,"[1.4377359e-05, 2.4192543e-12, 1.2783491e-12, ...",5,True
3,7,"[2.1403764e-07, 6.0543466e-11, 4.8619633e-12, ...",7,True
4,3,"[1.1235422e-06, 2.117083e-13, 1.2766895e-11, 0...",3,True


In [26]:
df_predict['diff'].value_counts()

True     22
False    18
Name: diff, dtype: int64