# Jupyter Notebook - Procesamiento del habla
Primero, vamos a importar los paquetes necesarios para el desarrollo.

In [3]:
%matplotlib inline
import os
import plotly
import wav

import tensorflow as tf
import numpy as np

from matplotlib import pyplot
from IPython.display import Audio
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report
from sklearn.model_selection import cross_val_score, KFold, GridSearchCV
from sklearn.neighbors import KNeighborsClassifier
from sklearn.pipeline import Pipeline
from sklearn.decomposition import PCA
from sklearn.linear_model import SGDClassifier
from sklearn.utils import shuffle
from sklearn.base import BaseEstimator, ClassifierMixin

from plotly.graph_objs import Scatter, Layout, Bar, Figure

plotly.offline.init_notebook_mode()

ModuleNotFoundError: No module named 'matplotlib'

Comencemos primero definiendo algunas variables globales (parámetros del enunciado).

In [2]:
RUTA_DIRECTORIO_DATOS = "datos"
VENTANA_EN_SEGUNDOS = 10
SAMPLE_RATE = 16000
CANTIDAD_DE_FRAMES_A_PROCESAR = 318764 # Es el minimo de todos en este caso

Ahora vamos a cargar los archivos IPU y WAV

In [3]:
archivos_en_carpeta_datos = os.listdir(RUTA_DIRECTORIO_DATOS)
archivos_wav = []
archivos_ipu = []
for archivo in archivos_en_carpeta_datos:
    if (archivo[-3:] == "ipu"):
        archivos_ipu.append(archivo)
    elif (archivo[-3:] == "wav"):
        archivos_wav.append(archivo)

Creo mi set de datos (X's)

In [4]:
X = []
leng = []
for archivo_wav in archivos_wav:
    data, frames, _, duration = wav.load_from_wav(RUTA_DIRECTORIO_DATOS + "/" + archivo_wav)
    leng.append(len(data))
    X.append(data[:CANTIDAD_DE_FRAMES_A_PROCESAR])

X = np.asarray(X)
print(X.shape)

(184, 318764)


Creo las etiquetas (mis Y's)

In [5]:
y = []
y_sgd = []
for archivo_wav in archivos_wav:
    if archivo_wav[3] == "m":
        y.append([1,0])
        y_sgd.append(1)
    elif archivo_wav[3] == "f":
        y.append([0,1])
        y_sgd.append(0)
y = np.asarray(y)
y_sgd = np.asarray(y_sgd)

print(y.shape)
print(y_sgd.shape)

(184, 2)
(184,)


Testeo que los datos hayan sido cargados correctamente

In [6]:
Audio(data=X[25], rate=SAMPLE_RATE)

In [7]:
print(y[25])

[1 0]


Ahora separo en un set de entrenamiento y testing:

In [8]:
X, y = shuffle(X, y, random_state=42)
X_train = X[:150]
X_test = X[150:180]
y_train = y[:150]
y_test = y[150:180]

print("Shape X_train {}".format(np.shape(X_train)))
print("Shape X_test {}".format(np.shape(X_test)))
print("Shape y_train {}".format(np.shape(y_train)))
print("Shape y_test {}".format(np.shape(y_test)))

Shape X_train (150, 318764)
Shape X_test (30, 318764)
Shape y_train (150, 2)
Shape y_test (30, 2)


Ahora el siguiente paso es definir nuestro preprocesador de la entrada y nuestro clasificador. Para esta tarea vamos a utilizar Pipelines que se encargan de enviar mensajes de manera secuencial a los objetos que definamos en el Pipeline. En este caso vamos a pedirle el preproceso al objeto encargado de calcular las componentes principales, las cuales van a alimentar a nuestro clasificador

In [9]:
estimators = [("reduce_dim", PCA()), ('clf', SGDClassifier())]
pipeline = Pipeline(estimators)

Vamos a utilizar GridSearch para probar distintas configuraciones de parámetros sobre PCA y Knn.

In [10]:
N_COMPONENTS = [1, 2, 4, 8, 16, 32, 64, 128, 256, 512, 1024, 2048]
#N_NEIGHBORS = [1]

Creamos nuestro diccionario en el formato que toma GridSearchCV para efectivamente ejecutar las diferentes configuraciones

In [11]:
param_grid = [
    {
        'reduce_dim__n_components': N_COMPONENTS,
        #'knn__n_neighbors': N_NEIGHBORS,
    },
]

Para ver que también generaliza nuestro clasificador vamos a utilizar KFold Cross-Validation, en particular con 10 folds.

In [12]:
kfold = KFold(n_splits=5, shuffle=True)

Ahora nos falta fittear nuestro algoritmo y calcular los resultados sobre el conjunto de testeo

In [13]:
grid_search = GridSearchCV(estimator=pipeline, cv=kfold, param_grid=param_grid, n_jobs=-1)
grid_search.fit(X=X, y=y_sgd
               )
results = grid_search.cv_results_['mean_test_score']
legends = grid_search.cv_results_['params']

In [14]:
best_params = grid_search.best_params_
#best_n_neighbors = best_params['knn__n_neighbors']
best_n_components = best_params['reduce_dim__n_components']

print("La mejor mejor configuración de parámetros es: \n" + "Cantidad de componentes principales: " + str(best_n_components))

La mejor mejor configuración de parámetros es: 
Cantidad de componentes principales: 4


Una vez realizado esto, creamos los gráficos. Para esto creamos dos métodos, uno para la configuración del gráfico (es decir, necesitamos parsear los resultados obtenidos por PCA y KNN de manera que PlotLy los entienda y pueda graficar) y finalmente un método que nos permita graficar usando PlotLy.

In [15]:
def graph_config(legends, results):
    old_n_neighbors = legends[0]['knn__n_neighbors']
    set_of_lists_with_results = []
    list_with_results = []
    graph_names = []
    for index, legend in enumerate(legends):
        new_n_neighbors = legend['knn__n_neighbors']

        if new_n_neighbors == old_n_neighbors:
            list_with_results.append(results[index])
        else:
            graph_names.append(old_n_neighbors)
            set_of_lists_with_results.append(list_with_results)
            list_with_results = [results[index]]
            old_n_neighbors = new_n_neighbors
    graph_names.append(new_n_neighbors)
    set_of_lists_with_results.append(list_with_results)
    return graph_names, set_of_lists_with_results

def graph(N_COMPONENTS, graph_names, set_of_lists_with_results):
    traces = []
    for index, set in enumerate(set_of_lists_with_results):
        x = N_COMPONENTS
        y = set
        name = "cantidad de vecinos = " + str(graph_names[index])
        traces.append(Scatter(x=x, y=y, name=name))
    layout = Layout(
        xaxis=dict(
            title='Cantidad de componentes principales',
            type='log',
            autorange=True
        ),
        yaxis=dict(
            title='% Accuracy',
            type='log',
            autorange=True
        ),
        title="Medida de performance - Accuracy"
    )
    figure = Figure(data=traces, layout=layout)
    plotly.offline.iplot(figure)

Una vez hecho esto, pasemos a graficar:

In [17]:
#graph_names, set_of_lists_with_results = graph_config(legends, results)
graph(N_COMPONENTS, ["fafa"], [results])

Primero vamos a trabajar con redes convolucionales 1D. Para eso, definamos algunas funciones que nos serán útiles:

In [39]:
def reformat(dataset):
    shape = dataset.shape
    dataset = dataset.reshape(
        (shape[0], shape[1], 1)).astype(np.float32)
    return dataset

Formateo mi set de datos para que los interprete mi red neuronal

In [40]:
X = reformat(X)
print(X.shape)

(184, 318764, 1)


In [2]:
class CovNetClassifier(BaseEstimator, ClassifierMixin):
    def __init__(self, num_channels, image_size, batch_size, patch_size, epochs):
        # PARAMETROS
        self.num_channels = num_channels
        self.image_size = image_size
        self.batch_size = batch_size
        self.patch_size = patch_size
        self.epochs = epochs

        # PROPIEDADES DE LA RED
        # INICIALIZO EL GRAFO
        graph = tf.Graph()
        with graph.as_default():
            # Input data
            tf_X = tf.placeholder(
                tf.float32, shape=(self.batch_size, self.image_size, self.num_channels))
            tf_y = tf.placeholder(tf.float32, shape=(self.batch_size, self.num_labels))

            # Convolution variables
            # [filter_width, in_channels, out_channels]


            layer1_weights = tf.get_variable("layer1_weights", shape=[self.patch_size, self.num_channels, 16],
                                             initializer=tf.contrib.layers.xavier_initializer())
            layer1_biases = tf.Variable(tf.zeros([16]))
            layer2_weights = tf.get_variable("layer2_weights", shape=[self.patch_size, 16, 32],
                                             initializer=tf.contrib.layers.xavier_initializer())
            layer2_biases = tf.Variable(tf.zeros([32]))
            layer3_weights = tf.get_variable("layer3_weights", shape=[self.patch_size, 32, 64],
                                             initializer=tf.contrib.layers.xavier_initializer())
            layer3_biases = tf.Variable(tf.zeros([64]))
            layer4_weights = tf.get_variable("layer4_weights", shape=[self.patch_size, 64, 128],
                                             initializer=tf.contrib.layers.xavier_initializer())
            layer4_biases = tf.Variable(tf.constant(1.0, shape=[128]))

            layer5_weights = tf.get_variable("layer5_weights", shape=[self.patch_size, 128, 256],
                                             initializer=tf.contrib.layers.xavier_initializer())
            layer5_biases = tf.Variable(tf.zeros([256]))
            layer6_weights = tf.get_variable("layer6_weights", shape=[self.patch_size, 256, 512],
                                             initializer=tf.contrib.layers.xavier_initializer())
            layer6_biases = tf.Variable(tf.zeros([512]))
            layer7_weights = tf.get_variable("layer7_weights", shape=[self.patch_size, 512, 1024],
                                             initializer=tf.contrib.layers.xavier_initializer())
            layer7_biases = tf.Variable(tf.zeros([1024]))
            layer8_weights = tf.get_variable("layer8_weights", shape=[self.patch_size, 1024, 2048],
                                             initializer=tf.contrib.layers.xavier_initializer())
            layer8_biases = tf.Variable(tf.constant(1.0, shape=[2048]))
            #            layer9_weights = tf.Variable(tf.truncated_normal(
            #                [self.patch_size, 2048, 4096], stddev=0.1))
            #            layer9_biases = tf.Variable(tf.zeros([4096]))
            #            layer10_weights = tf.Variable(tf.truncated_normal(
            #                [self.patch_size, 4096, 8192], stddev=0.1))
            #            layer10_biases = tf.Variable(tf.zeros([8192]))

            # DNN variables
            layer14_weights = tf.get_variable("layer14_weights", shape=[512 * 2048, 200],
                                              initializer=tf.contrib.layers.xavier_initializer())
            layer14_biases = tf.Variable(tf.constant(1.0, shape=[200]))
            layer15_weights = tf.get_variable("layer15_weights", shape=[200, 200],
                                              initializer=tf.contrib.layers.xavier_initializer())
            layer15_biases = tf.Variable(tf.constant(1.0, shape=[200]))
            layer16_weights = tf.get_variable("layer16_weights", shape=[200, self.num_labels],
                                              initializer=tf.contrib.layers.xavier_initializer())
            layer16_biases = tf.Variable(tf.constant(1.0, shape=[self.num_labels]))

            # Training computation.
            logits = model(tf_X)
            loss = tf.reduce_mean(
                tf.nn.softmax_cross_entropy_with_logits(logits=logits, labels=tf_y))

            # Optimizer.
            optimizer = tf.train.GradientDescentOptimizer(1e-4).minimize(loss)

    def model(data):
        # Convolutions
        conv = tf.nn.conv1d(data, layer1_weights, 2, padding='SAME')
        hidden = tf.nn.relu(conv + layer1_biases)
        conv = tf.nn.conv1d(hidden, layer2_weights, 2, padding='SAME')
        hidden = tf.nn.relu(conv + layer2_biases)
        conv = tf.nn.conv1d(hidden, layer3_weights, 2, padding='SAME')
        hidden = tf.nn.relu(conv + layer3_biases)
        conv = tf.nn.conv1d(hidden, layer4_weights, 2, padding='SAME')
        hidden = tf.nn.relu(conv + layer4_biases)
        conv = tf.nn.conv1d(hidden, layer5_weights, 2, padding='SAME')
        hidden = tf.nn.relu(conv + layer5_biases)
        conv = tf.nn.conv1d(hidden, layer6_weights, 2, padding='SAME')
        hidden = tf.nn.relu(conv + layer6_biases)
        conv = tf.nn.conv1d(hidden, layer7_weights, 2, padding='SAME')
        hidden = tf.nn.relu(conv + layer7_biases)
        conv = tf.nn.conv1d(hidden, layer8_weights, 2, padding='SAME')
        hidden = tf.nn.relu(conv + layer8_biases)

        # DNN
        shape = hidden.get_shape().as_list()
        reshape = tf.reshape(hidden, [shape[0], shape[1] * shape[2]])
        hidden = tf.nn.relu(tf.matmul(reshape, layer14_weights) + layer14_biases)
        hidden = tf.nn.relu(tf.matmul(hidden, layer15_weights) + layer15_biases)
        return tf.matmul(hidden, layer16_weights) + layer16_biases

    def fit(self, X, y=None):
        with tf.Session(graph=graph) as session:
            tf.global_variables_initializer().run()
            print("Initialized")
            for step in range(self.epochs):
                batch_offset = (step * self.batch_size) % (len(X))
                batch_data = self.training_set[batch_offset:(batch_offset + self.batch_size)]
                batch_labels = self.training_labels[batch_offset:(batch_offset + self.batch_size)]
                batch_data, batch_labels = shuffle(batch_data, batch_labels, random_state=0)

                feed_dict = {tf_X: batch_data, tf_y: batch_labels}
                _, l, predictions = session.run(
                    [optimizer, loss, train_prediction], feed_dict=feed_dict)
        return self

    def predict(self, X, y=None):
        valid_prediction = tf.nn.softmax(model(X))
        return ([self._meaning(x) for x in X])

    def _accuracy(self, predictions, labels):
        result = (100.0 * np.sum(np.argmax(predictions, 1) == np.argmax(labels, 1)) / predictions.shape[0])
        return result

    def score(self, X, y=None):
        predictions = self.predict(X)
        score = self._accuracy(predictions=predictions, labels=y)
        return score

NameError: name 'BaseEstimator' is not defined

Pasamos a entrenar nuestro modelo:

In [None]:
TRAINING_SET_LEN = 150
NUM_CHANNELS = 1
NUM_LABELS = 2
BATCH_SIZE = 10
PATCH_SIZE = 5

In [None]:
conv_net = ConvolutionalNeuralNetwork(training_set=X_train,
                                      validation_set=X_test,
                                      test_set=[],
                                      training_labels=y_train,
                                      validation_labels=y_test,
                                      test_labels=[], training_set_len=TRAINING_SET_LEN,
                                      num_labels=NUM_LABELS,
                                      num_channels=NUM_CHANNELS,
                                      image_size=CANTIDAD_DE_FRAMES_A_PROCESAR, 
                                      batch_size=BATCH_SIZE, patch_size=PATCH_SIZE)

conv_net.train()