In [1]:
import tensorflow as tf
import pandas as pd
import numpy as np
import random
from sklearn.cross_validation import train_test_split



In [0]:
# Install the PyDrive wrapper & import libraries.
# This only needs to be done once per notebook.
!pip install -U -q PyDrive
from pydrive.auth import GoogleAuth
from pydrive.drive import GoogleDrive
from google.colab import auth
from oauth2client.client import GoogleCredentials

# Authenticate and create the PyDrive client.
# This only needs to be done once per notebook.
auth.authenticate_user()
gauth = GoogleAuth()
gauth.credentials = GoogleCredentials.get_application_default()
drive = GoogleDrive(gauth)

In [3]:
# List .txt files in the root.
#
# Search query reference:
# https://developers.google.com/drive/v2/web/search-parameters
listed = drive.ListFile().GetList()
for file in listed:
  print('title {}, id {}'.format(file['title'], file['id']))

title sample_phones_with_shuffling.ipynb, id 1DgTzyyf5a7cowx2gNLwylqqoP-l968vh
title sample_phones_with_shuffling.ipynb, id 1_HhSJSMCsTEqpI5BZFAxFyF7k5DxEgTp
title RNN.ipynb, id 19r_dd89G14_x_NqYxACon_LVkg7R1qd0
title DNN.ipynb, id 1PouCenS2tKofZWjrOo7ke-eQ46p32iyI
title Untitled0.ipynb, id 1GO7GAV5U021FUw79Z6gZ8rS0N4Vzz7Im
title Untitled0.ipynb, id 1c2XTxnRJRhwpfjGVYynbw7rTzl3sT4gs
title Untitled1.ipynb, id 19L2jhVfnalXOvlgFI7TQj4nk0hdc8esZ
title mfcc_alignments.pkl, id 14X-vptKNFo9t-ltnNuXZp_UFHPyaAeA2
title mfcc_alignments.pkl, id 10FsU7gZvkTHyUuhQwUAwsLmEMkIiD4xL
title Untitled folder, id 1N3xA66rADBEFIiNYupzDFYLwKkwleaGV
title sample_phones_with_shuffling.ipynb, id 1tPhS-QoSx-Y9_TwTAUYX3nzzsJqs1vE5
title Colab Notebooks, id 1tebh5kjU9pWe5wmoCGi2oJqtt4VniNuW
title Getting started, id 0B-COj-emT6bUc3RhcnRlcl9maWxl


In [6]:
selection = "mfcc_alignments.pkl"
for file in listed:
  if file['title'] == selection:
    file.GetContentFile(selection)
import os
os.listdir(".")

['datalab',
 '.ipython',
 '.config',
 'mfcc_alignments.pkl',
 '.forever',
 '.rnd',
 '.cache',
 '.local',
 '.keras']

In [0]:
data=pd.read_pickle("mfcc_alignments.pkl")
labels_list = data["label"]
j=0
for i in range(1,288,6):
    labels_list = labels_list.replace(to_replace=np.arange(i,i+6), value=j)
    j+=1
data["label"] = labels_list

In [0]:
X_input = data["mfcc"]
Y_input = data["label"]

In [0]:
X1 = X_input.tolist()
Y1 = pd.get_dummies(Y_input.tolist()).values

In [0]:
X1 = np.asarray([np.asarray(ele, np.float32) for ele in X1])
Y1 = np.asarray([np.asarray(ele, np.uint16) for ele in Y1])

In [0]:
X1 = X1 / X1.max(axis=0)

In [0]:
select_indices = random.sample(range(0,len(X1)), len(X1))
X1 = X1[select_indices]
Y1 = Y1[select_indices]

In [0]:
X_train, X_test, y_train, y_test = train_test_split(X1, Y1, test_size=0.33, random_state=0)

In [0]:
# Parameters
learning_rate = 0.001
training_epochs = 25
batch_size = 1000
display_step = 1

In [0]:
# Network Parameters
n_hidden_1 = 1024 # 1st layer number of neurons
n_hidden_2 = 1024 # 2nd layer number of neurons
n_hidden_3 = 1024 # 3rd layer number of neurons
n_hidden_4 = 1024 # 4th layer number of neurons
n_hidden_5 = 1024 # 5th layer number of neurons
n_hidden_6 = 1024 # 6th layer number of neurons

n_input = 13 # MFCC data input (img shape: n_samples*13)
n_classes = 48 # Phone total classes (288 senones)

In [0]:
# tf Graph input
X = tf.placeholder("float", [None, n_input])
Y = tf.placeholder("float", [None, n_classes])

In [0]:
# Store layers weight & bias
weights = {
    'h1': tf.Variable(tf.random_normal([n_input, n_hidden_1])),
    'h2': tf.Variable(tf.random_normal([n_hidden_1, n_hidden_2])),
    'h3': tf.Variable(tf.random_normal([n_hidden_2, n_hidden_3])),
    'h4': tf.Variable(tf.random_normal([n_hidden_3, n_hidden_4])),
    'h5': tf.Variable(tf.random_normal([n_hidden_4, n_hidden_5])),
    'h6': tf.Variable(tf.random_normal([n_hidden_5, n_hidden_6])),
    'out': tf.Variable(tf.random_normal([n_hidden_6, n_classes]))
}
biases = {
    'b1': tf.Variable(tf.random_normal([n_hidden_1])),
    'b2': tf.Variable(tf.random_normal([n_hidden_2])),
    'b3': tf.Variable(tf.random_normal([n_hidden_3])),
    'b4': tf.Variable(tf.random_normal([n_hidden_4])),
    'b5': tf.Variable(tf.random_normal([n_hidden_5])),
    'b6': tf.Variable(tf.random_normal([n_hidden_6])),
    'out': tf.Variable(tf.random_normal([n_classes]))
}

In [0]:
# Create model
def multilayer_perceptron(x):
    # Hidden fully connected layer with 1024 neurons
    layer_1 = tf.nn.relu(tf.add(tf.matmul(x, weights['h1']), biases['b1']))
    
    # Hidden fully connected layer with 1024 neurons
    layer_2 = tf.nn.relu(tf.add(tf.matmul(layer_1, weights['h2']), biases['b2']))
    
    # Hidden fully connected layer with 1024 neurons
    layer_3 = tf.nn.relu(tf.add(tf.matmul(layer_2, weights['h3']), biases['b3']))
    
    # Hidden fully connected layer with 1024 neurons
    layer_4 = tf.nn.relu(tf.add(tf.matmul(layer_3, weights['h4']), biases['b4']))
    
    # Hidden fully connected layer with 1024 neurons
    layer_5 = tf.nn.relu(tf.add(tf.matmul(layer_4, weights['h5']), biases['b5']))
    
    # Hidden fully connected layer with 1024 neurons
    layer_6 = tf.nn.relu(tf.add(tf.matmul(layer_5, weights['h6']), biases['b6']))
    
    # Output fully connected layer with a neuron for each class
    out_layer = tf.matmul(layer_6, weights['out']) + biases['out']
    
    return out_layer

In [0]:
# Construct model
prediction = multilayer_perceptron(X)

In [0]:
# Define loss and optimizer
loss_op = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=prediction, labels=Y))

In [0]:
optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate)
train_op = optimizer.minimize(loss_op)

In [0]:
# Initializing the variables
init = tf.global_variables_initializer()

In [0]:
sess = tf.InteractiveSession()

In [0]:
sess.run(init)

In [0]:
def shuffle_dataset(X, Y):
    random_indices = random.sample(range(0,len(X)), len(Y))
    X = X[random_indices]
    Y = Y[random_indices]
    return (X,Y)

In [39]:
for epoch in range(training_epochs):
  # Training cycle
  avg_cost = 0.
  total_batch = len(X_train)//batch_size
  X_shuffled, Y_shuffled = shuffle_dataset(X_train,y_train)
  # Loop over all batches
  for i in range(total_batch):
      batch_x, batch_y = X_shuffled[i*batch_size:(i+1)*batch_size], Y_shuffled[i*batch_size:(i+1)*batch_size]
      # Run optimization op (backprop) and cost op (to get loss value)
      _, c = sess.run([train_op, loss_op], feed_dict={X: batch_x,
                                                      Y: batch_y})
      # Compute average loss
      avg_cost += c / total_batch
      if(epoch == 0):
        if(i*batch_size % 100000 == 0):
          print("Steps completed: {0}/{1}".format(i, len(X_train)))
  print("Epoch:", '%04d' % (epoch+1), "cost={:.9f}".format(avg_cost))

Steps completed: 0/753631
Steps completed: 100/753631
Steps completed: 200/753631
Steps completed: 300/753631
Steps completed: 400/753631
Steps completed: 500/753631
Steps completed: 600/753631
Steps completed: 700/753631
Epoch: 0001 cost=15870091.261620179
Epoch: 0002 cost=4984907.030212483
Epoch: 0003 cost=3229775.474103590
Epoch: 0004 cost=2327217.242861884
Epoch: 0005 cost=1779749.126328021
Epoch: 0006 cost=1425837.579515271
Epoch: 0007 cost=1155745.961487384
Epoch: 0008 cost=961697.110640771
Epoch: 0009 cost=806430.227423638
Epoch: 0010 cost=680043.795318725
Epoch: 0011 cost=575743.362259295
Epoch: 0012 cost=494789.652639442
Epoch: 0013 cost=423981.239707835
Epoch: 0014 cost=366989.031457504
Epoch: 0015 cost=319266.606615206
Epoch: 0016 cost=277392.537288346
Epoch: 0017 cost=244312.811400232
Epoch: 0018 cost=216002.200489708
Epoch: 0019 cost=192796.958291832
Epoch: 0020 cost=168304.037931607
Epoch: 0021 cost=151685.243069389
Epoch: 0022 cost=134068.426927706
Epoch: 0023 cost=12224

In [40]:
# Test model
pred = tf.nn.softmax(prediction)  # Apply softmax to logits
correct_prediction = tf.equal(tf.argmax(pred, 1), tf.argmax(Y, 1))
# Calculate accuracy
accuracy = tf.reduce_mean(tf.cast(correct_prediction, "float"))
print("Accuracy:", accuracy.eval({X: X_test, Y: y_test}))

Accuracy: 0.33593667


In [41]:
# Test model
pred = tf.nn.softmax(prediction)  # Apply softmax to logits
correct_prediction = tf.equal(tf.argmax(pred, 1), tf.argmax(Y, 1))
# Calculate accuracy
accuracy = tf.reduce_mean(tf.cast(correct_prediction, "float"))
print("Accuracy:", accuracy.eval({X: X_train[:len(X_train)//2], Y: y_train[:len(y_train)//2]}))

Accuracy: 0.34640342


In [42]:
# Test model
pred = tf.nn.softmax(prediction)  # Apply softmax to logits
correct_prediction = tf.equal(tf.argmax(pred, 1), tf.argmax(Y, 1))
# Calculate accuracy
accuracy = tf.reduce_mean(tf.cast(correct_prediction, "float"))
print("Accuracy:", accuracy.eval({X: X_train[len(X_train)//2:], Y: y_train[len(y_train)//2:]}))

Accuracy: 0.34932435
