In [None]:
import numpy as np
import pandas as pd
from yaafelib import *
import matplotlib.pyplot as plt
import tensorflow as tf
from os import listdir
from os.path import isfile, join
import itertools

In [None]:
# Load the speaker metadata table; later cells read its "country" and "filename" columns.
demo_df=pd.read_csv("speech-accent-archive/speakers_all.csv")

In [None]:
# Distinct values found in the column at position 11 of the speaker table.
# `.ix` has been removed from pandas; `.iloc` is the explicit positional indexer.
demo_df.iloc[:, 11].unique()

In [None]:
# Rows whose value in the column at position 11 equals "no".
# `.ix` has been removed from pandas; `.iloc` is the explicit positional indexer.
demo_df[demo_df.iloc[:, 11] == "no"]

In [4]:
# Directory holding the split audio clips.
# NOTE(review): absolute, machine-specific path; adjust when running elsewhere.
dir_path="/home/kavin/Silo/CollegeWork/DeepLearning/Project/split/"

# Build each full path once and keep regular files only. Sorting makes the file
# order reproducible, because os.listdir returns entries in arbitrary order.
split_audiofiles = sorted(
    path
    for path in (join(dir_path, entry) for entry in listdir(dir_path))
    if isfile(path)
)

In [None]:
# "filename" values of the speakers whose "country" is 'usa' or 'india'.
country_column = demo_df["country"]
us_filename_prefix = demo_df.loc[country_column == 'usa', "filename"].tolist()
india_filename_prefix = demo_df.loc[country_column == 'india', "filename"].tolist()

# USA entries first, India entries after.
selected_prefixes = us_filename_prefix + india_filename_prefix
#selected_prefixes=[us_filename_prefix[10], india_filename_prefix[2]]

In [None]:
# Display the combined list of USA and India filename prefixes.
selected_prefixes

In [None]:
# Every split clip is kept. The prefix filter below was disabled by the author:
    #if(audiofile.split('_')[0] in selected_prefixes):
        #selected_audiofiles.append(join(dir_path, audiofile))
selected_audiofiles = [join(dir_path, audiofile) for audiofile in split_audiofiles]
        

In [None]:
# Replace the selection with two hand-picked, unsplit recordings.
# NOTE(review): absolute, machine-specific paths.
selected_audiofiles=["/home/kavin/Silo/CollegeWork/DeepLearning/Project/speech-accent-archive/recordings/english104.mp3", 
                    "/home/kavin/Silo/CollegeWork/DeepLearning/Project/speech-accent-archive/recordings/bengali11.mp3"]

In [None]:
# Number of entries currently in selected_audiofiles.
len(selected_audiofiles)

In [6]:
# Two independent, empty accumulators: per-file feature blocks and label blocks.
x_select, y_select = [], []

In [8]:
# Names of the extracted feature groups, in the column order used when stacking.
feature_names=["mfcc", "energy","loud","sharp","flat","sr", "sf"]

# The feature plan, engine and file processor do not depend on the file being
# processed, so they are configured once instead of once per audio file.
fp = FeaturePlan(sample_rate=44100, resample=True)
fp.addFeature('mfcc: MFCC blockSize=1024 stepSize=512')
fp.addFeature('energy: Energy blockSize=1024 stepSize=512')
fp.addFeature('loud: Loudness blockSize=1024 stepSize=512')
fp.addFeature('sharp: PerceptualSharpness blockSize=1024 stepSize=512')
fp.addFeature('flat: SpectralFlatness blockSize=1024 stepSize=512')
fp.addFeature('sr: SpectralRolloff blockSize=1024 stepSize=512')
fp.addFeature('sf: SpectralFlux blockSize=1024 stepSize=512')
df = fp.getDataFlow()

# configure an Engine
engine = Engine()
engine.load(df)
# extract features from audio files using AudioFileProcessor
afp = AudioFileProcessor()

# Accumulate into fresh local lists so the cell can be re-run safely: x_select and
# y_select are rebound to arrays at the end and can no longer be appended to.
feature_blocks = []
label_blocks = []
for audiofile in split_audiofiles:
    # Split clips are named "<speaker>_<...>"; the part before the first underscore
    # is matched against the "filename" column of the speaker table.
    # (For unsplit recordings the author used .split(".")[0] on the base name instead.)
    speaker_id = audiofile.split("/")[-1].split('_')[0]
    matches = demo_df.loc[demo_df["filename"] == speaker_id, "country"]
    if len(matches) != 1:
        # Series.item() would raise an opaque ValueError here; name the offending file.
        raise ValueError(
            "Expected exactly one metadata row for {!r} (file {!r}), found {}".format(
                speaker_id, audiofile, len(matches)))
    country = matches.iloc[0]

    afp.processFile(engine, audiofile)
    feats = engine.readAllOutputs()
    # Index directly: a missing output should fail loudly with a KeyError rather than
    # be replaced by an empty list that np.hstack cannot align.
    extracted_features = np.hstack([feats[key] for key in feature_names])

    # One label row per extracted feature row.
    label_blocks.append(np.full((extracted_features.shape[0], 1), country))
    feature_blocks.append(extracted_features)

x_select = np.vstack(feature_blocks)
y_select = np.vstack(label_blocks)

In [9]:
# (rows, columns) of the stacked feature matrix.
x_select.shape

(5114343, 42)

In [12]:
# Random ordering of all row indices of x_select.
# np.random.shuffle needs a mutable sequence, so shuffling a `range` object fails
# on Python 3; np.random.permutation returns a shuffled index array directly.
# NOTE(review): no seed is set, so the train/validation/test split differs per run.
rand_indices = np.random.permutation(x_select.shape[0])

In [None]:
# Preview the first ten shuffled row indices.
rand_indices[:10]

In [13]:
# 60% / 20% / 20% partition of the shuffled row indices.
n_rows = x_select.shape[0]
train_end = int(n_rows * 0.6)
val_end = int(n_rows * 0.8)

train_index = rand_indices[:train_end]
val_index = rand_indices[train_end:val_end]
test_index = rand_indices[val_end:n_rows]

In [14]:
# Total number of indices across the three splits, for comparison with x_select.shape[0].
len(train_index)+len(val_index)+len(test_index)

5114343

In [15]:
# Notebook-level TensorFlow session; later cells call .eval() on tensors and sess.run().
sess = tf.InteractiveSession()

In [16]:
# Sorted distinct country names plus, for every row, the index of its country in that list.
# return_inverse does the lookup in one vectorised pass instead of calling list.index once
# per row. The input is flattened so the indices are 1-D regardless of NumPy version.
unique_countries, label_indices = np.unique(np.asarray(y_select).ravel(), return_inverse=True)
country_labels = list(unique_countries)

# Kept as a plain list of ints, matching what the per-row lookup produced.
labels = label_indices.tolist()

In [20]:
# One-hot encode the class indices (depth = number of distinct countries) and
# evaluate the tensor into a concrete array.
# NOTE: this overwrites `labels`, so re-running the cell would encode the encoded array.
labels=tf.one_hot(labels, len(country_labels)).eval()

In [19]:
# Inspect the container type currently bound to labels.
type(labels)

list

In [17]:
# Rebind y_select to an empty list, dropping this reference to the stacked label array.
y_select=[]

In [None]:
# Display the names of the extracted feature groups.
feature_names

In [21]:
# Bundle each split's rows with the shared feature and label names.
split_indices = {"train": train_index, "validation": val_index, "test": test_index}
data_save = {
    split_name: {
        "features": x_select[row_indices],
        "labels": labels[row_indices],
        "feature_names": feature_names,
        "label_names": country_labels,
    }
    for split_name, row_indices in split_indices.items()
}

In [None]:
# Persist the split dictionary to disk.
# NOTE(review): a later cell loads "accent_data_usa_india.npy", not this file name; confirm which is intended.
np.save("accent_data.npy", data_save)

In [None]:
def init_weights(shape):
    """Return a tf.Variable of the given shape drawn from a normal distribution (stddev 0.01)."""
    initial_values = tf.random_normal(shape, stddev=0.01)
    return tf.Variable(initial_values)

def init_bias(shape):
    """Return a tf.Variable of the given shape with every element initialised to one."""
    initial_values = tf.ones(shape)
    return tf.Variable(initial_values)

def fully_connected_model(params, MODE, data):
    """Train a one-hidden-layer tanh classifier and evaluate it on a held-out split.

    Args:
        params: tuple ``(hidden_layer_size, learning_rate, minibatch_size, epoch)``.
        MODE: ``"Tune"`` evaluates on ``data["validation"]``, ``"Test"`` on ``data["test"]``.
        data: dict with "train", "validation" and "test" entries, each holding a 2-D
            "features" array and a 2-D one-hot "labels" array.

    Returns:
        ``[accuracy, cross_entropy]`` measured on the split selected by ``MODE``.

    Raises:
        ValueError: if ``MODE`` is neither "Tune" nor "Test", or if the training
            features and labels have different numbers of rows.
    """
    # Reject a bad MODE before training instead of silently returning None afterwards.
    eval_split_by_mode = {"Tune": "validation", "Test": "test"}
    if MODE not in eval_split_by_mode:
        raise ValueError('MODE must be "Tune" or "Test", got {!r}'.format(MODE))

    hidden_layer_size, learning_rate, minibatch_size, epoch = params

    train_features = np.asarray(data["train"]["features"])
    train_labels = np.asarray(data["train"]["labels"])
    if train_features.shape[0] != train_labels.shape[0]:
        raise ValueError("training features and labels must have the same number of rows")

    # Layer sizes follow the data rather than hard-coded constants.
    input_layer_size = train_features.shape[1]
    output_layer_size = train_labels.shape[1]

    # Each call gets its own graph so repeated calls (e.g. a hyper-parameter search)
    # do not keep adding nodes to the default graph.
    graph = tf.Graph()
    with graph.as_default():
        x = tf.placeholder(tf.float32, [None, input_layer_size])
        y_ = tf.placeholder(tf.float32, [None, output_layer_size])

        W1 = init_weights([input_layer_size, hidden_layer_size])
        b1 = init_bias([hidden_layer_size])

        W2 = init_weights([hidden_layer_size, output_layer_size])
        b2 = init_bias([output_layer_size])

        h1 = tf.nn.tanh(tf.matmul(x, W1) + b1)
        # softmax_cross_entropy_with_logits applies softmax itself, so it must receive
        # the unscaled logits; passing softmax output would apply softmax twice.
        logits = tf.matmul(h1, W2) + b2

        # Define loss and optimizer
        cross_entropy = tf.reduce_mean(
            tf.nn.softmax_cross_entropy_with_logits(labels=y_, logits=logits))
        train_step = tf.train.GradientDescentOptimizer(learning_rate).minimize(cross_entropy)
        # argmax over logits equals argmax over softmax(logits).
        correct_prediction = tf.equal(tf.argmax(logits, 1), tf.argmax(y_, 1))
        accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))

        # Feed the training arrays through placeholders at iterator initialisation
        # instead of embedding them in the graph as constants.
        features_placeholder = tf.placeholder(train_features.dtype, train_features.shape)
        labels_placeholder = tf.placeholder(train_labels.dtype, train_labels.shape)
        dataset = tf.data.Dataset.from_tensor_slices((features_placeholder, labels_placeholder))
        dataset = dataset.shuffle(buffer_size=10000)
        dataset = dataset.batch(minibatch_size)

        iterator = dataset.make_initializable_iterator()
        next_batch = iterator.get_next()
        init_op = tf.global_variables_initializer()

    # The context manager closes the session when the call finishes.
    with tf.Session(graph=graph) as sess:
        sess.run(init_op)

        # Train
        for i in range(epoch):
            sess.run(iterator.initializer, feed_dict={features_placeholder: train_features,
                                                      labels_placeholder: train_labels})
            while True:
                try:
                    batch_xs, batch_ys = sess.run(next_batch)
                except tf.errors.OutOfRangeError:
                    # The iterator is exhausted: this epoch is done.
                    break
                assert batch_xs.shape[0] == batch_ys.shape[0]
                sess.run(train_step, feed_dict={x: batch_xs, y_: batch_ys})
                acc, loss=sess.run([accuracy, cross_entropy], feed_dict={x: batch_xs, y_: batch_ys})
                print("Train Iteration: {}, Loss: {}, Accuracy: {}".format(i, loss, acc))

        eval_split = data[eval_split_by_mode[MODE]]
        return sess.run([accuracy, cross_entropy],
                        feed_dict={x: eval_split["features"], y_: eval_split["labels"]})


In [None]:
# Reload the saved feature matrix and the matching raw labels.
features = np.load("x_usa_india.npy")
labels = np.load("y_usa_india.npy")

# Sorted distinct label values plus, for every row, the index of its value in that list.
# return_inverse replaces a per-row list.index lookup with one vectorised pass. The
# input is flattened so the indices are 1-D regardless of NumPy version.
unique_countries, label_indices = np.unique(np.asarray(labels).ravel(), return_inverse=True)
country_labels = list(unique_countries)

labels = label_indices.tolist()

# One-hot encode; .eval() relies on a default session being active.
labels=tf.one_hot(labels, len(country_labels)).eval()

In [None]:
# The file holds a dict saved through np.save, i.e. a pickled 0-d object array, so
# current NumPy only loads it with allow_pickle=True; .item() unwraps the dict.
# SECURITY: unpickling can execute arbitrary code, so only load files you created yourself.
accent_data = np.load("accent_data_usa_india.npy", allow_pickle=True).item()

In [None]:
# One hand-picked configuration: (hidden units, learning rate, minibatch size, epochs).
params = (30, 1e-4, 64, 10)
hidden_layer_size, learning_rate, minibatch_size, num_epoch = params

# Train with it and show the validation [accuracy, loss].
fully_connected_model(params, "Tune", accent_data)

In [None]:
# Exhaustive grid search: train one model per hyper-parameter combination and keep
# the combination with the lowest validation cost.
hidden_layer_sizes=[50,100,300]
learning_rates=[0.1, 1e-3, 1e-5, 1e-7]
minibatch_sizes=[64,128]
epochs=[15000, 20000]
MODE="Tune"
hyperparameters = [hidden_layer_sizes,learning_rates, minibatch_sizes, epochs]
all_parameter_combinations=list(itertools.product(*hyperparameters))
costs=[]
accuracies=[]
for parameter_combo in all_parameter_combinations:
    hidden_layer_size, learning_rate, minibatch_size, num_epoch = parameter_combo
    # Pass the combination under test. Passing the notebook-level `params` would
    # retrain the same stale configuration on every iteration.
    acc, cost=fully_connected_model(parameter_combo, MODE, accent_data)
    costs.append(cost)
    accuracies.append(acc)
    print("Hidden Layer Size: {}, Learning Rate: {}, Minibatch Size: {}, Number of Epochs: {}, Validation Cost: {}, Validation Accuracy: {}".
      format(hidden_layer_size,learning_rate,minibatch_size,num_epoch,cost,acc))

# costs[i] belongs to all_parameter_combinations[i], so argmin picks the best combination.
best_params=all_parameter_combinations[np.argmin(costs)]
print("Best Parameters: \n Hidden Layer Size: {}, Learning Rate: {}, Minibatch Size: {}, Number of Epochs: {}".
        format(best_params[0],best_params[1],best_params[2],best_params[3]))
params=best_params

In [None]:
# Assume that each row of `features` corresponds to the same row as `labels`.
assert features.shape[0] == labels.shape[0]

# Placeholders keep the large arrays out of the graph definition; the data is supplied
# when the iterator is initialised.
features_placeholder = tf.placeholder(features.dtype, features.shape)
labels_placeholder = tf.placeholder(labels.dtype, labels.shape)

dataset = tf.data.Dataset.from_tensor_slices((features_placeholder, labels_placeholder))
# [Other transformations on `dataset` (shuffle, batch, ...) go here.]
# The template line `dataset = ...` was removed: it rebound `dataset` to Ellipsis, so
# the iterator call below could not work.
iterator = dataset.make_initializable_iterator()

sess.run(iterator.initializer, feed_dict={features_placeholder: features,
                                          labels_placeholder: labels})

In [None]:
# Toy data: 10 rows of 5 uniform values in [0, 1) and 10 rows of 2 random 0/1 entries.
a = np.random.uniform(low=0, high=1, size=(10, 5))
b = np.random.choice([0, 1], size=(10, 2))

In [None]:
# Shape of the random 0/1 array.
b.shape

In [None]:
# Slice the two toy arrays row by row into (a_row, b_row) pairs, then group them four at a time.
dataset = tf.data.Dataset.from_tensor_slices((a, b))
batched_dataset = dataset.batch(4)

# One-shot iterator over the batches; next_element is the pair of tensors for the next batch.
iterator = batched_dataset.make_one_shot_iterator()
next_element = iterator.get_next()

In [None]:
# Unpack the two batch tensors. NOTE: this rebinds the notebook-level names x and y.
x, y=next_element

In [None]:
# Open a session and pull two consecutive batches from the iterator, printing each.
sess = tf.InteractiveSession()
for batch_number in range(2):
    print(sess.run(next_element))

In [None]:
# Display the repr of the unbatched toy dataset object.
dataset