In [1]:
import librosa 
import os
import numpy as np
from tqdm import tqdm
from time import time
import pandas as pd
from scipy import misc
import sys
import tensorflow as tf
from sklearn.model_selection import train_test_split


In [2]:
def load_audio(music_dir):
    
    folders = [os.path.join(music_dir,folder) for folder in list(os.walk(music_dir))[0][1]]
    print(folders[0])
    filenames = [[os.path.join(folder,f) for f in list(os.walk(folder))[0][2]] for folder in folders]
    filenames = [item for sublist in filenames for item in sublist]
    files = [item for item in filenames]
    track_ids = [str(int(filename.split('\\')[-1].split('.')[0])) for filename in filenames]
    print(files[0:10])
    print(track_ids[0:10])
    print("Number of files: " + str(len(files)))
    return files, track_ids




In [6]:
"""
NOT NEEDED for current analysis.
Load audio files, aggregate them over 100 sample intervals. Find mean, max and min for the features.
"""
def process_audio(files,track_ids,window = 100,sample = SAMPLE):
    clip_range = int(19800*SAMPLE/window)  ##To clip the feature vectors for equal length.
    train_data = []
    num_files = len(files[0:sample])
    for i in tqdm(range(num_files)):
        audio_vec = []
        audio, _ = librosa.load(files[i])
        audio = np.reshape(audio,(-1))
        for j in range(1,int(audio.shape[0]/window)):
            mean_val = np.mean(audio[j*window: min((j+1)*window,audio.shape[0])])
            max_val = np.max(audio[j*window: min((j+1)*window,audio.shape[0])])
            min_val = np.min(audio[j*window: min((j+1)*window,audio.shape[0])])
            #Append to audio vector for this audio file.
            audio_vec += [mean_val,max_val,min_val]
        audio_vec = np.array(audio_vec[:clip_range])
        train_data.append(np.array(audio_vec))
    train_data = np.array(train_data)
    print("Original audio shape: " + str(audio.shape))
    print("Condensed audio shape: " + str(train_data.shape))
    train_ids = track_ids[0:num_files]
    return train_data, train_ids
train_data, train_ids = process_audio(files,track_ids, 200,200)       

In [24]:
##Now aggregate results found in tracks.csv and get the labels.

def load_tracks_file(filepath):
    tracks = pd.read_csv(filepath)
    ##Set new columns for the dataframe and remove the multi-index.
    new_cols = tracks.iloc[0]
    tracks = tracks.iloc[1:]
    new_cols[0] = "track_id"
    tracks.columns = new_cols
    labels = tracks["genre_top"]
    
    #Track id column should be string type
    tracks.track_id = tracks.track_id.astype(int).astype(str)
    #tracks = tracks.set_index("track_id")
    return tracks,labels


In [3]:
"""
Extract the melspectrogram from the audio files.
"""

import sys
import numpy as np
import librosa as lb
from scipy import misc
import matplotlib.pyplot as plt
%matplotlib inline

Fs         = 12000
N_FFT      = 512
N_MELS     = 96
N_OVERLAP  = 256
DURA       = 29.12

def log_scale_melspectrogram(path, plot=False):
    signal, sr = lb.load(path, sr=Fs)

    n_sample = signal.shape[0]
    n_sample_fit = int(DURA*Fs)
    
    if n_sample < n_sample_fit:
        signal = np.hstack((signal, np.zeros((int(DURA*Fs) - n_sample,))))
    elif n_sample > n_sample_fit:
        signal = signal[round((n_sample-n_sample_fit)/2):round((n_sample+n_sample_fit)/2)]
    
    melspect = lb.amplitude_to_db(lb.feature.melspectrogram(y=signal, sr=Fs, hop_length=N_OVERLAP, n_fft=N_FFT, n_mels=N_MELS)**2, ref=1.0)

    if plot:
        melspect = melspect[np.newaxis, :]
        plt.imshow(melspect.reshape((melspect.shape[1],melspect.shape[2])))
        print(melspect.shape)

    return melspect

In [4]:
def obtain_mel_features(filenames):
    arr = np.zeros([len(filenames), 96, 1366])
    track_ids = [str(int(filename.split('\\')[-1].split('.')[0])) for filename in filenames]
    for i in tqdm(range(len(filenames))):
        mel = log_scale_melspectrogram(filenames[i], plot = False)
        arr[i,:,:] = mel
    return arr, track_ids
        

In [28]:
#Load audio
music_dir = "./music_samples/"
track_details_path = "./tracks_small.csv"
files, track_ids = load_audio(music_dir)

#Load csv file
filepath = "./tracks_small.csv"
tracks_df, labels = load_tracks_file(filepath)

#Process melspectrograms from the audio files
melspectrogram, track_ids = obtain_mel_features(files[0:200])

./music_samples/000
['./music_samples/000\\000002.mp3', './music_samples/000\\000005.mp3', './music_samples/000\\000010.mp3', './music_samples/000\\000140.mp3', './music_samples/000\\000141.mp3', './music_samples/000\\000148.mp3', './music_samples/000\\000182.mp3', './music_samples/000\\000190.mp3', './music_samples/000\\000193.mp3', './music_samples/000\\000194.mp3']
['2', '5', '10', '140', '141', '148', '182', '190', '193', '194']
Number of files: 8000


100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 200/200 [04:11<00:00,  1.26s/it]


In [29]:
"""
Now, to determine the labels for the audio files that we have.
"""
from sklearn.preprocessing import OneHotEncoder
from sklearn.model_selection import train_test_split

def find_new_labels(train_ids, tracks_df):
    labels_final = []
    for tr in train_ids:
        labels_final.append(tracks_df[tracks_df.track_id == tr]["genre_top"].values[0])
    #One hot encode
    label_dic = {label: i for (i,label) in enumerate(np.unique(labels_final))} #To store numerical values for each label
    label_nums = np.array([label_dic[l] for l in labels_final]).reshape((-1,1))
    print(label_nums.shape)
    one_hot_labels = OneHotEncoder().fit_transform(label_nums).todense()
    return one_hot_labels, label_dic

def make_splits(melspectrogram,one_hot_labels,test_size = 0.33):
    return train_test_split(melspectrogram, one_hot_labels, test_size = test_size, random_state = 42)


one_hot_labels, label_dic = find_new_labels(track_ids,tracks_df)

print("One hot label samples: ")
print(one_hot_labels)


    
#X_train, X_test, y_train, y_test = make_splits(melspectrogram,one_hot_labels)


(200, 1)
One hot label samples: 
[[0. 0. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 1. 0.]
 ...
 [0. 0. 0. ... 1. 0. 0.]
 [0. 0. 0. ... 1. 0. 0.]
 [0. 0. 0. ... 1. 0. 0.]]


In [32]:
import pickle
pickle.dump({"melspectrogram": melspectrogram, "one_hot_labels":one_hot_labels},open("audio_data_labels.pkl",'wb'))

In [6]:
"""
Load pickle file
"""
import pickle
temp = pickle.load(open('audio_data_labels.pkl','rb'))
melspectrogram = temp["melspectrogram"]
one_hot_labels = temp["one_hot_labels"]
del temp

In [12]:
def batch_norm(x, n_out, phase_train, scope='bn'):
    with tf.variable_scope(scope):
        beta = tf.Variable(tf.constant(0.0, shape=[n_out]),name='beta', trainable=True)
        gamma = tf.Variable(tf.constant(1.0, shape=[n_out]),name='gamma', trainable=True)
        batch_mean, batch_var = tf.nn.moments(x, [0,1,2], name='moments')
        ema = tf.train.ExponentialMovingAverage(decay=0.5)

        def mean_var_with_update():
            ema_apply_op = ema.apply([batch_mean, batch_var])
            with tf.control_dependencies([ema_apply_op]):
                return tf.identity(batch_mean), tf.identity(batch_var)

        mean, var = tf.cond(phase_train,
                            mean_var_with_update,
                            lambda: (ema.average(batch_mean), ema.average(batch_var)))
        normed = tf.nn.batch_normalization(x, mean, var, beta, gamma, 1e-3)
    return normed

def cnn(melspectrogram, weights, phase_train):
    
    n_mels, mel_vals = melspectrogram.get_shape()[1],melspectrogram.get_shape()[2]
    
    x = tf.reshape(melspectrogram,[-1,1,n_mels,mel_vals])
    x = batch_norm(melspectrogram, mel_vals, phase_train)
    x = tf.reshape(melspectrogram,[-1,n_mels,mel_vals,1])
    conv2_1 = tf.add(tf.nn.conv2d(x, weights['wconv1'], strides=[1, 1, 1, 1], padding='SAME'), weights['bconv1'])
    conv2_1 = tf.nn.relu(batch_norm(conv2_1, 32, phase_train))
    mpool_1 = tf.nn.max_pool(conv2_1, ksize=[1, 2, 4, 1], strides=[1, 2, 4, 1], padding='VALID')
    dropout_1 = tf.nn.dropout(mpool_1, 0.5)

    conv2_2 = tf.add(tf.nn.conv2d(dropout_1, weights['wconv2'], strides=[1, 1, 1, 1], padding='SAME'), weights['bconv2'])
    conv2_2 = tf.nn.relu(batch_norm(conv2_2, 128, phase_train))
    mpool_2 = tf.nn.max_pool(conv2_2, ksize=[1, 2, 4, 1], strides=[1, 2, 4, 1], padding='VALID')
    dropout_2 = tf.nn.dropout(mpool_2, 0.5)

    conv2_3 = tf.add(tf.nn.conv2d(dropout_2, weights['wconv3'], strides=[1, 1, 1, 1], padding='SAME'), weights['bconv3'])
    conv2_3 = tf.nn.relu(batch_norm(conv2_3, 128, phase_train))
    mpool_3 = tf.nn.max_pool(conv2_3, ksize=[1, 2, 4, 1], strides=[1, 2, 4, 1], padding='VALID')
    dropout_3 = tf.nn.dropout(mpool_3, 0.5)

    conv2_4 = tf.add(tf.nn.conv2d(dropout_3, weights['wconv4'], strides=[1, 1, 1, 1], padding='SAME'), weights['bconv4'])
    conv2_4 = tf.nn.relu(batch_norm(conv2_4, 192, phase_train))
    mpool_4 = tf.nn.max_pool(conv2_4, ksize=[1, 3, 5, 1], strides=[1, 3, 5, 1], padding='VALID')
    dropout_4 = tf.nn.dropout(mpool_4, 0.5)

    conv2_5 = tf.add(tf.nn.conv2d(dropout_4, weights['wconv5'], strides=[1, 1, 1, 1], padding='SAME'), weights['bconv5'])
    conv2_5 = tf.nn.relu(batch_norm(conv2_5, 256, phase_train))
    mpool_5 = tf.nn.max_pool(conv2_5, ksize=[1, 4, 4, 1], strides=[1, 4, 4, 1], padding='VALID')
    dropout_5 = tf.nn.dropout(mpool_5, 0.5)

    flat = tf.reshape(dropout_5, [-1, weights['woutput'].get_shape().as_list()[0]])
    p_y_X = tf.nn.sigmoid(tf.add(tf.matmul(flat,weights['woutput']),weights['boutput']))
    print(p_y_X.get_shape())

    return p_y_X

In [64]:
len(np.unique(labels))

8

In [65]:
def init_weights(shape):
    return tf.Variable(tf.random_normal(shape, stddev=0.01))

def init_biases(shape):
    return tf.Variable(tf.zeros(shape))

weights = {
        'wconv1':init_weights([3, 3, 1, 32]),
        'wconv2':init_weights([3, 3, 32, 128]),
        'wconv3':init_weights([3, 3, 128, 128]),
        'wconv4':init_weights([3, 3, 128, 192]),
        'wconv5':init_weights([3, 3, 192, 256]),
        'bconv1':init_biases([32]),
        'bconv2':init_biases([128]),
        'bconv3':init_biases([128]),
        'bconv4':init_biases([192]),
        'bconv5':init_biases([256]),
        'woutput':init_weights([256, len(label_dic.keys())]),
        'boutput':init_biases([len(label_dic.keys())])}



In [66]:
label_dic.keys()

dict_keys(['Hip-Hop', 'Electronic', 'Folk', 'International', 'Pop', 'Experimental', 'Rock'])

In [67]:
import sklearn.metrics as sm
batch_size    = 1
learning_rate = 0.003
n_epoch       = 50
n_samples     = len(X_train)                              # change to 1000 for entire dataset
cv_split      = 0.8                             
train_size    = int(n_samples * cv_split)                               
test_size     = n_samples - train_size



X = tf.placeholder("float", [None, 96, melspectrogram.shape[2], 1])
y = tf.placeholder("float", [None, len(label_dic.keys())])
lrate = tf.placeholder("float")
phase_train = tf.placeholder(tf.bool, name='phase_train')

y_ = cnn(X, weights, phase_train)

cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(labels = y, logits = y_))
train_op = tf.train.RMSPropOptimizer(learning_rate, 0.9).minimize(cost)
predict_op = y_

tags = sorted(label_dic.keys())

def next_batch(num, data, labels):
    '''
    Return a total of `num` random samples and labels. 
    '''
    idx = np.arange(0 , len(data))
    np.random.shuffle(idx)
    idx = idx[:num]
    data_shuffle = [data[i] for i in idx]
    labels_shuffle = [labels[i] for i in idx]
    return np.asarray(data_shuffle).reshape((batch_size,96,1366,1)), np.asarray(labels_shuffle)#.reshape((batch_size,96,1366,1)) 


with tf.Session() as sess:
    tf.initialize_all_variables().run()
    for i in range(n_epoch):
        print(i)
        #training_batch = zip(range(0, train_size, batch_size),range(batch_size, train_size+1, batch_size))
        for j in range(50):
            X_train_batch,y_train_batch = next_batch(batch_size,X_train,y_train)
            train_input_dict = {X: X_train_batch, 
                                y: y_train_batch,
                                lrate: learning_rate,
                                phase_train: True}
            sess.run(train_op, feed_dict=train_input_dict)
        #test_indices = np.arange(len(X_test))
        #np.random.shuffle(test_indices)
        #test_indices = test_indices[0:test_size]

        test_input_dict = {X: X_test.reshape(-1,96,1366,1),
                           y: y_test,
                           phase_train:True}
        predictions = sess.run(predict_op, feed_dict=test_input_dict)
        print('Epoch : ', i,  'AUC : ', sm.roc_auc_score(y_test, predictions, average='samples'))
        # print(i, np.mean(np.argmax(y_test[test_indices], axis=1) == predictions))
        # print sort_result(tags, predictions)[:5]

TypeError: Expected binary or unicode string, got -1

In [7]:
"""
Converts original dataset into the verification format. i.e. Input spectrograms X1, X2 and an output of 0 if they're of 
dissimilar genre and 1 if they're of the same genre.
Input: data , labels, sample (How many input samples do you want to transform?)
Output: Three lists X1, X2, indicators
"""
def form_verification_dataset(data,labels,sample=10):
    ##Insert assert statements here for the correct input data sizes
    assert len(data) == len(labels)
    #Select samples
    idx = np.arange(0 , len(data))
    np.random.shuffle(idx)
    idx = idx[:sample]
    data_shuffle = [data[i] for i in idx]
    labels_shuffle = [labels[i] for i in idx]
    
    #Pair each sample with every other sample from the dataset. O(sample^2)
    num_mels,mel_vals = data.shape[1], data.shape[2]
    X1 = X2 = np.zeros((sample*(sample-1)//2,num_mels,mel_vals))
    indicators = []
    count = 0
    for i in range(0,len(data_shuffle)):
        for j in range(i+1,len(data_shuffle)):
            X1[count,:,:] = data_shuffle[i]
            X1[count,:,:] = data_shuffle[j]
            if np.equal(labels_shuffle[i], labels_shuffle[j]).all():
                indicators.append(0)
            else:
                indicators.append(1)
            count += 1
            

    return X1, X2, np.array(indicators)
    
    
    

In [8]:
def init_weights(shape):
    return tf.Variable(tf.random_normal(shape, stddev=0.01))

def init_biases(shape):
    return tf.Variable(tf.zeros(shape))

weights = {
        'wconv1':init_weights([3, 3, 1, 32]),
        'wconv2':init_weights([3, 3, 32, 128]),
        'wconv3':init_weights([3, 3, 128, 128]),
        'wconv4':init_weights([3, 3, 128, 192]),
        'wconv5':init_weights([3, 3, 192, 256]),
        'bconv1':init_biases([32]),
        'bconv2':init_biases([128]),
        'bconv3':init_biases([128]),
        'bconv4':init_biases([192]),
        'bconv5':init_biases([256]),
        'woutput':init_weights([256, 128]),
        'boutput':init_biases([128]),
        'woutput2':init_weights([256, 128]),
        'boutput2':init_biases([128]),
        'wfinal':init_weights([128, 1]),
        'bfinal':init_biases([1]),}

In [13]:
margin = 0.2

def dual_cnn(X_first,X_second, weights, phase_train):
    
    #assert X_first.shape == X_second.shape, str(X_first.shape) + " does not match with " + str(X_second.shape)
    #assert indicators.shape[0] == X_first.shape[0], "Indicator array is not of length " + indicators.shape[0]
    
    num_samples, n_mels, mel_vals = X1.shape[0],X1.shape[1],X1.shape[2]
    
    x_first = tf.reshape(X_first,[-1,1,n_mels,mel_vals])
    x_second = tf.reshape(X_second,[-1,1,n_mels,mel_vals])
    
    x_first = batch_norm(x_first, mel_vals, phase_train)
    x_second = batch_norm(x_first, mel_vals, phase_train)
    
    x_first = tf.reshape(x_first,[-1,n_mels,mel_vals,1])
    x_second = tf.reshape(x_second,[-1,n_mels,mel_vals,1])
    
    conv2_1_first = tf.add(tf.nn.conv2d(x_first, weights['wconv1'], strides=[1, 1, 1, 1], padding='SAME'), weights['bconv1'])
    conv2_1_second = tf.add(tf.nn.conv2d(x_second, weights['wconv1'], strides=[1, 1, 1, 1], padding='SAME'), weights['bconv1'])
    
    conv2_1_first = tf.nn.relu(batch_norm(conv2_1_first, 32, phase_train))
    conv2_1_second = tf.nn.relu(batch_norm(conv2_1_second, 32, phase_train))
    
    mpool_1_first = tf.nn.max_pool(conv2_1_first, ksize=[1, 2, 4, 1], strides=[1, 2, 4, 1], padding='VALID')
    mpool_1_second = tf.nn.max_pool(conv2_1_second, ksize=[1, 2, 4, 1], strides=[1, 2, 4, 1], padding='VALID')
    
    dropout_1_first = tf.nn.dropout(mpool_1_first, 0.5)
    dropout_1_second = tf.nn.dropout(mpool_1_second, 0.5)

    conv2_2_first = tf.add(tf.nn.conv2d(dropout_1_first, weights['wconv2'], strides=[1, 1, 1, 1], padding='SAME'), weights['bconv2'])
    conv2_2_second = tf.add(tf.nn.conv2d(dropout_1_second, weights['wconv2'], strides=[1, 1, 1, 1], padding='SAME'), weights['bconv2'])
    
    conv2_2_first = tf.nn.relu(batch_norm(conv2_2_first, 128, phase_train))
    conv2_2_second = tf.nn.relu(batch_norm(conv2_2_first, 128, phase_train))
    
    mpool_2_first = tf.nn.max_pool(conv2_2_first, ksize=[1, 2, 4, 1], strides=[1, 2, 4, 1], padding='VALID')
    mpool_2_second = tf.nn.max_pool(conv2_2_first, ksize=[1, 2, 4, 1], strides=[1, 2, 4, 1], padding='VALID')
    
    
    dropout_2_first = tf.nn.dropout(mpool_2_first, 0.5)
    dropout_2_second = tf.nn.dropout(mpool_2_second, 0.5)

    conv2_3_first = tf.add(tf.nn.conv2d(dropout_2_first, weights['wconv3'], strides=[1, 1, 1, 1], padding='SAME'), weights['bconv3'])
    conv2_3_second = tf.add(tf.nn.conv2d(dropout_2_second, weights['wconv3'], strides=[1, 1, 1, 1], padding='SAME'), weights['bconv3'])
    

    conv2_3_first = tf.nn.relu(batch_norm(conv2_3_first, 128, phase_train))
    conv2_3_second = tf.nn.relu(batch_norm(conv2_3_second, 128, phase_train))
    
    mpool_3_first = tf.nn.max_pool(conv2_3_first, ksize=[1, 2, 4, 1], strides=[1, 2, 4, 1], padding='VALID')
    mpool_3_second = tf.nn.max_pool(conv2_3_second, ksize=[1, 2, 4, 1], strides=[1, 2, 4, 1], padding='VALID')
    
    dropout_3_first = tf.nn.dropout(mpool_3_first, 0.5)
    dropout_3_second = tf.nn.dropout(mpool_3_second, 0.5)

    conv2_4_first = tf.add(tf.nn.conv2d(dropout_3_first, weights['wconv4'], strides=[1, 1, 1, 1], padding='SAME'), weights['bconv4'])
    conv2_4_second = tf.add(tf.nn.conv2d(dropout_3_second, weights['wconv4'], strides=[1, 1, 1, 1], padding='SAME'), weights['bconv4'])
    
    conv2_4_first = tf.nn.relu(batch_norm(conv2_4_first, 192, phase_train))
    conv2_4_second = tf.nn.relu(batch_norm(conv2_4_second, 192, phase_train))
    
    mpool_4_first = tf.nn.max_pool(conv2_4_first, ksize=[1, 3, 5, 1], strides=[1, 3, 5, 1], padding='VALID')
    mpool_4_second = tf.nn.max_pool(conv2_4_second, ksize=[1, 3, 5, 1], strides=[1, 3, 5, 1], padding='VALID')
    
    dropout_4_first = tf.nn.dropout(mpool_4_first, 0.5)
    dropout_4_second = tf.nn.dropout(mpool_4_second, 0.5)

    conv2_5_first = tf.add(tf.nn.conv2d(dropout_4_first, weights['wconv5'], strides=[1, 1, 1, 1], padding='SAME'), weights['bconv5'])
    conv2_5_second = tf.add(tf.nn.conv2d(dropout_4_second, weights['wconv5'], strides=[1, 1, 1, 1], padding='SAME'), weights['bconv5'])
    
    
    conv2_5_first = tf.nn.relu(batch_norm(conv2_5_first, 256, phase_train))
    conv2_5_second = tf.nn.relu(batch_norm(conv2_5_second, 256, phase_train))
    
    mpool_5_first = tf.nn.max_pool(conv2_5_first, ksize=[1, 4, 4, 1], strides=[1, 4, 4, 1], padding='VALID')
    mpool_5_second = tf.nn.max_pool(conv2_5_second, ksize=[1, 4, 4, 1], strides=[1, 4, 4, 1], padding='VALID')
    
    dropout_5_first = tf.nn.dropout(mpool_5_first, 0.5)
    dropout_5_second = tf.nn.dropout(mpool_5_second, 0.5)
    
    flat_first = tf.reshape(dropout_5_first, [-1, weights['woutput'].get_shape().as_list()[0]])
    flat_second = tf.reshape(dropout_5_second, [-1, weights['woutput'].get_shape().as_list()[0]])
    
    print(flat_first.get_shape())
    print(flat_second.get_shape())
    flat_first = tf.add(tf.matmul(flat_first, weights['woutput2']), weights['boutput2'])
    flat_second = tf.add(tf.matmul(flat_second, weights['woutput2']), weights['boutput2'])
    
    flat_first = tf.nn.relu(flat_first)
    flat_second = tf.nn.relu(flat_second)
    # Apply Dropout
    flat_first = tf.nn.dropout(flat_first, 0.4)
    flat_second = tf.nn.dropout(flat_second, 0.4)
    
    print(flat_first.get_shape())
    print(flat_second.get_shape())
    
    final_layer = tf.reshape(tf.concat(0, [flat_first, flat_second]),(-1,1))
    #final_layer = tf.add(tf.matmul(final_layer, weights['wfinal']), weights['bfinal'])
    print(final_layer.get_shape())
    """
    ##Add fully connected layers here
    dense_first = tf.layers.dense(inputs=flat_first, units=128, activation=tf.nn.relu)
    dense_second = tf.layers.dense(inputs=flat_second, units=128, activation=tf.nn.relu)
    print(dense_first.shape)
    print(dense_second.shape)
    final_layer = tf.reshape(tf.concat([dense_first, dense_second], 0),(1,-1))
    print(final_layer.shape)
    """
    p_y_X = tf.nn.sigmoid(tf.add(tf.matmul(final_layer,weights['wfinal']),weights['bfinal']))
    print(p_y_X.get_shape())
    return p_y_X
    """
    #Contrastive loss
    d = tf.reduce_sum(tf.square(dense_first - dense_second), 1)
    d_sqrt = tf.sqrt(d)
    loss = label * tf.square(tf.maximum(0., margin - d_sqrt)) + (1 - label) * d
    loss = 0.5 * tf.reduce_mean(loss)
    return loss
    """
    

In [14]:
import sklearn.metrics as sm
batch_size    = 1
learning_rate = 0.003
n_epoch       = 50
n_samples     = len(melspectrogram)                              # change to 1000 for entire dataset
cv_split      = 0.8                             
train_size    = int(n_samples * cv_split)                               
test_size     = n_samples - train_size

#Load the dataset
X1,X2,indicators = form_verification_dataset(melspectrogram,one_hot_labels,sample=10)
num_samples, n_mels, mel_vals = X1.shape[0],X1.shape[1],X1.shape[2]

#Split into training and testing
def make_splits_verif(X1,X2,indicators):
    inds = range(0,len(X1))
    train_inds,test_inds,_,_ = train_test_split(inds,inds,random_state = 42, test_size = 0.33)
    print(train_inds,test_inds)
    X1_train = [X1[idx] for idx in train_inds]
    X1_test = [X1[idx] for idx in test_inds]
    
    X2_train = [X2[idx] for idx in train_inds]
    X2_test = [X1[idx] for idx in test_inds]
    
    X1_test = np.asarray(X1_test).reshape(-1, n_mels,mel_vals,1)
    X2_test = np.asarray(X2_test).reshape(-1, n_mels,mel_vals,1)
    
    y_train, y_test = np.reshape([indicators[idx] for idx in train_inds],(-1,1)), np.reshape([indicators[idx] for idx in test_inds],(-1,1))
    return X1_train, X2_train, y_train, X1_test, X2_test, y_test

def next_batch_verif(num, X1, X2, indicators):
    '''
    Return a total of `num` random samples and labels. 
    '''
    num_samples, n_mels, mel_vals = X1.shape[0],X1.shape[1],X1.shape[2]
    idx = np.arange(0 , len(X1))
    np.random.shuffle(idx)
    idx = idx[:num]
    X1_shuffle = [X1[i] for i in idx]
    X2_shuffle = [X2[i] for i in idx]
    indicators_shuffle = [indicators[i] for i in idx]
    return (np.asarray(X1_shuffle).reshape((num,n_mels,mel_vals,1)), 
            np.asarray(X1_shuffle).reshape((num,n_mels,mel_vals,1)), 
            np.asarray(indicators_shuffle).reshape(-1,1))


#Make data splits
X1_train, X2_train, y_train, X1_test, X2_test, y_test = make_splits_verif(X1,X2,indicators)

X_first = tf.placeholder("float", [None, X1.shape[1], X1.shape[2], 1],name="First_input_vector")
X_second = tf.placeholder("float", [None, X1.shape[1], X1.shape[2], 1],name="Second_input_vector")

y = tf.placeholder("float", [None, 1],name="Truth_labels")
lrate = tf.placeholder("float",name="Learning_rate")
phase_train = tf.placeholder(tf.bool, name='phase_train')

y_ = dual_cnn(X_first, X_second, weights, phase_train)

cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(labels = y, logits = y_))
train_op = tf.train.RMSPropOptimizer(lrate, 0.9).minimize(cost)
predict_op = y_





with tf.Session() as sess:
    tf.initialize_all_variables().run()
    for i in range(n_epoch):
        print(i)
        #training_batch = zip(range(0, train_size, batch_size),range(batch_size, train_size+1, batch_size))
        for j in range(50):
            X1_train_batch,X2_train_batch,y_train_batch = next_batch_verif(batch_size,X1,X2,indicators)
            print(X1_train_batch.shape,X2_train_batch.shape,y_train_batch.shape )
            train_input_dict = {X_first: X1_train_batch,
                                X_second: X2_train_batch,
                                y: y_train_batch,
                                lrate: learning_rate,
                                phase_train: [True]}
            sess.run(train_op, feed_dict=train_input_dict)
        
        
        test_input_dict = {X_first: X1_test,
                           X_second: X2_test,
                           y: y_test,
                           phase_train:[False]}
        predictions = sess.run(predict_op, feed_dict=test_input_dict)
        print('Epoch : ', i,  'AUC : ', sm.roc_auc_score(y_test, predictions, average='samples'))
        # print(i, np.mean(np.argmax(y_test[test_indices], axis=1) == predictions))
        # print sort_result(tags, predictions)[:5]

[13, 42, 15, 9, 16, 37, 31, 27, 0, 30, 29, 5, 11, 33, 1, 40, 21, 2, 34, 23, 36, 10, 22, 18, 44, 20, 7, 14, 28, 38] [39, 25, 26, 43, 35, 41, 4, 12, 8, 3, 6, 24, 32, 19, 17]
(?, 256)
(?, 256)
(?, 128)
(?, 128)


TypeError: Expected int32, got list containing Tensors of type '_Message' instead.

In [56]:
tf.bool

tf.bool

In [163]:
X1,X2,indicators = form_verification_dataset(X_train,y_train,sample=10)

AssertionError: 

In [191]:
y_train.shape

AttributeError: 'list' object has no attribute 'shape'

In [3]:
sess = tf.Session(config=tf.ConfigProto(log_device_placement=True))
sess

<tensorflow.python.client.session.Session at 0x19ffc8027f0>