In [1]:
import librosa 
import os
import numpy as np
from tqdm import tqdm
from time import time
import pandas as pd
from scipy import misc
import sys
import tensorflow as tf
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import OneHotEncoder
from sklearn.model_selection import train_test_split
import pickle
SAMPLE = 100

In [6]:
def load_audio(music_dir):
    
    folders = [os.path.join(music_dir,folder) for folder in list(os.walk(music_dir))[0][1]]
    print(folders[0])
    filenames = [[os.path.join(folder,f) for f in list(os.walk(folder))[0][2]] for folder in folders]
    filenames = [item for sublist in filenames for item in sublist]
    files = [item for item in filenames]
    track_ids = [str(int(filename.split('\\')[-1].split('.')[0])) for filename in filenames]
    print(files[0:10])
    print(track_ids[0:10])
    print("Number of files: " + str(len(files)))
    return files, track_ids




In [9]:
"""
NOT NEEDED for current analysis.
Load audio files, aggregate them over 100 sample intervals. Find mean, max and min for the features.
"""
def process_audio(files,track_ids,window = 100,sample = SAMPLE):
    clip_range = int(19800*SAMPLE/window)  ##To clip the feature vectors for equal length.
    train_data = []
    num_files = len(files[0:sample])
    for i in tqdm(range(num_files)):
        audio_vec = []
        audio, _ = librosa.load(files[i])
        audio = np.reshape(audio,(-1))
        for j in range(1,int(audio.shape[0]/window)):
            mean_val = np.mean(audio[j*window: min((j+1)*window,audio.shape[0])])
            max_val = np.max(audio[j*window: min((j+1)*window,audio.shape[0])])
            min_val = np.min(audio[j*window: min((j+1)*window,audio.shape[0])])
            #Append to audio vector for this audio file.
            audio_vec += [mean_val,max_val,min_val]
        audio_vec = np.array(audio_vec[:clip_range])
        train_data.append(np.array(audio_vec))
    train_data = np.array(train_data)
    print("Original audio shape: " + str(audio.shape))
    print("Condensed audio shape: " + str(train_data.shape))
    train_ids = track_ids[0:num_files]
    return train_data, train_ids
#train_data, train_ids = process_audio(files,track_ids, 200,200)       

In [6]:
##Now aggregate results found in tracks.csv and get the labels.

def load_tracks_file(filepath):
    tracks = pd.read_csv(filepath)
    ##Set new columns for the dataframe and remove the multi-index.
    new_cols = tracks.iloc[0]
    tracks = tracks.iloc[1:]
    new_cols[0] = "track_id"
    tracks.columns = new_cols
    labels = tracks["genre_top"]
    
    #Track id column should be string type
    tracks.track_id = tracks.track_id.astype(int).astype(str)
    #tracks = tracks.set_index("track_id")
    return tracks,labels


In [10]:
"""
Extract the melspectrogram from the audio files.
"""

import sys
import numpy as np
import librosa as lb
from scipy import misc
import matplotlib.pyplot as plt
%matplotlib inline

Fs         = 12000
N_FFT      = 512
N_MELS     = 96
N_OVERLAP  = 256
DURA       = 29.12

def log_scale_melspectrogram(path, plot=False):
    signal, sr = lb.load(path, sr=Fs)

    n_sample = signal.shape[0]
    n_sample_fit = int(DURA*Fs)
    
    if n_sample < n_sample_fit:
        signal = np.hstack((signal, np.zeros((int(DURA*Fs) - n_sample,))))
    elif n_sample > n_sample_fit:
        signal = signal[round((n_sample-n_sample_fit)/2):round((n_sample+n_sample_fit)/2)]
    
    melspect = lb.amplitude_to_db(lb.feature.melspectrogram(y=signal, sr=Fs, hop_length=N_OVERLAP, n_fft=N_FFT, n_mels=N_MELS)**2, ref=1.0)

    if plot:
        melspect = melspect[np.newaxis, :]
        plt.imshow(melspect.reshape((melspect.shape[1],melspect.shape[2])))
        print(melspect.shape)

    return melspect

In [2]:
def obtain_mel_features(filenames):
    arr = np.zeros([len(filenames), 96, 1366])
    track_ids = [str(int(filename.split('\\')[-1].split('.')[0])) for filename in filenames]
    for i in tqdm(range(len(filenames))):
        mel = log_scale_melspectrogram(filenames[i], plot = False)
        arr[i,:,:] = mel
    return arr, track_ids

"""
Now, to determine the labels for the audio files that we have.
"""

def find_new_labels(train_ids, tracks_df):
    labels_final = []
    failed_indices = []
    for i,tr in enumerate(train_ids):
        try:
            labels_final.append(tracks_df[tracks_df.track_id == tr]["genre_top"].values[0])
        except:
            #Store the index and track_id of such failed rows - and remove these in the audio features
            failed_indices.append((i,tr))
    #One hot encode
    label_dic = {label: i for (i,label) in enumerate(np.unique(labels_final))} #To store numerical values for each label
    label_nums = np.array([label_dic[l] for l in labels_final]).reshape((-1,1))
    print(label_nums.shape)
    one_hot_labels = OneHotEncoder().fit_transform(label_nums).todense()
    return one_hot_labels, label_dic,failed_indices


def make_splits(melspectrogram,one_hot_labels,test_size = 0.33):
    return train_test_split(melspectrogram, one_hot_labels, test_size = test_size, random_state = 42)


  
#X_train, X_test, y_train, y_test = make_splits(melspectrogram,one_hot_labels)


In [9]:
if not os.path.exists("./melspectrogram1.pkl"):
    #Load audio
    music_dir = "./music_samples/"
    track_details_path = "./tracks_small.csv"
    files, track_ids = load_audio(music_dir)

    #Load csv file
    filepath = "./tracks_small.csv"
    tracks_df, labels = load_tracks_file(filepath)

    #Process melspectrograms from the audio files
    melspectrogram, track_ids = obtain_mel_features(files[0:4000])
    one_hot_labels, label_dic = find_new_labels(track_ids,tracks_df)
    pickle.dump({"melspectrogram": melspectrogram, "one_hot_labels":one_hot_labels, "label_dic":label_dic},
                open("audio_data_labels.pkl",'wb'))
    pickle.dump(melspectrogram[0:1000],open("./melspectrogram1.pkl",'wb'),protocol =2)
    pickle.dump(melspectrogram[1001:2000],open("./melspectrogram2.pkl",'wb'),protocol =2)
    pickle.dump(melspectrogram[2001:3000],open("./melspectrogram3.pkl",'wb'),protocol =2)
    pickle.dump(melspectrogram[3001:4000],open("./melspectrogram4.pkl",'wb'),protocol =2)
    pickle.dump(track_ids,open("./track_ids.pkl",'wb'),protocol =2)
else:
    print("Loading existing data...")
    melspectrogram = pickle.load(open('./melspectrogram1.pkl','rb'))
    track_ids = pickle.load(open('./track_ids.pkl','rb'))[0:len(melspectrogram)]
tracks_df, labels = load_tracks_file("./tracks_small.csv")  

Loading existing data...


In [10]:
def remove_failed_indices(melspectrogram, track_ids, failed_inds):
    print("Original melspectrogram shape: " + str(len(melspectrogram)))
    print("Number of failed indicies: " + str(len(failed_inds)))
    #First perform assertions to check if all the correct ids were obtained    
    for i in range(len(failed_inds)):
        assert track_ids[failed_inds[i][0]] ==  failed_inds[i][1], "Failed at index " + str(i)
    #Remove these from the audio features
    melspectrogram_new = [m for i,m in enumerate(melspectrogram) if not i in list(zip(*failed_inds))[0]]
    track_ids_new = [t for i,t in enumerate(track_ids) if not i in list(zip(*failed_inds))[0]]
    print("New melspectrogram shape: " + str(len(melspectrogram_new)))
    print("New track_ids shape: " + str(len(track_ids_new)))
    print("%d indices were removed" %len(failed_inds))
    assert len(melspectrogram_new) == len(track_ids_new) == len(melspectrogram) - len(failed_inds) 
    return melspectrogram_new, track_ids_new


##Get the labels for these audio features and remove any indices that fail due to some index reason.
one_hot_labels, label_dic,failed_inds = find_new_labels(track_ids,tracks_df)
mel_new, trs_new = remove_failed_indices(melspectrogram,track_ids,failed_inds)
mel_new = np.array(mel_new)

(994, 1)
Original melspectrogram shape: 1000
Number of failed indicies: 6
New melspectrogram shape: 994
New track_ids shape: 994
6 indices were removed


In [20]:
X1,X2,indicators = form_verification_dataset(mel_new,one_hot_labels,sample=50)
pickle.dump(X1, open("X1_data.pkl",'wb'),protocol =2)
pickle.dump(X2, open("X2_data.pkl",'wb'),protocol =2)
pickle.dump(indicators, open("indicator_data.pkl",'wb'),protocol =2)

Indicators: 0 One Hot[[1. 0.]]


In [21]:
pickle.dump(mel_new,open("./melspectrogram1.pkl",'wb'),protocol =2)

In [2]:
def batch_norm(x, n_out, phase_train, scope='bn'):
    with tf.variable_scope(scope):
        beta = tf.Variable(tf.constant(0.0, shape=[n_out]),name='beta', trainable=True)
        gamma = tf.Variable(tf.constant(1.0, shape=[n_out]),name='gamma', trainable=True)
        batch_mean, batch_var = tf.nn.moments(x, [0,1,2], name='moments')
        ema = tf.train.ExponentialMovingAverage(decay=0.5)

        def mean_var_with_update():
            ema_apply_op = ema.apply([batch_mean, batch_var])
            with tf.control_dependencies([ema_apply_op]):
                return tf.identity(batch_mean), tf.identity(batch_var)

        mean, var = tf.cond(phase_train,
                            mean_var_with_update,
                            lambda: (ema.average(batch_mean), ema.average(batch_var)))
        normed = tf.nn.batch_normalization(x, mean, var, beta, gamma, 1e-3)
    return normed



In [3]:
"""
Converts original dataset into the verification format. i.e. Input spectrograms X1, X2 and an output of 0 if they're of 
dissimilar genre and 1 if they're of the same genre.
Input: data , labels, sample (How many input samples do you want to transform?)
Output: Three lists X1, X2, indicators
"""
def form_verification_dataset(data,labels,sample):
    ##Insert assert statements here for the correct input data sizes
    assert len(data) == len(labels)
    #Select samples
    idx = np.arange(0 , len(data))
    np.random.shuffle(idx)
    idx = idx[:sample]
    data_shuffle = [data[i] for i in idx]
    labels_shuffle = [labels[i] for i in idx]
    
    #Pair each sample with every other sample from the dataset. O(sample^2)
    num_mels,mel_vals = data.shape[1], data.shape[2]
    X1 = X2 = np.zeros((sample*(sample-1)//2,num_mels,mel_vals))
    indicators = []
    count = 0
    for i in range(0,len(data_shuffle)):
        for j in range(i+1,len(data_shuffle)):
            X1[count,:,:] = data_shuffle[i]
            X1[count,:,:] = data_shuffle[j]
            if np.equal(labels_shuffle[i], labels_shuffle[j]).all():
                indicators.append(0)
            else:
                indicators.append(1)
            count += 1
            
    one_hot = OneHotEncoder().fit_transform(np.array(indicators).reshape(-1,1)).todense()
    print("Indicators: " + str(indicators[0]) + " One Hot" + str(one_hot[0]))
    return X1, X2, one_hot.reshape(-1,2)
    
    
    

In [4]:
def init_weights(shape):
    return tf.Variable(tf.random_normal(shape, stddev=0.01))

def init_biases(shape):
    return tf.Variable(tf.zeros(shape))

weights = {
        'wconv1':init_weights([3, 3, 1, 32]),
        'wconv2':init_weights([3, 3, 32, 128]),
        'wconv3':init_weights([3, 3, 128, 128]),
        'wconv4':init_weights([3, 3, 128, 192]),
        'wconv5':init_weights([3, 3, 192, 256]),
        'bconv1':init_biases([32]),
        'bconv2':init_biases([128]),
        'bconv3':init_biases([128]),
        'bconv4':init_biases([192]),
        'bconv5':init_biases([256]),
        'woutput':init_weights([256, 128]),
        'boutput':init_biases([128]),
        'woutput2':init_weights([256, 128]),
        'boutput2':init_biases([128]),
        'wfinal':init_weights([256, 2]),
        'bfinal':init_biases([2]),}

In [18]:
margin = 0.2

def dual_cnn(X_first,X_second, weights, phase_train,keep_prob):
    
    #assert X_first.shape == X_second.shape, str(X_first.shape) + " does not match with " + str(X_second.shape)
    #assert indicators.shape[0] == X_first.shape[0], "Indicator array is not of length " + indicators.shape[0]
    
    num_samples, n_mels, mel_vals = X1_train.shape[0],X1_train.shape[1],X1_train.shape[2]
    x_first = X_first
    x_second = X_second
    x_first = batch_norm(x_first, mel_vals, phase_train)
    x_second = batch_norm(x_first, mel_vals, phase_train)
    
    x_first = tf.reshape(x_first,[-1,n_mels,mel_vals,1])
    x_second = tf.reshape(x_second,[-1,n_mels,mel_vals,1])
    
    conv2_1_first = tf.add(tf.nn.conv2d(x_first, weights['wconv1'], strides=[1, 1, 1, 1], padding='SAME'), weights['bconv1'])
    conv2_1_second = tf.add(tf.nn.conv2d(x_second, weights['wconv1'], strides=[1, 1, 1, 1], padding='SAME'), weights['bconv1'])
    
    conv2_1_first = tf.nn.relu(batch_norm(conv2_1_first, 32, phase_train))
    conv2_1_second = tf.nn.relu(batch_norm(conv2_1_second, 32, phase_train))
    
    mpool_1_first = tf.nn.max_pool(conv2_1_first, ksize=[1, 2, 4, 1], strides=[1, 2, 4, 1], padding='VALID')
    mpool_1_second = tf.nn.max_pool(conv2_1_second, ksize=[1, 2, 4, 1], strides=[1, 2, 4, 1], padding='VALID')
    
    dropout_1_first = tf.nn.dropout(mpool_1_first, keep_prob)
    dropout_1_second = tf.nn.dropout(mpool_1_second, keep_prob)

    conv2_2_first = tf.add(tf.nn.conv2d(dropout_1_first, weights['wconv2'], strides=[1, 1, 1, 1], padding='SAME'), weights['bconv2'])
    conv2_2_second = tf.add(tf.nn.conv2d(dropout_1_second, weights['wconv2'], strides=[1, 1, 1, 1], padding='SAME'), weights['bconv2'])
    
    conv2_2_first = tf.nn.relu(batch_norm(conv2_2_first, 128, phase_train))
    conv2_2_second = tf.nn.relu(batch_norm(conv2_2_first, 128, phase_train))
    
    mpool_2_first = tf.nn.max_pool(conv2_2_first, ksize=[1, 2, 4, 1], strides=[1, 2, 4, 1], padding='VALID')
    mpool_2_second = tf.nn.max_pool(conv2_2_first, ksize=[1, 2, 4, 1], strides=[1, 2, 4, 1], padding='VALID')
    
    
    dropout_2_first = tf.nn.dropout(mpool_2_first, keep_prob)
    dropout_2_second = tf.nn.dropout(mpool_2_second, keep_prob)

    conv2_3_first = tf.add(tf.nn.conv2d(dropout_2_first, weights['wconv3'], strides=[1, 1, 1, 1], padding='SAME'), weights['bconv3'])
    conv2_3_second = tf.add(tf.nn.conv2d(dropout_2_second, weights['wconv3'], strides=[1, 1, 1, 1], padding='SAME'), weights['bconv3'])
    

    conv2_3_first = tf.nn.relu(batch_norm(conv2_3_first, 128, phase_train))
    conv2_3_second = tf.nn.relu(batch_norm(conv2_3_second, 128, phase_train))
    
    mpool_3_first = tf.nn.max_pool(conv2_3_first, ksize=[1, 2, 4, 1], strides=[1, 2, 4, 1], padding='VALID')
    mpool_3_second = tf.nn.max_pool(conv2_3_second, ksize=[1, 2, 4, 1], strides=[1, 2, 4, 1], padding='VALID')
    
    dropout_3_first = tf.nn.dropout(mpool_3_first, keep_prob)
    dropout_3_second = tf.nn.dropout(mpool_3_second, keep_prob)

    conv2_4_first = tf.add(tf.nn.conv2d(dropout_3_first, weights['wconv4'], strides=[1, 1, 1, 1], padding='SAME'), weights['bconv4'])
    conv2_4_second = tf.add(tf.nn.conv2d(dropout_3_second, weights['wconv4'], strides=[1, 1, 1, 1], padding='SAME'), weights['bconv4'])
    
    conv2_4_first = tf.nn.relu(batch_norm(conv2_4_first, 192, phase_train))
    conv2_4_second = tf.nn.relu(batch_norm(conv2_4_second, 192, phase_train))
    
    mpool_4_first = tf.nn.max_pool(conv2_4_first, ksize=[1, 3, 5, 1], strides=[1, 3, 5, 1], padding='VALID')
    mpool_4_second = tf.nn.max_pool(conv2_4_second, ksize=[1, 3, 5, 1], strides=[1, 3, 5, 1], padding='VALID')
    
    dropout_4_first = tf.nn.dropout(mpool_4_first, keep_prob)
    dropout_4_second = tf.nn.dropout(mpool_4_second, keep_prob)

    conv2_5_first = tf.add(tf.nn.conv2d(dropout_4_first, weights['wconv5'], strides=[1, 1, 1, 1], padding='SAME'), weights['bconv5'])
    conv2_5_second = tf.add(tf.nn.conv2d(dropout_4_second, weights['wconv5'], strides=[1, 1, 1, 1], padding='SAME'), weights['bconv5'])
    
    
    conv2_5_first = tf.nn.relu(batch_norm(conv2_5_first, 256, phase_train))
    conv2_5_second = tf.nn.relu(batch_norm(conv2_5_second, 256, phase_train))
    
    mpool_5_first = tf.nn.max_pool(conv2_5_first, ksize=[1, 4, 4, 1], strides=[1, 4, 4, 1], padding='VALID')
    mpool_5_second = tf.nn.max_pool(conv2_5_second, ksize=[1, 4, 4, 1], strides=[1, 4, 4, 1], padding='VALID')
    
    dropout_5_first = tf.nn.dropout(mpool_5_first, keep_prob)
    dropout_5_second = tf.nn.dropout(mpool_5_second, keep_prob)
    
    flat_first = tf.reshape(dropout_5_first, [-1, weights['woutput'].get_shape().as_list()[0]])
    flat_second = tf.reshape(dropout_5_second, [-1, weights['woutput'].get_shape().as_list()[0]])
    
    print(flat_first.get_shape())
    print(flat_second.get_shape())
    flat_first = tf.add(tf.matmul(flat_first, weights['woutput2']), weights['boutput2'])
    flat_second = tf.add(tf.matmul(flat_second, weights['woutput2']), weights['boutput2'])
    
    flat_first = tf.nn.relu(flat_first)
    flat_second = tf.nn.relu(flat_second)
    # Apply Dropout
    flat_first = tf.nn.dropout(flat_first, keep_prob)
    flat_second = tf.nn.dropout(flat_second, keep_prob)
    
    print(flat_first.get_shape())
    print(flat_second.get_shape())
    
    final_layer = tf.concat([flat_first, flat_second],1)
    #final_layer = tf.add(tf.matmul(final_layer, weights['wfinal']), weights['bfinal'])
    print(final_layer.get_shape())
    """
    ##Add fully connected layers here
    dense_first = tf.layers.dense(inputs=flat_first, units=128, activation=tf.nn.relu)
    dense_second = tf.layers.dense(inputs=flat_second, units=128, activation=tf.nn.relu)
    print(dense_first.shape)
    print(dense_second.shape)
    final_layer = tf.reshape(tf.concat([dense_first, dense_second], 0),(1,-1))
    print(final_layer.shape)
    """
    p_y_X = tf.nn.sigmoid(tf.add(tf.matmul(final_layer,weights['wfinal']),weights['bfinal']))
    print(p_y_X.get_shape())
    
    return p_y_X
    """
    #Contrastive loss
    d = tf.reduce_sum(tf.square(dense_first - dense_second), 1)
    d_sqrt = tf.sqrt(d)
    loss = label * tf.square(tf.maximum(0., margin - d_sqrt)) + (1 - label) * d
    loss = 0.5 * tf.reduce_mean(loss)
    return loss
    """
    

In [15]:
100 * (100 - 1)/2

4950.0

In [18]:
#Load the dataset
X1,X2,indicators = form_verification_dataset(mel_new,one_hot_labels,sample=10)
print(indicators.shape)
num_samples, n_mels, mel_vals = X1.shape[0],X1.shape[1],X1.shape[2]


NameError: name 'mel_new' is not defined

In [13]:
import sklearn.metrics as sm
batch_size    = 1
learning_rate = 0.003
n_epoch       = 50
n_samples     = 1000#len(melspectrogram)                              # change to 1000 for entire dataset
cv_split      = 0.8                             
train_size    = int(n_samples * cv_split)                               
test_size     = n_samples - train_size
n_mels = 96
mel_vals = 1366
"""
Load X1, X2, indicators and track IDS.
"""

track_ids = pickle.load(open('track_ids.pkl','rb'))
X1 = pickle.load(open("X1_data.pkl",'rb'))
X2 = pickle.load(open("X2_data.pkl",'rb'))
indicators = pickle.load(open("indicator_data.pkl",'rb'))


#Split into training and testing
def make_splits_verif(X1,X2,indicators):
    inds = list(range(0,len(X1)))
    train_inds,test_inds,_,_ = train_test_split(inds,inds,random_state = 42, test_size = 0.33)
    X1_train = np.asarray([X1[idx] for idx in train_inds]).reshape(-1, n_mels,mel_vals,1)
    X1_test = np.asarray([X1[idx] for idx in test_inds]).reshape(-1, n_mels,mel_vals,1)
    
    X2_train = np.asarray([X2[idx] for idx in train_inds]).reshape(-1, n_mels,mel_vals,1)
    X2_test = np.asarray([X1[idx] for idx in test_inds]).reshape(-1, n_mels,mel_vals,1)
    
    y_train = np.asarray([indicators[idx] for idx in train_inds]).reshape(-1,2) 
    y_test = np.asarray([indicators[idx] for idx in test_inds]).reshape(-1,2)
    print("X1_train: " + str(X1_train.shape))
    print("X2_train: " + str(X2_train.shape))
    print("X1_test: " + str(X1_test.shape))
    print("X2_test: " + str(X2_test.shape))
    print("y_train: " + str(y_train.shape))
    print("y_test: " + str(y_test.shape))
    return X1_train, X2_train, y_train, X1_test, X2_test, y_test

def next_batch_verif(num, X1, X2, indicators):
    '''
    Return a total of `num` random samples and labels. 
    '''
    num_samples, n_mels, mel_vals = X1.shape[0],X1.shape[1],X1.shape[2]
    idx = np.arange(0 , len(X1))
    np.random.shuffle(idx)
    idx = idx[:num]
    X1_shuffle = [X1[i] for i in idx]
    X2_shuffle = [X2[i] for i in idx]
    indicators_shuffle = [indicators[i] for i in idx]
    return (np.asarray(X1_shuffle).reshape((num,n_mels,mel_vals,1)), 
            np.asarray(X1_shuffle).reshape((num,n_mels,mel_vals,1)), 
            np.asarray(indicators_shuffle).reshape(-1,2))


#Make data splits
X1_train, X2_train, y_train, X1_test, X2_test, y_test = make_splits_verif(X1,X2,indicators)

#Clear memory
del X1, X2, indicators

X1_train: (820, 96, 1366, 1)
X2_train: (820, 96, 1366, 1)
X1_test: (405, 96, 1366, 1)
X2_test: (405, 96, 1366, 1)
y_train: (820, 2)
y_test: (405, 2)


In [19]:
X_first = tf.placeholder("float", [None, X1_train.shape[1], X1_train.shape[2], 1],name="First_input_vector")
X_second = tf.placeholder("float", [None, X1_train.shape[1], X1_train.shape[2], 1],name="Second_input_vector")

y = tf.placeholder("float", [None, 2],name="Truth_labels")
lrate = tf.placeholder("float",name="Learning_rate")
keep_prob = tf.placeholder("float",name="Dropout")
phase_train = tf.placeholder(tf.bool, name='phase_train')

y_ = dual_cnn(X_first, X_second, weights, phase_train,keep_prob)
print(y_.shape)
predict_op = y_
print(y.shape,y_.shape)
# Train and Evaluate Model
cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(labels = y, logits = y_))
train_op = tf.train.AdamOptimizer(1e-4).minimize(cost)
correct_prediction = tf.equal(tf.argmax(y_, 1), tf.argmax(y_, 1))
accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))



(?, 256)
(?, 256)
(?, 128)
(?, 128)
(?, 256)
(?, 2)
(?, 2)
(?, 2) (?, 2)
Instructions for updating:

Future major versions of TensorFlow will allow gradients to flow
into the labels input on backprop by default.

See tf.nn.softmax_cross_entropy_with_logits_v2.



Instructions for updating:

Future major versions of TensorFlow will allow gradients to flow
into the labels input on backprop by default.

See tf.nn.softmax_cross_entropy_with_logits_v2.



In [23]:
batch_size = 20
config = tf.ConfigProto()
config.gpu_options.allow_growth=True
with tf.Session(config=config) as sess:
    tf.initialize_all_variables().run()
    for i in range(n_epoch):
        X1_train_batch,X2_train_batch,y_train_batch = next_batch_verif(batch_size,X1_train,X2_train,y_train)
        print(X1_train_batch.shape)
        train_input_dict = {X_first: X1_train_batch,
                            X_second: X2_train_batch,
                            y: y_train_batch,
                            phase_train: True,
                            keep_prob: 0.5}
        _, c = sess.run([train_op,cost], feed_dict=train_input_dict)
    
        if i % 5 == 0:
            print("We are in epoch: " +str(i))
            print("Cost:" +  str(c))
            train_accuracy = accuracy.eval(feed_dict=train_input_dict)
            print('step %d, training accuracy %g' % (i, train_accuracy))
            test_input_dict = {X_first: X1_test[0:50],
                               X_second: X2_test[0:50],
                               y: y_test[0:100],
                               phase_train:False,
                               keep_prob: 1.0}
            predictions = sess.run(predict_op, feed_dict=test_input_dict)
            print('Epoch : ', i,  'AUC : ', sm.roc_auc_score(y_test, predictions, average='samples'))
            print('test accuracy %g' % accuracy.eval(feed_dict=test_input_dict))
        #test_predictions = np.array([0 if t < 0.5 else 1 for t in test_predictions])
        #print('Epoch : ', i,  'F1_score : ', sm.f1_score(y_test, test_predictions, pos_label=0))
        # print(i, np.mean(np.argmax(y_test[test_indices], axis=1) == predictions))
        # print sort_result(tags, predictions)[:5]


(20, 96, 1366, 1)


ResourceExhaustedError: OOM when allocating tensor with shape[20,96,1366,1366] and type float on /job:localhost/replica:0/task:0/device:GPU:0 by allocator GPU_0_bfc
	 [[Node: bn/batchnorm/mul_1 = Mul[T=DT_FLOAT, _device="/job:localhost/replica:0/task:0/device:GPU:0"](_arg_First_input_vector_2_0_1/_5, bn/batchnorm/mul)]]
Hint: If you want to see a list of allocated tensors when OOM happens, add report_tensor_allocations_upon_oom to RunOptions for current allocation info.

	 [[Node: Mean/_147 = _Recv[client_terminated=false, recv_device="/job:localhost/replica:0/task:0/device:CPU:0", send_device="/job:localhost/replica:0/task:0/device:GPU:0", send_device_incarnation=1, tensor_name="edge_3118_Mean", tensor_type=DT_FLOAT, _device="/job:localhost/replica:0/task:0/device:CPU:0"]()]]
Hint: If you want to see a list of allocated tensors when OOM happens, add report_tensor_allocations_upon_oom to RunOptions for current allocation info.


Caused by op 'bn/batchnorm/mul_1', defined at:
  File "C:\Users\Nitin\Anaconda3\envs\ml\lib\runpy.py", line 193, in _run_module_as_main
    "__main__", mod_spec)
  File "C:\Users\Nitin\Anaconda3\envs\ml\lib\runpy.py", line 85, in _run_code
    exec(code, run_globals)
  File "C:\Users\Nitin\Anaconda3\envs\ml\lib\site-packages\ipykernel_launcher.py", line 16, in <module>
    app.launch_new_instance()
  File "C:\Users\Nitin\Anaconda3\envs\ml\lib\site-packages\traitlets\config\application.py", line 658, in launch_instance
    app.start()
  File "C:\Users\Nitin\Anaconda3\envs\ml\lib\site-packages\ipykernel\kernelapp.py", line 477, in start
    ioloop.IOLoop.instance().start()
  File "C:\Users\Nitin\Anaconda3\envs\ml\lib\site-packages\zmq\eventloop\ioloop.py", line 177, in start
    super(ZMQIOLoop, self).start()
  File "C:\Users\Nitin\Anaconda3\envs\ml\lib\site-packages\tornado\ioloop.py", line 888, in start
    handler_func(fd_obj, events)
  File "C:\Users\Nitin\Anaconda3\envs\ml\lib\site-packages\tornado\stack_context.py", line 277, in null_wrapper
    return fn(*args, **kwargs)
  File "C:\Users\Nitin\Anaconda3\envs\ml\lib\site-packages\zmq\eventloop\zmqstream.py", line 440, in _handle_events
    self._handle_recv()
  File "C:\Users\Nitin\Anaconda3\envs\ml\lib\site-packages\zmq\eventloop\zmqstream.py", line 472, in _handle_recv
    self._run_callback(callback, msg)
  File "C:\Users\Nitin\Anaconda3\envs\ml\lib\site-packages\zmq\eventloop\zmqstream.py", line 414, in _run_callback
    callback(*args, **kwargs)
  File "C:\Users\Nitin\Anaconda3\envs\ml\lib\site-packages\tornado\stack_context.py", line 277, in null_wrapper
    return fn(*args, **kwargs)
  File "C:\Users\Nitin\Anaconda3\envs\ml\lib\site-packages\ipykernel\kernelbase.py", line 283, in dispatcher
    return self.dispatch_shell(stream, msg)
  File "C:\Users\Nitin\Anaconda3\envs\ml\lib\site-packages\ipykernel\kernelbase.py", line 235, in dispatch_shell
    handler(stream, idents, msg)
  File "C:\Users\Nitin\Anaconda3\envs\ml\lib\site-packages\ipykernel\kernelbase.py", line 399, in execute_request
    user_expressions, allow_stdin)
  File "C:\Users\Nitin\Anaconda3\envs\ml\lib\site-packages\ipykernel\ipkernel.py", line 196, in do_execute
    res = shell.run_cell(code, store_history=store_history, silent=silent)
  File "C:\Users\Nitin\Anaconda3\envs\ml\lib\site-packages\ipykernel\zmqshell.py", line 533, in run_cell
    return super(ZMQInteractiveShell, self).run_cell(*args, **kwargs)
  File "C:\Users\Nitin\Anaconda3\envs\ml\lib\site-packages\IPython\core\interactiveshell.py", line 2728, in run_cell
    interactivity=interactivity, compiler=compiler, result=result)
  File "C:\Users\Nitin\Anaconda3\envs\ml\lib\site-packages\IPython\core\interactiveshell.py", line 2850, in run_ast_nodes
    if self.run_code(code, result):
  File "C:\Users\Nitin\Anaconda3\envs\ml\lib\site-packages\IPython\core\interactiveshell.py", line 2910, in run_code
    exec(code_obj, self.user_global_ns, self.user_ns)
  File "<ipython-input-19-932d1cad3320>", line 9, in <module>
    y_ = dual_cnn(X_first, X_second, weights, phase_train,keep_prob)
  File "<ipython-input-18-80be156476eb>", line 11, in dual_cnn
    x_first = batch_norm(x_first, mel_vals, phase_train)
  File "<ipython-input-2-965bee7e3c05>", line 16, in batch_norm
    normed = tf.nn.batch_normalization(x, mean, var, beta, gamma, 1e-3)
  File "C:\Users\Nitin\Anaconda3\envs\ml\lib\site-packages\tensorflow\python\ops\nn_impl.py", line 833, in batch_normalization
    return x * inv + (
  File "C:\Users\Nitin\Anaconda3\envs\ml\lib\site-packages\tensorflow\python\ops\math_ops.py", line 934, in binary_op_wrapper
    return func(x, y, name=name)
  File "C:\Users\Nitin\Anaconda3\envs\ml\lib\site-packages\tensorflow\python\ops\math_ops.py", line 1161, in _mul_dispatch
    return gen_math_ops._mul(x, y, name=name)
  File "C:\Users\Nitin\Anaconda3\envs\ml\lib\site-packages\tensorflow\python\ops\gen_math_ops.py", line 3091, in _mul
    "Mul", x=x, y=y, name=name)
  File "C:\Users\Nitin\Anaconda3\envs\ml\lib\site-packages\tensorflow\python\framework\op_def_library.py", line 787, in _apply_op_helper
    op_def=op_def)
  File "C:\Users\Nitin\Anaconda3\envs\ml\lib\site-packages\tensorflow\python\framework\ops.py", line 3271, in create_op
    op_def=op_def)
  File "C:\Users\Nitin\Anaconda3\envs\ml\lib\site-packages\tensorflow\python\framework\ops.py", line 1650, in __init__
    self._traceback = self._graph._extract_stack()  # pylint: disable=protected-access

ResourceExhaustedError (see above for traceback): OOM when allocating tensor with shape[20,96,1366,1366] and type float on /job:localhost/replica:0/task:0/device:GPU:0 by allocator GPU_0_bfc
	 [[Node: bn/batchnorm/mul_1 = Mul[T=DT_FLOAT, _device="/job:localhost/replica:0/task:0/device:GPU:0"](_arg_First_input_vector_2_0_1/_5, bn/batchnorm/mul)]]
Hint: If you want to see a list of allocated tensors when OOM happens, add report_tensor_allocations_upon_oom to RunOptions for current allocation info.

	 [[Node: Mean/_147 = _Recv[client_terminated=false, recv_device="/job:localhost/replica:0/task:0/device:CPU:0", send_device="/job:localhost/replica:0/task:0/device:GPU:0", send_device_incarnation=1, tensor_name="edge_3118_Mean", tensor_type=DT_FLOAT, _device="/job:localhost/replica:0/task:0/device:CPU:0"]()]]
Hint: If you want to see a list of allocated tensors when OOM happens, add report_tensor_allocations_upon_oom to RunOptions for current allocation info.



In [191]:
y_train.shape

AttributeError: 'list' object has no attribute 'shape'

In [39]:
sess = tf.Session(config=tf.ConfigProto(log_device_placement=True))
sess

<tensorflow.python.client.session.Session at 0x239d13d77b8>

In [56]:
A = [[1],[0],[0],[0],[1],[1]]
B = [[0.2],[0.5],[0.6],[0.3],[0.2],[0.8]]
np.argmax(B,1)

array([0, 0, 0, 0, 0, 0], dtype=int64)

In [69]:
X1,X2,indicators = form_verification_dataset(melspectrogram,one_hot_labels,sample=10)

Indicators: 0 One Hot[[1. 0.]]


In [70]:
indicators

matrix([[1., 0.],
        [1., 0.],
        [0., 1.],
        [0., 1.],
        [0., 1.],
        [0., 1.],
        [1., 0.],
        [1., 0.],
        [0., 1.],
        [1., 0.],
        [0., 1.],
        [0., 1.],
        [0., 1.],
        [0., 1.],
        [1., 0.],
        [1., 0.],
        [0., 1.],
        [0., 1.],
        [0., 1.],
        [0., 1.],
        [0., 1.],
        [1., 0.],
        [1., 0.],
        [0., 1.],
        [0., 1.],
        [0., 1.],
        [0., 1.],
        [0., 1.],
        [0., 1.],
        [0., 1.],
        [0., 1.],
        [1., 0.],
        [0., 1.],
        [0., 1.],
        [0., 1.],
        [0., 1.],
        [0., 1.],
        [0., 1.],
        [0., 1.],
        [0., 1.],
        [0., 1.],
        [0., 1.],
        [1., 0.],
        [0., 1.],
        [0., 1.]])

In [28]:
(X1[0] - np.mean(X1[0]))/ np.std(X1[0])

array([[-0.1102909, -0.1102909, -0.1102909, ..., -0.1102909, -0.1102909,
        -0.1102909],
       [-0.1102909, -0.1102909, -0.1102909, ..., -0.1102909, -0.1102909,
        -0.1102909],
       [-0.1102909, -0.1102909, -0.1102909, ..., -0.1102909, -0.1102909,
        -0.1102909],
       ...,
       [-0.1102909, -0.1102909, -0.1102909, ..., -0.1102909, -0.1102909,
        -0.1102909],
       [-0.1102909, -0.1102909, -0.1102909, ..., -0.1102909, -0.1102909,
        -0.1102909],
       [-0.1102909, -0.1102909, -0.1102909, ..., -0.1102909, -0.1102909,
        -0.1102909]])

In [37]:
#List of optimizations
print("Memory requirements if vectors were of type float16: %d MB"%(X1.astype("float16").nbytes//(1024*1024)))
print("Memory requirements if vectors were of type float32: %d MB"%(X1.astype("float32").nbytes//(1024*1024)))
print("Memory requirements if vectors were of type float64: %d MB"%(X1.astype("float64").nbytes//(1024*1024)))


Memory requirements (in MB) if X1 was of type float16: 306
Memory requirements (in MB) if X1 was of type float32: 612
Memory requirements (in MB) if X1 was of type float64: 1225
