From a0f2a52688945e0fb5304ce86da5202883730e0d Mon Sep 17 00:00:00 2001
From: Sylvain Chevallier
Date: Wed, 14 Oct 2015 13:19:26 +0200
Subject: [PATCH] new example for univariate case

---
 example_univariate.py | 60 ++++++++++++++++++++++++++++++++++---------
 mdla.py               | 25 ++++++++----------
 2 files changed, 58 insertions(+), 27 deletions(-)

diff --git a/example_univariate.py b/example_univariate.py
index 304230b..122f468 100644
--- a/example_univariate.py
+++ b/example_univariate.py
@@ -9,39 +9,41 @@ from numpy.random import rand, randn, permutation, randint
 
 # TODO: Add SNR, repeat experiments to make stats, make a fast and a
-# long version, use callback to compute distance
+# long version,
 
 def plot_univariate(objective_error, detection_rate, wasserstein,
                     n_iter, figname):
     fig = plt.figure(figsize=(15,5))
+    if n_iter == 1: step = 5
+    else: step = n_iter
     # plotting data from objective error
     objerr = fig.add_subplot(1,3,1)
-    _ = objerr.plot(n_iter*arange(1, len(objective_error)+1), objective_error,
+    _ = objerr.plot(step*arange(1, len(objective_error)+1), objective_error,
                     color='green', label=r'Objective error')
     objerr.axis([0, len(objective_error)-1, min(objective_error),
                  max(objective_error)])
-    objerr.set_xticks(arange(0, n_iter*len(objective_error)+1, n_iter))
+    objerr.set_xticks(arange(0, step*len(objective_error)+1, step))
     objerr.set_xlabel('Iteration')
     objerr.set_ylabel(r'Error (no unit)')
     objerr.legend(loc='upper right')
 
     # plotting data from detection rate 0.99
     detection = fig.add_subplot(1,3,2)
-    _ = detection.plot(n_iter*arange(1,len(detection_rate)+1), detection_rate,
+    _ = detection.plot(step*arange(1,len(detection_rate)+1), detection_rate,
                        color='magenta', label=r'Detection rate 0.99')
     detection.axis([0, len(detection_rate), 0, 100])
-    detection.set_xticks(arange(0, n_iter*len(detection_rate)+1, n_iter))
+    detection.set_xticks(arange(0, step*len(detection_rate)+1, step))
     detection.set_xlabel('Iteration')
     detection.set_ylabel(r'Recovery rate (in %)')
     detection.legend(loc='upper left')
 
     # plotting data from our metric
     met = fig.add_subplot(1,3,3)
-    _ = met.plot(n_iter*arange(1, len(wasserstein)+1), 100-wasserstein,
+    _ = met.plot(step*arange(1, len(wasserstein)+1), 100-wasserstein,
                  label=r'$d_W$', color='red')
     met.axis([0, len(wasserstein), 0, 100])
-    met.set_xticks(arange(0, n_iter*len(wasserstein)+1, n_iter))
+    met.set_xticks(arange(0, step*len(wasserstein)+1, step))
     met.set_xlabel('Iteration')
     met.set_ylabel(r'Recovery rate (in %)')
     met.legend(loc='upper left')
@@ -92,7 +94,7 @@ def _generate_testbed(kernel_init_len, n_nonzero_coefs, n_kernels,
 n_samples, n_dims = 1500, 1
 n_features = kernel_init_len = 20
 n_nonzero_coefs = 3
-n_kernels, max_iter, n_iter, learning_rate = 50, 10, 5, 1.5
+n_kernels, max_iter, n_iter, learning_rate = 50, 10, 3, 1.5
 n_jobs, batch_size = -1, 10
 detection_rate, wasserstein, objective_error = list(), list(), list()
 
@@ -100,10 +102,12 @@ def _generate_testbed(kernel_init_len, n_nonzero_coefs, n_kernels,
                                        n_kernels, n_samples, n_features,
                                        n_dims)
 
-# Create a dictionary
-dict_init = [rand(kernel_init_len, n_dims) for i in range(n_kernels)]
-for i in range(len(dict_init)):
-    dict_init[i] /= norm(dict_init[i], 'fro')
+# # Create a dictionary
+# dict_init = [rand(kernel_init_len, n_dims) for i in range(n_kernels)]
+# for i in range(len(dict_init)):
+#     dict_init[i] /= norm(dict_init[i], 'fro')
+dict_init = None
+
 learned_dict = MiniBatchMultivariateDictLearning(n_kernels=n_kernels,
                 batch_size=batch_size, n_iter=n_iter,
                 n_nonzero_coefs=n_nonzero_coefs,
@@ -128,3 +132,35 @@ def _generate_testbed(kernel_init_len, n_nonzero_coefs, n_kernels,
                 array(wasserstein), n_iter, 'univariate-case')
 
 # Another possibility is to rely on a callback function such as
+def callback_distance(loc):
+    ii, iter_offset = loc['ii'], loc['iter_offset']
+    n_batches = loc['n_batches']
+    if np.mod((ii-iter_offset)/int(n_batches), n_iter) == 0:
+        # Compute distances only every n_iter iterations, as in the previous case
+        d = loc['dict_obj']
+        d.wasserstein.append(emd(loc['dictionary'], d.generating_dict,
+                                 'chordal', scale=True))
+        d.detection_rate.append(detectionRate(loc['dictionary'],
+                                              d.generating_dict, 0.99))
+        d.objective_error.append(loc['current_cost'])
+
+# reinitializing the random generator
+learned_dict2 = MiniBatchMultivariateDictLearning(n_kernels=n_kernels,
+                batch_size=batch_size, n_iter=max_iter*n_iter,
+                n_nonzero_coefs=n_nonzero_coefs,
+                callback=callback_distance,
+                n_jobs=n_jobs, learning_rate=learning_rate,
+                kernel_init_len=kernel_init_len, verbose=1,
+                dict_init=dict_init, random_state=rng_global)
+learned_dict2.generating_dict = list(generating_dict)
+learned_dict2.wasserstein = list()
+learned_dict2.detection_rate = list()
+learned_dict2.objective_error = list()
+
+learned_dict2 = learned_dict2.fit(X)
+
+plot_univariate(array(learned_dict2.objective_error),
+                array(learned_dict2.detection_rate),
+                array(learned_dict2.wasserstein),
+                n_iter=1, figname='univariate-case-callback')
+
diff --git a/mdla.py b/mdla.py
index b29f40d..200b633 100644
--- a/mdla.py
+++ b/mdla.py
@@ -841,7 +841,7 @@ def multivariate_dict_learning_online(X, n_kernels=2, n_nonzero_coefs=1,
                                       n_jobs=1, kernel_init_len=None,
                                       learning_rate=None, random_state=None,
                                       dict_obj = None):
-    """Solves a multivariate dictionary learning matrix factorization problem online.
+    """Solves an online multivariate dictionary learning factorization problem
 
     Finds the best dictionary and the corresponding sparse code for
     approximating the data matrices X by solving::
@@ -981,7 +981,13 @@ def multivariate_dict_learning_online(X, n_kernels=2, n_nonzero_coefs=1,
                          learning_rate=mu, random_state=random_state)
 
-        if np.mod((ii-iter_offset),int(n_batches))==0:
+        # Cost function
+        current_cost = 0.0
+        for i in range(len(r)):
+            current_cost += np.linalg.norm(r[i], 'fro') + len(code[i])
+        errors.append(current_cost/len(r))
+
+        if np.mod((ii-iter_offset), int(n_batches)) == 0:
             if verbose >= 2:
                 print ('[MDL] Dictionary updated, iteration %d '\
                        'with learning rate %.2f (elapsed time: '\
@@ -995,18 +1001,12 @@ def multivariate_dict_learning_online(X, n_kernels=2, n_nonzero_coefs=1,
 
         if ii == (iter_offset+1)*int(n_batches) and verbose == 1:
             print ('Expecting this learning iterations to finish in',
-                   (time()-t0)*(n_iter-iter_offset)/60., 'm')
+                   (time()-t0)*n_iter/60., 'm')
         # if verbose == 1:
         print ('Time from begining is', time()-t0, 's, with n_iter=',
                n_iter, ', iter_offset=', iter_offset,
-               ', i.e.', n_iter-iter_offset, 'iterations to go. ii=',
+               ', i.e.', n_iter, 'iterations to go. ii=',
                ii)
-
-        # Cost function
-        current_cost = 0.0
-        for i in range(len(r)):
-            current_cost += np.linalg.norm(r[i], 'fro') + len(code[i])
-        errors.append(current_cost/len(r))
 
     except KeyboardInterrupt:
         break
@@ -1414,11 +1414,6 @@ def fit(self, X, y=None):
             self.iter_offset_ = self.n_iter
         self.error_ = list(e)
 
-        # if self.verbose >= 1:
-        #     print ('\nEnd of fit')
-        #     print (self.kernels_[7])
-        #     print ()
-
         return self
 
     def partial_fit(self, X, y=None, iter_offset=None):
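
Note on the callback convention used above: the keys read by callback_distance
(ii, iter_offset, n_batches, dictionary, current_cost, dict_obj) suggest that
multivariate_dict_learning_online hands its loop locals to the hook, i.e. it
calls callback(locals()) once per batch and passes the estimator itself as
dict_obj, which is why metric lists can be attached to learned_dict2 before
fitting. A minimal self-contained sketch of that calling pattern follows; the
ToyOnlineLearner class is hypothetical and only illustrates the convention,
it is not part of mdla.

import numpy as np

class ToyOnlineLearner:
    """Hypothetical stand-in mimicking the callback convention sketched above."""
    def __init__(self, n_iter, n_batches, callback=None):
        self.n_iter, self.n_batches, self.callback = n_iter, n_batches, callback

    def fit(self):
        iter_offset = 0
        dict_obj = self                    # the solver passes the estimator back
        for ii in range(iter_offset, iter_offset + self.n_iter):
            current_cost = 1.0 / (ii + 1)  # placeholder for the real batch cost
            n_batches = self.n_batches
            if self.callback is not None:
                self.callback(locals())    # hand the loop state to the hook
        return self

def track_cost(loc):
    # Record the cost once per dictionary update, every n_batches batches,
    # mirroring the modulo test in callback_distance
    if np.mod(loc['ii'] - loc['iter_offset'], loc['n_batches']) == 0:
        loc['dict_obj'].costs.append(loc['current_cost'])

learner = ToyOnlineLearner(n_iter=6, n_batches=2, callback=track_cost)
learner.costs = list()   # attach storage, as done for learned_dict2 above
learner.fit()
print(learner.costs)     # costs sampled at ii = 0, 2, 4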
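
On the cost moved ahead of the dictionary-update test in mdla.py: each batch
contributes the Frobenius norm of its residual plus the number of nonzero
coefficients of its code (an l0-style penalty), averaged over the batch. A
small sketch of that arithmetic, with r and code as stand-ins for the solver's
residual matrices and sparse codes:

import numpy as np

r = [np.random.randn(20, 1) for _ in range(10)]   # residuals, one per sample
code = [[(0, 1.0)] * 3 for _ in range(10)]        # 3 nonzero coefs per sample

current_cost = 0.0
for i in range(len(r)):
    # residual energy + sparsity penalty, as in the patched loop
    current_cost += np.linalg.norm(r[i], 'fro') + len(code[i])
print(current_cost / len(r))                      # average cost over the batch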