new example for univariate case
Sylvain Chevallier committed Oct 14, 2015
1 parent 0d8c219 commit a0f2a52
Showing 2 changed files with 58 additions and 27 deletions.
example_univariate.py (48 additions, 12 deletions)
@@ -9,39 +9,41 @@
 from numpy.random import rand, randn, permutation, randint
 
 # TODO: Add SNR, repeat experiments to make stats, make a fast and a
-# long version, use callback to compute distance
+# long version,
 
 def plot_univariate(objective_error, detection_rate, wasserstein,
                     n_iter, figname):
     fig = plt.figure(figsize=(15,5))
+    if n_iter == 1: step = 5
+    else: step = n_iter
 
     # plotting data from objective error
     objerr = fig.add_subplot(1,3,1)
-    _ = objerr.plot(n_iter*arange(1, len(objective_error)+1), objective_error,
+    _ = objerr.plot(step*arange(1, len(objective_error)+1), objective_error,
                     color='green', label=r'Objective error')
     objerr.axis([0, len(objective_error)-1, min(objective_error),
                  max(objective_error)])
-    objerr.set_xticks(arange(0, n_iter*len(objective_error)+1, n_iter))
+    objerr.set_xticks(arange(0, step*len(objective_error)+1, step))
     objerr.set_xlabel('Iteration')
     objerr.set_ylabel(r'Error (no unit)')
     objerr.legend(loc='upper right')
 
     # plotting data from detection rate 0.99
     detection = fig.add_subplot(1,3,2)
-    _ = detection.plot(n_iter*arange(1,len(detection_rate)+1), detection_rate,
+    _ = detection.plot(step*arange(1,len(detection_rate)+1), detection_rate,
                        color='magenta', label=r'Detection rate 0.99')
     detection.axis([0, len(detection_rate), 0, 100])
-    detection.set_xticks(arange(0, n_iter*len(detection_rate)+1, n_iter))
+    detection.set_xticks(arange(0, step*len(detection_rate)+1, step))
     detection.set_xlabel('Iteration')
     detection.set_ylabel(r'Recovery rate (in %)')
     detection.legend(loc='upper left')
 
     # plotting data from our metric
     met = fig.add_subplot(1,3,3)
-    _ = met.plot(n_iter*arange(1, len(wasserstein)+1), 100-wasserstein,
+    _ = met.plot(step*arange(1, len(wasserstein)+1), 100-wasserstein,
                  label=r'$d_W$', color='red')
     met.axis([0, len(wasserstein), 0, 100])
-    met.set_xticks(arange(0, n_iter*len(wasserstein)+1, n_iter))
+    met.set_xticks(arange(0, step*len(wasserstein)+1, step))
     met.set_xlabel('Iteration')
     met.set_ylabel(r'Recovery rate (in %)')
     met.legend(loc='upper left')
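The new step variable decouples the x-axis scaling from the function's n_iter argument: measurements are assumed to arrive every step iterations, with a hard-coded period of 5 when n_iter == 1 (the callback case added below). A small sketch of the axis values this produces, assuming for instance 4 measurements recorded every 3 iterations:

import numpy as np

step, n_points = 3, 4                             # recording period, number of measurements
x = step * np.arange(1, n_points + 1)             # curve abscissa: [ 3  6  9 12]
ticks = np.arange(0, step * n_points + 1, step)   # tick positions: [ 0  3  6  9 12]
print(x, ticks)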
@@ -92,18 +94,20 @@ def _generate_testbed(kernel_init_len, n_nonzero_coefs, n_kernels,
 n_samples, n_dims = 1500, 1
 n_features = kernel_init_len = 20
 n_nonzero_coefs = 3
-n_kernels, max_iter, n_iter, learning_rate = 50, 10, 5, 1.5
+n_kernels, max_iter, n_iter, learning_rate = 50, 10, 3, 1.5
 n_jobs, batch_size = -1, 10
 detection_rate, wasserstein, objective_error = list(), list(), list()
 
 generating_dict, X, code = _generate_testbed(kernel_init_len, n_nonzero_coefs,
                                              n_kernels, n_samples, n_features,
                                              n_dims)
 
-# Create a dictionary
-dict_init = [rand(kernel_init_len, n_dims) for i in range(n_kernels)]
-for i in range(len(dict_init)):
-    dict_init[i] /= norm(dict_init[i], 'fro')
+# # Create a dictionary
+# dict_init = [rand(kernel_init_len, n_dims) for i in range(n_kernels)]
+# for i in range(len(dict_init)):
+#     dict_init[i] /= norm(dict_init[i], 'fro')
+dict_init = None
+
 learned_dict = MiniBatchMultivariateDictLearning(n_kernels=n_kernels,
                 batch_size=batch_size, n_iter=n_iter,
                 n_nonzero_coefs=n_nonzero_coefs,
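With dict_init = None, the explicit initialization above is disabled, presumably letting the estimator draw its own starting kernels. For reference, the commented-out block built the initial dictionary like this (a self-contained version of the same logic):

import numpy as np
from numpy.linalg import norm
from numpy.random import rand

kernel_init_len, n_dims, n_kernels = 20, 1, 50
# Draw random kernels and rescale each one to unit Frobenius norm.
dict_init = [rand(kernel_init_len, n_dims) for _ in range(n_kernels)]
for i in range(len(dict_init)):
    dict_init[i] /= norm(dict_init[i], 'fro')
print(norm(dict_init[0], 'fro'))  # -> 1.0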
@@ -128,3 +132,35 @@ def _generate_testbed(kernel_init_len, n_nonzero_coefs, n_kernels,
                 array(wasserstein), n_iter, 'univariate-case')
 
+# Another possibility is to rely on a callback function such as
+def callback_distance(loc):
+    ii, iter_offset = loc['ii'], loc['iter_offset']
+    n_batches = loc['n_batches']
+    if np.mod((ii-iter_offset)/int(n_batches), n_iter) == 0:
+        # Compute distance only every n_iter iterations, as in the previous case
+        d = loc['dict_obj']
+        d.wasserstein.append(emd(loc['dictionary'], d.generating_dict,
+                                 'chordal', scale=True))
+        d.detection_rate.append(detectionRate(loc['dictionary'],
+                                              d.generating_dict, 0.99))
+        d.objective_error.append(loc['current_cost'])
+
+# reinitializing the random generator
+learned_dict2 = MiniBatchMultivariateDictLearning(n_kernels=n_kernels,
+                batch_size=batch_size, n_iter=max_iter*n_iter,
+                n_nonzero_coefs=n_nonzero_coefs,
+                callback=callback_distance,
+                n_jobs=n_jobs, learning_rate=learning_rate,
+                kernel_init_len=kernel_init_len, verbose=1,
+                dict_init=dict_init, random_state=rng_global)
+learned_dict2.generating_dict = list(generating_dict)
+learned_dict2.wasserstein = list()
+learned_dict2.detection_rate = list()
+learned_dict2.objective_error = list()
+
+learned_dict2 = learned_dict2.fit(X)
+
+plot_univariate(array(learned_dict2.objective_error),
+                array(learned_dict2.detection_rate),
+                array(learned_dict2.wasserstein),
+                n_iter=1, figname='univariate-case-callback')
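callback_distance indexes its argument like a dict of the solver's local variables (loc['ii'], loc['iter_offset'], loc['current_cost'], loc['dictionary'], ...), which suggests the estimator invokes it as callback(locals()) during fitting, in the style of scikit-learn's online dictionary learning. A minimal sketch of that hook pattern, with hypothetical stand-ins (train_loop, update_dict) for the real solver internals:

import numpy as np

def update_dict(X, dictionary, ii):
    # Hypothetical stand-in for one mini-batch update; returns the batch cost.
    return float(np.linalg.norm(X)) / (ii + 1)

def train_loop(X, dictionary, n_batches, n_iter, iter_offset=0, callback=None):
    # Sketch of a solver loop that exposes its locals to a user callback.
    for ii in range(iter_offset, iter_offset + n_iter * int(n_batches)):
        current_cost = update_dict(X, dictionary, ii)
        if callback is not None:
            # Passing locals() is what makes loc['ii'], loc['iter_offset'],
            # loc['n_batches'] and loc['current_cost'] reachable from a
            # callback like callback_distance above.
            callback(locals())
    return dictionary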

mdla.py (10 additions, 15 deletions)
@@ -841,7 +841,7 @@ def multivariate_dict_learning_online(X, n_kernels=2, n_nonzero_coefs=1,
                                       n_jobs=1, kernel_init_len=None,
                                       learning_rate=None, random_state=None,
                                       dict_obj = None):
-    """Solves a multivariate dictionary learning matrix factorization problem online.
+    """Solves an online multivariate dictionary learning factorization problem
     Finds the best dictionary and the corresponding sparse code for
     approximating the data matrices X by solving::
@@ -981,7 +981,13 @@ def multivariate_dict_learning_online(X, n_kernels=2, n_nonzero_coefs=1,
                                              learning_rate=mu,
                                              random_state=random_state)
 
-            if np.mod((ii-iter_offset),int(n_batches))==0:
+            # Cost function
+            current_cost = 0.0
+            for i in range(len(r)):
+                current_cost += np.linalg.norm(r[i], 'fro') + len(code[i])
+            errors.append(current_cost/len(r))
+
+            if np.mod((ii-iter_offset), int(n_batches)) == 0:
                 if verbose >= 2:
                     print ('[MDL] Dictionary updated, iteration %d '\
                            'with learning rate %.2f (elapsed time: '\
@@ -995,18 +1001,12 @@ def multivariate_dict_learning_online(X, n_kernels=2, n_nonzero_coefs=1,
 
             if ii == (iter_offset+1)*int(n_batches) and verbose == 1:
                 print ('Expecting this learning iterations to finish in',
-                       (time()-t0)*(n_iter-iter_offset)/60., 'm')
+                       (time()-t0)*n_iter/60., 'm')
                 # if verbose == 1:
                 print ('Time from begining is', time()-t0, 's, with n_iter=',
                        n_iter, ', iter_offset=', iter_offset,
-                       ', i.e.', n_iter-iter_offset, 'iterations to go. ii=',
+                       ', i.e.', n_iter, 'iterations to go. ii=',
                        ii)
-
-            # Cost function
-            current_cost = 0.0
-            for i in range(len(r)):
-                current_cost += np.linalg.norm(r[i], 'fro') + len(code[i])
-            errors.append(current_cost/len(r))
         except KeyboardInterrupt:
             break
@@ -1414,11 +1414,6 @@ def fit(self, X, y=None):
         self.iter_offset_ = self.n_iter
         self.error_ = list(e)
 
-        # if self.verbose >= 1:
-        #     print ('\nEnd of fit')
-        #     print (self.kernels_[7])
-        # print ()
-
         return self
 
     def partial_fit(self, X, y=None, iter_offset=None):
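Relocating the cost block before the per-batch reporting presumably guarantees that current_cost is already defined when the user callback is invoked, so loc['current_cost'] in callback_distance is always valid. The objective itself sums, for each sample, the Frobenius norm of the residual and the number of atoms used (an l0-style penalty), then averages over the batch. A self-contained sketch with random stand-ins for the solver's residual list r and sparse-code list code:

import numpy as np

rng = np.random.RandomState(0)
r = [rng.randn(20, 1) for _ in range(10)]     # per-sample residual matrices
code = [[(1.0, 0, k) for k in range(3)] for _ in range(10)]  # 3 atoms per sample

current_cost = 0.0
for i in range(len(r)):
    # Data-fit term plus the count of atoms selected for sample i.
    current_cost += np.linalg.norm(r[i], 'fro') + len(code[i])
errors = [current_cost / len(r)]              # batch-averaged objective
print(errors)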