Skip to content

Commit

Permalink
extended example with a combined kernel weight selection
Browse files Browse the repository at this point in the history
  • Loading branch information
karlnapf committed Feb 22, 2013
1 parent 0634ac1 commit b2ecc90
Showing 1 changed file with 93 additions and 8 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -15,17 +15,13 @@ def kernel_choice_linear_time_mmd_opt_single():
from shogun.Features import GaussianBlobsDataGenerator
from shogun.Kernel import GaussianKernel, CombinedKernel
from shogun.Statistics import LinearTimeMMD
from shogun.Statistics import MMDKernelSelectionMedian
from shogun.Statistics import MMDKernelSelectionMax
from shogun.Statistics import MMDKernelSelectionOpt
from shogun.Statistics import MMDKernelSelectionCombMaxL2
from shogun.Statistics import MMDKernelSelectionCombOpt
from shogun.Statistics import BOOTSTRAP, MMD1_GAUSSIAN
from shogun.Distance import EuclideanDistance
from shogun.Mathematics import Statistics, Math

# note that the linear time statistic is designed for much larger datasets
m=10000
m=1000
distance=10
stretch=5
num_blobs=3
Expand All @@ -50,7 +46,6 @@ def kernel_choice_linear_time_mmd_opt_single():
grid(True)
plot(data[0][num_plot+1:2*num_plot], data[1][num_plot+1:2*num_plot], 'b.', label='$x$', alpha=0.5)
title('$Y\sim q$')
#show()


# create combined kernel with Gaussian kernels inside (shoguns Gaussian kernel is
Expand All @@ -62,15 +57,19 @@ def kernel_choice_linear_time_mmd_opt_single():
combined.append_kernel(GaussianKernel(10, widths[i]))

# mmd instance using streaming features, blocksize of 10000
block_size=10000
block_size=1000
mmd=LinearTimeMMD(combined, gen_p, gen_q, m, block_size)

# kernel selection instance (this can easily replaced by the other methods for selecting
# single kernels
selection=MMDKernelSelectionOpt(mmd)

# print ratios of mmd divided by standard deviation (just for information)
print "ratios:", selection.compute_measures()
ratios=selection.compute_measures()
print "ratios:", ratios
subplot(2,2,3)
plot(ratios)
title('ratios')

# perform kernel selection
kernel=selection.select_kernel()
Expand Down Expand Up @@ -99,6 +98,92 @@ def kernel_choice_linear_time_mmd_opt_single():

print "type I error:", mean(typeIerrors), ", type II error:", mean(typeIIerrors)


def kernel_choice_linear_time_mmd_opt_comb():
from shogun.Features import RealFeatures
from shogun.Features import GaussianBlobsDataGenerator
from shogun.Kernel import GaussianKernel, CombinedKernel
from shogun.Statistics import LinearTimeMMD
from shogun.Statistics import MMDKernelSelectionCombOpt
from shogun.Statistics import BOOTSTRAP, MMD1_GAUSSIAN
from shogun.Distance import EuclideanDistance
from shogun.Mathematics import Statistics, Math

# note that the linear time statistic is designed for much larger datasets
m=1000
distance=10
stretch=5
num_blobs=3
angle=pi/4

# streaming data generator
gen_p=GaussianBlobsDataGenerator(num_blobs, distance, 1, 0)
gen_q=GaussianBlobsDataGenerator(num_blobs, distance, stretch, angle)

# stream some data and plot
num_plot=1000
features=gen_p.get_streamed_features(num_plot)
features=features.create_merged_copy(gen_q.get_streamed_features(num_plot))
data=features.get_feature_matrix()

figure()
subplot(2,2,1)
grid(True)
plot(data[0][0:num_plot], data[1][0:num_plot], 'r.', label='$x$')
title('$X\sim p$')
subplot(2,2,2)
grid(True)
plot(data[0][num_plot+1:2*num_plot], data[1][num_plot+1:2*num_plot], 'b.', label='$x$', alpha=0.5)
title('$Y\sim q$')


# create combined kernel with Gaussian kernels inside (shoguns Gaussian kernel is
# different to the standard form, see documentation)
sigmas=[2**x for x in range(-3,10)]
widths=[x*x*2 for x in sigmas]
combined=CombinedKernel()
for i in range(len(sigmas)):
combined.append_kernel(GaussianKernel(10, widths[i]))

# mmd instance using streaming features, blocksize of 10000
block_size=10000
mmd=LinearTimeMMD(combined, gen_p, gen_q, m, block_size)

# kernel selection instance (this can easily replaced by the other methods for selecting
# combined kernels
selection=MMDKernelSelectionCombOpt(mmd)

# perform kernel selection (kernel is automatically set)
kernel=selection.select_kernel()
kernel=CombinedKernel.obtain_from_generic(kernel)
print "selected kernel weights:", kernel.get_subkernel_weights()
subplot(2,2,3)
plot(kernel.get_subkernel_weights())
title("Kernel weights")

# compute tpye I and II error (use many more trials). Type I error is only
# estimated to check MMD1_GAUSSIAN method for estimating the null
# distribution. Note that testing has to happen on difference data than
# kernel selecting, but the linear time mmd does this implicitly
mmd.set_null_approximation_method(MMD1_GAUSSIAN)

# number of trials should be larger to compute tight confidence bounds
num_trials=5;
alpha=0.05 # test power
typeIerrors=[0 for x in range(num_trials)]
typeIIerrors=[0 for x in range(num_trials)]
for i in range(num_trials):
# this effectively means that p=q - rejecting is tpye I error
mmd.set_simulate_h0(True)
typeIerrors[i]=mmd.perform_test()>alpha
mmd.set_simulate_h0(False)

typeIIerrors[i]=mmd.perform_test()>alpha

print "type I error:", mean(typeIerrors), ", type II error:", mean(typeIIerrors)

if __name__=='__main__':
    # Announce the example set, then run both kernel-selection demos in order.
    print('MMDKernelSelection')
    for demo in (kernel_choice_linear_time_mmd_opt_single,
                 kernel_choice_linear_time_mmd_opt_comb):
        demo()
    # uncomment to display the matplotlib figures interactively
    #show()

0 comments on commit b2ecc90

Please sign in to comment.