Skip to content

Commit

Permalink
Pass all the tests for R kmeans_cuda
Browse files Browse the repository at this point in the history
  • Loading branch information
vmarkovtsev committed Feb 18, 2017
1 parent 2868527 commit 4997bc8
Show file tree
Hide file tree
Showing 3 changed files with 62 additions and 10 deletions.
2 changes: 2 additions & 0 deletions .gitignore
@@ -1,4 +1,6 @@
.idea
cmake-build-*
**/*.cbp
**/CMakeCache.txt
**/CMakeFiles
**/.DS_Store
Expand Down
34 changes: 24 additions & 10 deletions src/r.cc
Expand Up @@ -84,17 +84,18 @@ static SEXP r_kmeans_cuda(SEXP args) {
int chunks_size = 0;
{
SEXP samples_obj = kwargs["samples"];
if (TYPEOF(samples_obj) == VECSXP) {
if (isReal(samples_obj)) {
chunks_size = 1;
samples_chunks.reset(new SEXP[1]);
samples_chunks[0] = samples_obj;
} else if (TYPEOF(samples_obj) == VECSXP) {
chunks_size = length(samples_obj);
samples_chunks.reset(new SEXP[chunks_size]);
for (unsigned i = 0; samples_obj != R_NilValue;
i++, samples_obj = CDR(samples_obj)) {
samples_chunks[i] = CAR(samples_obj);
for (int i = 0; i < chunks_size; i++) {
samples_chunks[i] = VECTOR_ELT(samples_obj, i);
}
} else {
chunks_size = 1;
samples_chunks.reset(new SEXP[1]);
samples_chunks[0] = samples_obj;
error("\"samples\" must be a 2D real matrix or a vector of 2D real matrices");
}
}
int samples_size = 0, features_size = 0;
Expand Down Expand Up @@ -146,6 +147,8 @@ static SEXP r_kmeans_cuda(SEXP args) {
|| clusters_size >= samples_size) {
error("\"clusters\" is too big");
}
auto centroids = std::unique_ptr<float[]>(
new float[clusters_size * features_size]);
KMCUDAInitMethod init = kmcudaInitMethodPlusPlus;
int afkmc2_m = 0;
auto init_iter = kwargs.find("init");
Expand All @@ -171,8 +174,21 @@ static SEXP r_kmeans_cuda(SEXP args) {
} else if (length(init_iter->second) != 1) {
error("\"init\" has wrong number of parameters");
}
} else if (isReal(init_iter->second)) {
init = kmcudaInitMethodImport;
double *centroids_double = REAL(init_iter->second);
SEXP dims = getAttrib(init_iter->second, R_DimSymbol);
if (length(dims) != 2
|| INTEGER(dims)[0] != clusters_size
|| INTEGER(dims)[1] != features_size) {
error("invalid centroids dimensions in \"init\"");
}
#pragma omp parallel for simd
for (int i = 0; i < clusters_size * features_size; i++) {
centroids[i] = centroids_double[i];
}
} else {
error("\"init\" must be either a string or a list");
error("\"init\" must be either a string or a list or a 2D matrix");
}
}
float tolerance = 0.01;
Expand Down Expand Up @@ -215,8 +231,6 @@ static SEXP r_kmeans_cuda(SEXP args) {
average_distance_ptr = &average_distance;
}
}
auto centroids = std::unique_ptr<float[]>(
new float[clusters_size * features_size]);
auto assignments = std::unique_ptr<uint32_t[]>(new uint32_t[samples_size]);
auto result = kmeans_cuda(
init, &afkmc2_m, tolerance, yinyang_t, metric, samples_size, features_size,
Expand Down
36 changes: 36 additions & 0 deletions src/test.R
Expand Up @@ -27,6 +27,42 @@ if (exists("testing")) {
print(sprintf("Reassignments: %f", reasses))
expect_lt(reasses, 0.01)
})
test_that("MultiSamples",{
set.seed(42)
samples1 <- replicate(4, runif(16000))
samples2 <- replicate(4, runif(16000))
result = .External("kmeans_cuda", list(samples1, samples2), 50,
init="kmeans++", seed=777, verbosity=2)
kmcuda_asses = replicate(1, result$assignments)
expect_equal(length(kmcuda_asses), 32000)
attach(kmeans(rbind(samples1, samples2), result$centroids, iter.max=1))
reasses = length(intersect(kmcuda_asses, cluster)) / length(cluster)
print(sprintf("Reassignments: %f", reasses))
expect_lt(reasses, 0.01)
})
test_that("AFK-MC2",{
set.seed(42)
samples <- replicate(4, runif(16000))
result = .External("kmeans_cuda", samples, 50, tolerance=0.01,
init=c("afkmc2", 100), seed=777, verbosity=2)
kmcuda_asses = replicate(1, result$assignments)
attach(kmeans(samples, result$centroids, iter.max=1))
reasses = length(intersect(kmcuda_asses, cluster)) / length(cluster)
print(sprintf("Reassignments: %f", reasses))
expect_lt(reasses, 0.01)
})
test_that("ImportCentroids",{
set.seed(42)
samples <- replicate(4, runif(16000))
centroids <- replicate(4, runif(50))
result = .External("kmeans_cuda", samples, 50, tolerance=0.01,
init=centroids, seed=777, verbosity=2)
kmcuda_asses = replicate(1, result$assignments)
attach(kmeans(samples, result$centroids, iter.max=1))
reasses = length(intersect(kmcuda_asses, cluster)) / length(cluster)
print(sprintf("Reassignments: %f", reasses))
expect_lt(reasses, 0.01)
})
} else {
testing <- TRUE
cwd <- getwd()
Expand Down

0 comments on commit 4997bc8

Please sign in to comment.