Skip to content

Commit

Permalink
- fixes reading of files with empty rows (no values within a row)
Browse files Browse the repository at this point in the history
- fixes #3
  • Loading branch information
dselivanov committed Jan 13, 2020
1 parent a2f5f46 commit 5cf9c19
Show file tree
Hide file tree
Showing 10 changed files with 53 additions and 36 deletions.
1 change: 1 addition & 0 deletions .Rbuildignore
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
^.*\.Rproj$
^\.Rproj\.user$
^cran-comments\.md$
^CRAN-RELEASE$
6 changes: 3 additions & 3 deletions DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
Package: sparsio
Type: Package
Title: I/O Operations with Sparse Matrices
Version: 1.0.0
Date: 2017-06-28
Version: 1.0.1
Date: 2020-01-13
Authors@R: c(person("Dmitriy", "Selivanov", role = c("aut", "cre"),
email = "selivanov.dmitriy@gmail.com"),
person("Felix", "Riedel", role = c("aut"),
Expand All @@ -25,4 +25,4 @@ Suggests:
testthat
URL: https://github.com/dselivanov/sparsio
BugReports: https://github.com/dselivanov/sparsio/issues
RoxygenNote: 6.0.1
RoxygenNote: 6.1.1
2 changes: 1 addition & 1 deletion NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -5,4 +5,4 @@ export(write_svmlight)
import(Matrix)
import(Rcpp)
import(methods)
useDynLib(sparsio)
useDynLib(sparsio, .registration = TRUE)
4 changes: 2 additions & 2 deletions R/RcppExports.R
Original file line number Diff line number Diff line change
Expand Up @@ -2,10 +2,10 @@
# Generator token: 10BE3573-1514-4C36-9D1C-5A225CD40393

read_svmlight_cpp <- function(filename, zero_based = 1L) {
.Call('sparsio_read_svmlight_cpp', PACKAGE = 'sparsio', filename, zero_based)
.Call(`_sparsio_read_svmlight_cpp`, filename, zero_based)
}

write_svmlight_cpp <- function(x, y, filename, zero_based = 1L) {
invisible(.Call('sparsio_write_svmlight_cpp', PACKAGE = 'sparsio', x, y, filename, zero_based))
invisible(.Call(`_sparsio_write_svmlight_cpp`, x, y, filename, zero_based))
}

2 changes: 1 addition & 1 deletion R/sparsio.R
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
#' @useDynLib sparsio
#' @useDynLib sparsio, .registration = TRUE
#' @import Matrix
#' @import Rcpp
#' @import methods
Expand Down
19 changes: 0 additions & 19 deletions cran-comments.md

This file was deleted.

8 changes: 4 additions & 4 deletions src/RcppExports.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ using namespace Rcpp;

// read_svmlight_cpp
List read_svmlight_cpp(Rcpp::String filename, int zero_based);
RcppExport SEXP sparsio_read_svmlight_cpp(SEXP filenameSEXP, SEXP zero_basedSEXP) {
RcppExport SEXP _sparsio_read_svmlight_cpp(SEXP filenameSEXP, SEXP zero_basedSEXP) {
BEGIN_RCPP
Rcpp::RObject rcpp_result_gen;
Rcpp::RNGScope rcpp_rngScope_gen;
Expand All @@ -19,7 +19,7 @@ END_RCPP
}
// write_svmlight_cpp
void write_svmlight_cpp(S4 x, NumericVector y, Rcpp::String filename, int zero_based);
RcppExport SEXP sparsio_write_svmlight_cpp(SEXP xSEXP, SEXP ySEXP, SEXP filenameSEXP, SEXP zero_basedSEXP) {
RcppExport SEXP _sparsio_write_svmlight_cpp(SEXP xSEXP, SEXP ySEXP, SEXP filenameSEXP, SEXP zero_basedSEXP) {
BEGIN_RCPP
Rcpp::RNGScope rcpp_rngScope_gen;
Rcpp::traits::input_parameter< S4 >::type x(xSEXP);
Expand All @@ -32,8 +32,8 @@ END_RCPP
}

static const R_CallMethodDef CallEntries[] = {
{"sparsio_read_svmlight_cpp", (DL_FUNC) &sparsio_read_svmlight_cpp, 2},
{"sparsio_write_svmlight_cpp", (DL_FUNC) &sparsio_write_svmlight_cpp, 4},
{"_sparsio_read_svmlight_cpp", (DL_FUNC) &_sparsio_read_svmlight_cpp, 2},
{"_sparsio_write_svmlight_cpp", (DL_FUNC) &_sparsio_write_svmlight_cpp, 4},
{NULL, NULL, 0}
};

Expand Down
9 changes: 5 additions & 4 deletions src/read_svmlight.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -39,19 +39,20 @@ List read_svmlight_cpp(Rcpp::String filename, int zero_based = 1) {
nrow++;
}

int id;
double value;
// foreach token (id:value) in line
while (!tokenStream.eof()) {
std::getline(tokenStream, token, ' ');
int id;
double value;
std::getline(tokenStream, token, ' ');
if (!token.empty()) {
sscanf(token.c_str(), "%d:%lf", &id, &value);
// case when indices start from 1
id = id - index_start_with;
ncol = std::max(id, ncol);
x_values.push_back(value);

col_indices.push_back(id);
pointer_counter++;
}
}
}
ncol++;
Expand Down
17 changes: 17 additions & 0 deletions tests/testthat/test-empty-rows.svmlight
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0 70628:1
21 changes: 19 additions & 2 deletions tests/testthat/test-read.R
Original file line number Diff line number Diff line change
Expand Up @@ -9,8 +9,25 @@ test_that("read basic", {

test_that("read different formats", {
formats = c("CsparseMatrix", "RsparseMatrix", "TsparseMatrix")
for( f in formats) {
for (f in formats) {
m = read_svmlight("test.svmlight", type = f)
expect_true(inherits(m$x, f))
}
})
})

test_that("read file with empty rows", {
  # Checks that the matrix returned by read_svmlight() has one row per line
  # of the fixture (including lines that carry no "index:value" tokens) and
  # enough columns to hold the largest column index found in the file.
  ln = readLines("test-empty-rows.svmlight")

  # Split every line into space-separated tokens and flatten them.
  tokens = unlist(strsplit(ln, " ", fixed = TRUE), recursive = FALSE, use.names = FALSE)
  # Keep only "index:value" tokens, so that tokens without ":" and the value
  # part of a feature can never be mistaken for a column index.
  features = tokens[grepl(":", tokens, fixed = TRUE)]
  col_ids = as.numeric(sub(":.*$", "", features))
  # +1 because of 0-based indexing
  n_col_true = max(col_ids) + 1

  n_row_true = length(ln)
  m = read_svmlight("test-empty-rows.svmlight", type = "RsparseMatrix")
  expect_equal(dim(m$x)[1], n_row_true)
  expect_equal(dim(m$x)[2], n_col_true)
})

0 comments on commit 5cf9c19

Please sign in to comment.