Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Use String instead of Symbol #2388

Merged
merged 25 commits into from
Feb 21, 2017
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -19,5 +19,5 @@ add_library(dplyr ${SOURCE_FILES})
add_custom_target(
Rinstall
# Using MAKEFLAGS=-j$(nproc) didn't work
COMMAND MAKEFLAGS=-j8 nice R CMD INSTALL --libs-only --no-test-load ${dplyr_SOURCE_DIR}
COMMAND sh -c "MAKEFLAGS='-j8 -O' nice R CMD INSTALL --libs-only --no-test-load ${dplyr_SOURCE_DIR}"
)
1 change: 1 addition & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -123,6 +123,7 @@ S3method(group_size,grouped_df)
S3method(group_size,rowwise_df)
S3method(group_size,tbl_sql)
S3method(group_vars,default)
S3method(group_vars,grouped_df)
S3method(group_vars,tbl_cube)
S3method(group_vars,tbl_lazy)
S3method(groups,data.frame)
Expand Down
4 changes: 4 additions & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,9 @@
# dplyr 0.5.0.9000

* Internally, column names are always represented as character vectors,
and not as language symbols, to avoid encoding problems on Windows
(#1950, #2387, #2388).

* `collect()` will automatically LIMIT the result to the `n`, the number of
rows requested. This will provide the query planner with more information
that it may be able to use to improve execution time (#2083).
Expand Down
7 changes: 5 additions & 2 deletions R/grouped-df.r
Original file line number Diff line number Diff line change
Expand Up @@ -57,8 +57,11 @@ n_groups.grouped_df <- function(x) {

#' @export
groups.grouped_df <- function(x) {
# Implement group_vars.grouped_df() instead if this assertion fails
stopifnot(is.list(attr(x, "vars")))
lapply(group_vars(x), as.name)
}

#' @export
group_vars.grouped_df <- function(x) {
attr(x, "vars")
}

Expand Down
4 changes: 1 addition & 3 deletions inst/include/dplyr/DataFrameJoinVisitors.h
Original file line number Diff line number Diff line change
Expand Up @@ -49,9 +49,7 @@ namespace dplyr {
set_class(out, classes);
set_rownames(out, nrows);
out.names() = visitor_names_left;
SEXP vars = left.attr("vars");
if (!Rf_isNull(vars))
out.attr("vars") = vars;
copy_vars(out, left);
return (SEXP)out;
}

Expand Down
16 changes: 7 additions & 9 deletions inst/include/dplyr/DataFrameSubsetVisitors.h
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ namespace dplyr {

const Rcpp::DataFrame& data;
pointer_vector<SubsetVectorVisitor> visitors;
Rcpp::CharacterVector visitor_names;
SymbolVector visitor_names;
int nvisitors;

public:
Expand All @@ -34,21 +34,21 @@ namespace dplyr {
}
}

DataFrameSubsetVisitors(const Rcpp::DataFrame& data_, const Rcpp::CharacterVector& names) :
DataFrameSubsetVisitors(const DataFrame& data_, const SymbolVector& names) :
data(data_),
visitors(),
visitor_names(names),
nvisitors(visitor_names.size())
{

IntegerVector indx = r_match(names, data.names());
IntegerVector indx = names.match_in_table(data.names());

int n = indx.size();
for (int i=0; i<n; i++) {

int pos = indx[i];
if (pos == NA_INTEGER) {
stop("unknown column '%s' ", CHAR(names[i]));
stop("unknown column '%s' ", names[i].get_cstring());
}

SubsetVectorVisitor* v = subset_visitor(data[pos - 1]);
Expand Down Expand Up @@ -98,7 +98,7 @@ namespace dplyr {
return visitors[k];
}

Rcpp::String name(int k) const {
const SymbolString name(int k) const {
return visitor_names[k];
}

Expand All @@ -112,15 +112,13 @@ namespace dplyr {
set_class(x, classes);
set_rownames(x, nrows);
x.names() = visitor_names;
SEXP vars = data.attr("vars");
if (!Rf_isNull(vars))
x.attr("vars") = vars;
copy_vars(x, data);
}

};

template <typename Index>
DataFrame subset(DataFrame df, const Index& indices, CharacterVector columns, CharacterVector classes) {
DataFrame subset(DataFrame df, const Index& indices, const SymbolVector& columns, const CharacterVector& classes) {
return DataFrameSubsetVisitors(df, columns).subset(indices, classes);
}

Expand Down
9 changes: 5 additions & 4 deletions inst/include/dplyr/DataFrameVisitors.h
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
#include <dplyr/visitor_set/VisitorSetMixin.h>

#include <dplyr/VectorVisitor.h>
#include <tools/SymbolVector.h>

namespace dplyr {

Expand All @@ -19,15 +20,15 @@ namespace dplyr {

const Rcpp::DataFrame& data;
pointer_vector<VectorVisitor> visitors;
Rcpp::CharacterVector visitor_names;
SymbolVector visitor_names;
int nvisitors;

public:
typedef VectorVisitor visitor_type;

DataFrameVisitors(const Rcpp::DataFrame& data_);
DataFrameVisitors(const DataFrame& data_);

DataFrameVisitors(const Rcpp::DataFrame& data_, const Rcpp::CharacterVector& names);
DataFrameVisitors(const DataFrame& data_, const SymbolVector& names);

inline int size() const {
return nvisitors;
Expand All @@ -36,7 +37,7 @@ namespace dplyr {
return visitors[k];
}

Rcpp::String name(int k) const {
const SymbolString name(int k) const {
return visitor_names[k];
}

Expand Down
4 changes: 2 additions & 2 deletions inst/include/dplyr/Gatherer.h
Original file line number Diff line number Diff line change
Expand Up @@ -298,7 +298,7 @@ namespace dplyr {
}

template <typename Data, typename Subsets>
inline Gatherer* gatherer(GroupedCallProxy<Data,Subsets>& proxy, const Data& gdf, Symbol name) {
inline Gatherer* gatherer(GroupedCallProxy<Data,Subsets>& proxy, const Data& gdf, const SymbolString& name) {
typename Data::group_iterator git = gdf.group_begin();
typename Data::slicing_index indices = *git;
RObject first(proxy.get(indices));
Expand All @@ -309,7 +309,7 @@ namespace dplyr {

check_length(Rf_length(first), indices.size(), "the group size");

check_supported_type(first, name.c_str());
check_supported_type(first, name);

const int ng = gdf.ngroups();
int i = 0;
Expand Down
19 changes: 9 additions & 10 deletions inst/include/dplyr/GroupedDataFrame.h
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,9 @@

#include <dplyr/Result/GroupedSubset.h>

#include <tools/SymbolVector.h>
#include <tools/SymbolMap.h>

namespace dplyr {

inline void check_valid_colnames(const DataFrame& df) {
Expand Down Expand Up @@ -56,7 +59,7 @@ namespace dplyr {
data_(x),
group_sizes(),
biggest_group_size(0),
symbols(data_.attr("vars")),
symbols(get_vars(data_)),
labels()
{
// handle lazyness
Expand All @@ -82,8 +85,8 @@ namespace dplyr {
return GroupedDataFrameIndexIterator(*this);
}

SEXP symbol(int i) const {
return symbols[i];
SymbolString symbol(int i) const {
return symbols.get_name(i);
}

DataFrame& data() {
Expand Down Expand Up @@ -113,12 +116,8 @@ namespace dplyr {
return biggest_group_size;
}

inline bool has_group(Symbol g) const {
int n = symbols.size();
for (int i=0; i<n; i++) {
if (symbols[i] == g) return true;
}
return false;
inline bool has_group(const SymbolString& g) const {
return symbols.has(g);
}

inline subset* create_subset(SEXP x) const {
Expand All @@ -130,7 +129,7 @@ namespace dplyr {
DataFrame data_;
IntegerVector group_sizes;
int biggest_group_size;
ListOf<Symbol> symbols;
SymbolMap symbols;
DataFrame labels;

};
Expand Down
8 changes: 4 additions & 4 deletions inst/include/dplyr/NamedListAccumulator.h
Original file line number Diff line number Diff line change
Expand Up @@ -15,9 +15,9 @@ namespace dplyr {

NamedListAccumulator() {}

inline void set(Symbol name, SEXP x) {
inline void set(const SymbolString& name, SEXP x) {
if (! Rcpp::traits::same_type<Data, RowwiseDataFrame>::value)
check_supported_type(x, name.c_str());
check_supported_type(x, name);

SymbolMapIndex index = symbol_map.insert(name);
if (index.origin == NEW) {
Expand All @@ -28,7 +28,7 @@ namespace dplyr {

}

inline void rm(SEXP name) {
inline void rm(const SymbolString& name) {
SymbolMapIndex index = symbol_map.rm(name);
if (index.origin != NEW) {
data.erase(data.begin() + index.pos);
Expand All @@ -45,7 +45,7 @@ namespace dplyr {
return data.size();
}

inline CharacterVector names() const {
inline const SymbolVector names() const {
return symbol_map.get_names();
}

Expand Down
10 changes: 5 additions & 5 deletions inst/include/dplyr/Result/GroupedCallProxy.h
Original file line number Diff line number Diff line change
Expand Up @@ -71,7 +71,7 @@ namespace dplyr {
hybrid_eval.reset();
}

void input(Symbol name, SEXP x) {
void input(const SymbolString& name, SEXP x) {
subsets.input(name, x);
hybrid_eval.reset();
}
Expand All @@ -80,12 +80,12 @@ namespace dplyr {
return subsets.size();
}

inline bool has_variable(SEXP symbol) const {
return subsets.count(symbol);
inline bool has_variable(const SymbolString& name) const {
return subsets.count(name);
}

inline SEXP get_variable(Rcpp::String name) const {
return subsets.get_variable(Rf_installChar(name.get_sexp()));
inline SEXP get_variable(const SymbolString& name) const {
return subsets.get_variable(name);
}

inline bool is_constant() const {
Expand Down
12 changes: 6 additions & 6 deletions inst/include/dplyr/Result/GroupedHybridCall.h
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ namespace dplyr {
virtual ~IHybridCallback() {}

public:
virtual SEXP get_subset(const Symbol& name) const = 0;
virtual SEXP get_subset(const SymbolString& name) const = 0;
};

class GroupedHybridEnv {
Expand Down Expand Up @@ -45,7 +45,7 @@ namespace dplyr {
// Environment::new_child() performs an R callback, creating the environment
// in R should be slightly faster
Environment active_env =
create_env_symbol(
create_env_string(
names, &GroupedHybridEnv::hybrid_get_callback,
PAYLOAD(const_cast<void*>(reinterpret_cast<const void*>(callback))), env);

Expand All @@ -58,10 +58,10 @@ namespace dplyr {
has_eval_env = true;
}

static SEXP hybrid_get_callback(const Symbol& name, bindrcpp::PAYLOAD payload) {
static SEXP hybrid_get_callback(const String& name, bindrcpp::PAYLOAD payload) {
LOG_VERBOSE;
IHybridCallback* callback_ = reinterpret_cast<IHybridCallback*>(payload.p);
return callback_->get_subset(name);
return callback_->get_subset(SymbolString(name));
}

void cleanup_eval_env() const {
Expand Down Expand Up @@ -180,7 +180,7 @@ namespace dplyr {
public:
GroupedHybridEval(const Call& call_, const ILazySubsets& subsets_, const Environment& env_) :
indices(NULL), subsets(subsets_), env(env_),
hybrid_env(subsets_.get_variable_names(), env_, this),
hybrid_env(subsets_.get_variable_names().get_vector(), env_, this),
hybrid_call(call_, subsets_, env_)
{
LOG_VERBOSE;
Expand All @@ -191,7 +191,7 @@ namespace dplyr {
}

public: // IHybridCallback
SEXP get_subset(const Symbol& name) const {
SEXP get_subset(const SymbolString& name) const {
LOG_VERBOSE;
return subsets.get(name, get_indices());
}
Expand Down
14 changes: 8 additions & 6 deletions inst/include/dplyr/Result/ILazySubsets.h
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,8 @@
#define dplyr_ILazySubsets_H

#include <tools/SlicingIndex.h>
#include <tools/SymbolString.h>
#include <tools/SymbolVector.h>

namespace dplyr {

Expand All @@ -12,12 +14,12 @@ namespace dplyr {
public:
virtual ~ILazySubsets() {}

virtual CharacterVector get_variable_names() const = 0;
virtual SEXP get_variable(SEXP symbol) const = 0;
virtual SEXP get(SEXP symbol, const SlicingIndex& indices) const = 0;
virtual bool is_summary(SEXP symbol) const = 0;
virtual int count(SEXP symbol) const = 0;
virtual void input(SEXP symbol, SEXP x) = 0;
virtual const SymbolVector get_variable_names() const = 0;
virtual SEXP get_variable(const SymbolString& symbol) const = 0;
virtual SEXP get(const SymbolString& symbol, const SlicingIndex& indices) const = 0;
virtual bool is_summary(const SymbolString& symbol) const = 0;
virtual int count(const SymbolString& symbol) const = 0;
virtual void input(const SymbolString& symbol, SEXP x) = 0;
virtual int size() const = 0;
virtual int nrows() const = 0;
};
Expand Down
Loading