Skip to content

Commit

Permalink
[locale] Avoid copy of __atoms when char_type is char
Browse files Browse the repository at this point in the history
The function __num_get<_CharT>::__stage2_int_prep makes an unnecessary copy of
__src into __atoms when char_type is char. This can be avoided by dispatching
on the character type and simply returning __src when char_type is char.

Added a benchmark to demonstrate the performance improvement.
In order to avoid ABI incompatibilities, the changes are guarded
by the macro _LIBCPP_ABI_OPTIMIZED_LOCALE_NUM_GET.

Differential Revision: https://reviews.llvm.org/D30268
Reviewed by: EricWF

llvm-svn: 305427
  • Loading branch information
hiraditya committed Jun 14, 2017
1 parent ad94b45 commit 38bc3df
Show file tree
Hide file tree
Showing 3 changed files with 107 additions and 6 deletions.
38 changes: 38 additions & 0 deletions libcxx/benchmarks/stringstream.bench.cpp
@@ -0,0 +1,38 @@
#include "benchmark/benchmark_api.h"

#include <sstream>
// Declared separately so the noinline attribute is attached before the
// definition; the compiler is asked not to inline this into its callers.
double __attribute__((noinline)) istream_numbers();

// Parses three fixed lines of whitespace-separated numbers with
// std::istringstream and returns a value accumulated from the parsed fields.
// Each line holds three ints, a double, two ints, a double, and two ints.
double istream_numbers() {
  const char *const lines[] = {
    "-6 69 -71 2.4882e-02 -100 101 -2.00005 5000000 -50000000",
    "-25 71 7 -9.3262e+01 -100 101 -2.00005 5000000 -50000000",
    "-14 53 46 -6.7026e-02 -100 101 -2.00005 5000000 -50000000"
  };

  // Extraction targets live outside the loop, so every line overwrites the
  // values parsed from the previous one.
  int a1, a2, a3, a4, a5, a6, a7;
  double f1 = 0.0;
  double f2 = 0.0;
  double total = 0.0;

  for (const char *line : lines) {
    std::istringstream in(line);
    in >> a1 >> a2 >> a3;
    in >> f1;
    in >> a4 >> a5;
    in >> f2;
    in >> a6 >> a7;

    // The integer fields are summed in int arithmetic first, then the two
    // doubles are added, and the result is scaled down by one million.
    const int int_sum = a1 + a2 + a3 + a4 + a5 + a6 + a7;
    total += (int_sum + f1 + f2) / 1000000;
  }
  return total;
}

// Benchmark body: calls istream_numbers() for as long as the harness keeps the
// timing loop alive, accumulating the results so the work cannot be discarded.
static void BM_Istream_numbers(benchmark::State &state) {
  double sink = 0;
  while (state.KeepRunning()) {
    // The compound assignment itself is handed to DoNotOptimize, keeping the
    // running total observable to the optimizer's eyes.
    benchmark::DoNotOptimize(sink += istream_numbers());
  }
}

// Registers BM_Istream_numbers with the benchmark harness, requesting a range
// of arguments from 1024 to 4096 with a multiplier of 2.
// NOTE(review): BM_Istream_numbers never reads the range argument from its
// State, so each registered argument appears to run the identical workload --
// confirm whether the Range is intentional.
BENCHMARK(BM_Istream_numbers)->RangeMultiplier(2)->Range(1024, 4096);
// Presumably expands to the program entry point provided by the benchmark
// library -- verify against the benchmark header in use.
BENCHMARK_MAIN()
3 changes: 3 additions & 0 deletions libcxx/include/__config
Expand Up @@ -76,6 +76,9 @@
// its vtable and typeinfo to libc++ rather than having all other libraries
// using that class define their own copies.
#define _LIBCPP_ABI_BAD_FUNCTION_CALL_KEY_FUNCTION

// Enable optimized version of __do_get_(un)signed which avoids redundant copies.
#define _LIBCPP_ABI_OPTIMIZED_LOCALE_NUM_GET
#elif _LIBCPP_ABI_VERSION == 1
#if !defined(_LIBCPP_OBJECT_FORMAT_COFF)
// Enable compiling copies of now inline methods into the dylib to support
Expand Down
72 changes: 66 additions & 6 deletions libcxx/include/locale
Expand Up @@ -372,19 +372,57 @@ template <class _CharT>
// Character-type-dependent helpers used by the "Stage 2" part of numeric
// extraction. The integer helpers come in two flavours selected by
// _LIBCPP_ABI_OPTIMIZED_LOCALE_NUM_GET: the legacy signatures that take a
// mutable __atoms buffer, and the optimized ones that take a const pointer so
// that the char case can use __src directly instead of a copy.
struct __num_get
    : protected __num_get_base
{
    // Floating-point helpers; only declared here, defined out of line.
    static string __stage2_float_prep(ios_base& __iob, _CharT* __atoms, _CharT& __decimal_point,
                                      _CharT& __thousands_sep);

    static int __stage2_float_loop(_CharT __ct, bool& __in_units, char& __exp,
                                   char* __a, char*& __a_end,
                                   _CharT __decimal_point, _CharT __thousands_sep,
                                   const string& __grouping, unsigned* __g,
                                   unsigned*& __g_end, unsigned& __dc, _CharT* __atoms);
#ifndef _LIBCPP_ABI_OPTIMIZED_LOCALE_NUM_GET
    // Legacy ABI: integer helpers receive the caller's mutable __atoms buffer.
    // Both are defined out of line.
    static string __stage2_int_prep(ios_base& __iob, _CharT* __atoms, _CharT& __thousands_sep);
    static int __stage2_int_loop(_CharT __ct, int __base, char* __a, char*& __a_end,
                                 unsigned& __dc, _CharT __thousands_sep, const string& __grouping,
                                 unsigned* __g, unsigned*& __g_end, _CharT* __atoms);

#else
    // Optimized ABI: reads the thousands separator from the numpunct facet of
    // __iob's locale into __thousands_sep and returns that facet's grouping
    // string. Widening of the atoms is handled separately by __do_widen.
    static string __stage2_int_prep(ios_base& __iob, _CharT& __thousands_sep)
    {
        locale __loc = __iob.getloc();
        const numpunct<_CharT>& __np = use_facet<numpunct<_CharT> >(__loc);
        __thousands_sep = __np.thousands_sep();
        return __np.grouping();
    }

    // Returns a pointer to the atoms to match input against. Forwards to the
    // __do_widen_p overload set, which picks the char overload when _CharT is
    // char and the generic template otherwise.
    const _CharT* __do_widen(ios_base& __iob, _CharT* __atoms) const
    {
        return __do_widen_p(__iob, __atoms);
    }


    // Same role as the legacy loop helper, but __atoms is pointer-to-const so
    // it can refer to storage the caller does not own. Defined out of line.
    static int __stage2_int_loop(_CharT __ct, int __base, char* __a, char*& __a_end,
                                 unsigned& __dc, _CharT __thousands_sep, const string& __grouping,
                                 unsigned* __g, unsigned*& __g_end, const _CharT* __atoms);
private:
    // Generic case: widens the first 26 characters of __src into the caller's
    // buffer using the ctype facet of __iob's locale, and returns that buffer.
    // The parameter is spelled _Tp (a reserved identifier) so that a user macro
    // cannot collide with it.
    template<typename _Tp>
    const _Tp* __do_widen_p(ios_base& __iob, _Tp* __atoms) const
    {
        locale __loc = __iob.getloc();
        use_facet<ctype<_Tp> >(__loc).widen(__src, __src + 26, __atoms);
        return __atoms;
    }

    // char case: no widening is performed; __src is returned as-is and the
    // caller's buffer is left untouched.
    const char* __do_widen_p(ios_base& __iob, char* __atoms) const
    {
        (void)__iob;
        (void)__atoms;
        return __src;
    }
#endif
};

#ifndef _LIBCPP_ABI_OPTIMIZED_LOCALE_NUM_GET
template <class _CharT>
string
__num_get<_CharT>::__stage2_int_prep(ios_base& __iob, _CharT* __atoms, _CharT& __thousands_sep)
Expand All @@ -395,6 +433,7 @@ __num_get<_CharT>::__stage2_int_prep(ios_base& __iob, _CharT* __atoms, _CharT& _
__thousands_sep = __np.thousands_sep();
return __np.grouping();
}
#endif

template <class _CharT>
string
Expand All @@ -411,9 +450,16 @@ __num_get<_CharT>::__stage2_float_prep(ios_base& __iob, _CharT* __atoms, _CharT&

template <class _CharT>
int
#ifndef _LIBCPP_ABI_OPTIMIZED_LOCALE_NUM_GET
__num_get<_CharT>::__stage2_int_loop(_CharT __ct, int __base, char* __a, char*& __a_end,
unsigned& __dc, _CharT __thousands_sep, const string& __grouping,
unsigned* __g, unsigned*& __g_end, _CharT* __atoms)
#else
__num_get<_CharT>::__stage2_int_loop(_CharT __ct, int __base, char* __a, char*& __a_end,
unsigned& __dc, _CharT __thousands_sep, const string& __grouping,
unsigned* __g, unsigned*& __g_end, const _CharT* __atoms)

#endif
{
if (__a_end == __a && (__ct == __atoms[24] || __ct == __atoms[25]))
{
Expand Down Expand Up @@ -849,9 +895,16 @@ num_get<_CharT, _InputIterator>::__do_get_signed(iter_type __b, iter_type __e,
// Stage 1
int __base = this->__get_base(__iob);
// Stage 2
char_type __atoms[26];
char_type __thousands_sep;
const int __atoms_size = 26;
#ifdef _LIBCPP_ABI_OPTIMIZED_LOCALE_NUM_GET
char_type __atoms1[__atoms_size];
const char_type *__atoms = this->__do_widen(__iob, __atoms1);
string __grouping = this->__stage2_int_prep(__iob, __thousands_sep);
#else
char_type __atoms[__atoms_size];
string __grouping = this->__stage2_int_prep(__iob, __atoms, __thousands_sep);
#endif
string __buf;
__buf.resize(__buf.capacity());
char* __a = &__buf[0];
Expand Down Expand Up @@ -899,9 +952,16 @@ num_get<_CharT, _InputIterator>::__do_get_unsigned(iter_type __b, iter_type __e,
// Stage 1
int __base = this->__get_base(__iob);
// Stage 2
char_type __atoms[26];
char_type __thousands_sep;
const int __atoms_size = 26;
#ifdef _LIBCPP_ABI_OPTIMIZED_LOCALE_NUM_GET
char_type __atoms1[__atoms_size];
const char_type *__atoms = this->__do_widen(__iob, __atoms1);
string __grouping = this->__stage2_int_prep(__iob, __thousands_sep);
#else
char_type __atoms[__atoms_size];
string __grouping = this->__stage2_int_prep(__iob, __atoms, __thousands_sep);
#endif
string __buf;
__buf.resize(__buf.capacity());
char* __a = &__buf[0];
Expand Down

0 comments on commit 38bc3df

Please sign in to comment.