Skip to content

Commit

Permalink
buffer: add buffer.transcode
Browse files Browse the repository at this point in the history
Add buffer.transcode(source, from, to) method. Primarily uses ICU
to transcode a buffer's content from one of Node.js' supported
encodings to another.

Originally part of a proposal to add a new unicode module. Decided
to refactor the approach towrds individual PRs without a new module.

Refs: #8075
PR-URL: #9038
Reviewed-By: Anna Henningsen <anna@addaleax.net>
  • Loading branch information
jasnell authored and evanlucas committed Nov 3, 2016
1 parent a5c62cb commit 0939edd
Show file tree
Hide file tree
Showing 9 changed files with 437 additions and 39 deletions.
27 changes: 27 additions & 0 deletions doc/api/buffer.md
Original file line number Diff line number Diff line change
Expand Up @@ -2302,6 +2302,33 @@ added: v3.0.0
On 32-bit architectures, this value is `(2^30)-1` (~1GB).
On 64-bit architectures, this value is `(2^31)-1` (~2GB).

## buffer.transcode(source, fromEnc, toEnc)
<!-- YAML
added: REPLACEME
-->

* `source` {Buffer} A `Buffer` instance
* `fromEnc` {String} The current encoding
* `toEnc` {String} To target encoding

Re-encodes the given `Buffer` instance from one character encoding to another.
Returns a new `Buffer` instance.

Throws if the `fromEnc` or `toEnc` specify invalid character encodings or if
conversion from `fromEnc` to `toEnc` is not permitted.

The transcoding process will use substitution characters if a given byte
sequence cannot be adequately represented in the target encoding. For instance:

```js
const newBuf = buffer.transcode(Buffer.from(''), 'utf8', 'ascii');
console.log(newBuf.toString('ascii'));
// prints '?'
```

Because the Euro (``) sign is not representable in US-ASCII, it is replaced
with `?` in the transcoded `Buffer`.

## Class: SlowBuffer
<!-- YAML
deprecated: v6.0.0
Expand Down
4 changes: 4 additions & 0 deletions lib/buffer.js
Original file line number Diff line number Diff line change
Expand Up @@ -1360,3 +1360,7 @@ Buffer.prototype.swap64 = function swap64() {
};

Buffer.prototype.toLocaleString = Buffer.prototype.toString;

// Put this at the end because internal/buffer has a circular
// dependency on Buffer.
exports.transcode = require('internal/buffer').transcode;
30 changes: 30 additions & 0 deletions lib/internal/buffer.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
'use strict';

if (!process.binding('config').hasIntl) {
return;
}

const normalizeEncoding = require('internal/util').normalizeEncoding;
const Buffer = require('buffer').Buffer;

const icu = process.binding('icu');

// Transcodes the Buffer from one encoding to another, returning a new
// Buffer instance.
exports.transcode = function transcode(source, fromEncoding, toEncoding) {
if (!Buffer.isBuffer(source))
throw new TypeError('"source" argument must be a Buffer');
if (source.length === 0) return Buffer.alloc(0);

fromEncoding = normalizeEncoding(fromEncoding) || fromEncoding;
toEncoding = normalizeEncoding(toEncoding) || toEncoding;
const result = icu.transcode(source, fromEncoding, toEncoding);
if (Buffer.isBuffer(result))
return result;

const code = icu.icuErrName(result);
const err = new Error(`Unable to transcode Buffer [${code}]`);
err.code = code;
err.errno = result;
throw err;
};
1 change: 1 addition & 0 deletions node.gyp
Original file line number Diff line number Diff line change
Expand Up @@ -74,6 +74,7 @@
'lib/v8.js',
'lib/vm.js',
'lib/zlib.js',
'lib/internal/buffer.js',
'lib/internal/child_process.js',
'lib/internal/cluster.js',
'lib/internal/freelist.js',
Expand Down
55 changes: 19 additions & 36 deletions src/node_buffer.cc
Original file line number Diff line number Diff line change
Expand Up @@ -22,23 +22,6 @@
if (!(r)) return env->ThrowRangeError("out of range index"); \
} while (0)

#define THROW_AND_RETURN_UNLESS_BUFFER(env, obj) \
do { \
if (!HasInstance(obj)) \
return env->ThrowTypeError("argument should be a Buffer"); \
} while (0)

#define SPREAD_ARG(val, name) \
CHECK((val)->IsUint8Array()); \
Local<Uint8Array> name = (val).As<Uint8Array>(); \
ArrayBuffer::Contents name##_c = name->Buffer()->GetContents(); \
const size_t name##_offset = name->ByteOffset(); \
const size_t name##_length = name->ByteLength(); \
char* const name##_data = \
static_cast<char*>(name##_c.Data()) + name##_offset; \
if (name##_length > 0) \
CHECK_NE(name##_data, nullptr);

#define SLICE_START_END(start_arg, end_arg, end_max) \
size_t start; \
size_t end; \
Expand Down Expand Up @@ -448,7 +431,7 @@ void StringSlice(const FunctionCallbackInfo<Value>& args) {
Isolate* isolate = env->isolate();

THROW_AND_RETURN_UNLESS_BUFFER(env, args.This());
SPREAD_ARG(args.This(), ts_obj);
SPREAD_BUFFER_ARG(args.This(), ts_obj);

if (ts_obj_length == 0)
return args.GetReturnValue().SetEmptyString();
Expand All @@ -465,7 +448,7 @@ void StringSlice<UCS2>(const FunctionCallbackInfo<Value>& args) {
Environment* env = Environment::GetCurrent(args);

THROW_AND_RETURN_UNLESS_BUFFER(env, args.This());
SPREAD_ARG(args.This(), ts_obj);
SPREAD_BUFFER_ARG(args.This(), ts_obj);

if (ts_obj_length == 0)
return args.GetReturnValue().SetEmptyString();
Expand Down Expand Up @@ -543,8 +526,8 @@ void Copy(const FunctionCallbackInfo<Value> &args) {
THROW_AND_RETURN_UNLESS_BUFFER(env, args.This());
THROW_AND_RETURN_UNLESS_BUFFER(env, args[0]);
Local<Object> target_obj = args[0].As<Object>();
SPREAD_ARG(args.This(), ts_obj);
SPREAD_ARG(target_obj, target);
SPREAD_BUFFER_ARG(args.This(), ts_obj);
SPREAD_BUFFER_ARG(target_obj, target);

size_t target_start;
size_t source_start;
Expand Down Expand Up @@ -577,7 +560,7 @@ void Fill(const FunctionCallbackInfo<Value>& args) {
Environment* env = Environment::GetCurrent(args);

THROW_AND_RETURN_UNLESS_BUFFER(env, args[0]);
SPREAD_ARG(args[0], ts_obj);
SPREAD_BUFFER_ARG(args[0], ts_obj);

size_t start = args[2]->Uint32Value();
size_t end = args[3]->Uint32Value();
Expand All @@ -590,7 +573,7 @@ void Fill(const FunctionCallbackInfo<Value>& args) {

// First check if Buffer has been passed.
if (Buffer::HasInstance(args[1])) {
SPREAD_ARG(args[1], fill_obj);
SPREAD_BUFFER_ARG(args[1], fill_obj);
str_length = fill_obj_length;
memcpy(ts_obj_data + start, fill_obj_data, MIN(str_length, fill_length));
goto start_fill;
Expand Down Expand Up @@ -669,7 +652,7 @@ void StringWrite(const FunctionCallbackInfo<Value>& args) {
Environment* env = Environment::GetCurrent(args);

THROW_AND_RETURN_UNLESS_BUFFER(env, args.This());
SPREAD_ARG(args.This(), ts_obj);
SPREAD_BUFFER_ARG(args.This(), ts_obj);

if (!args[0]->IsString())
return env->ThrowTypeError("Argument must be a string");
Expand Down Expand Up @@ -747,7 +730,7 @@ static inline void Swizzle(char* start, unsigned int len) {
template <typename T, enum Endianness endianness>
void ReadFloatGeneric(const FunctionCallbackInfo<Value>& args) {
THROW_AND_RETURN_UNLESS_BUFFER(Environment::GetCurrent(args), args[0]);
SPREAD_ARG(args[0], ts_obj);
SPREAD_BUFFER_ARG(args[0], ts_obj);

uint32_t offset = args[1]->Uint32Value();
CHECK_LE(offset + sizeof(T), ts_obj_length);
Expand Down Expand Up @@ -881,8 +864,8 @@ void CompareOffset(const FunctionCallbackInfo<Value> &args) {

THROW_AND_RETURN_UNLESS_BUFFER(env, args[0]);
THROW_AND_RETURN_UNLESS_BUFFER(env, args[1]);
SPREAD_ARG(args[0], ts_obj);
SPREAD_ARG(args[1], target);
SPREAD_BUFFER_ARG(args[0], ts_obj);
SPREAD_BUFFER_ARG(args[1], target);

size_t target_start;
size_t source_start;
Expand Down Expand Up @@ -921,8 +904,8 @@ void Compare(const FunctionCallbackInfo<Value> &args) {

THROW_AND_RETURN_UNLESS_BUFFER(env, args[0]);
THROW_AND_RETURN_UNLESS_BUFFER(env, args[1]);
SPREAD_ARG(args[0], obj_a);
SPREAD_ARG(args[1], obj_b);
SPREAD_BUFFER_ARG(args[0], obj_a);
SPREAD_BUFFER_ARG(args[1], obj_b);

size_t cmp_length = MIN(obj_a_length, obj_b_length);

Expand Down Expand Up @@ -977,7 +960,7 @@ void IndexOfString(const FunctionCallbackInfo<Value>& args) {
UTF8);

THROW_AND_RETURN_UNLESS_BUFFER(Environment::GetCurrent(args), args[0]);
SPREAD_ARG(args[0], ts_obj);
SPREAD_BUFFER_ARG(args[0], ts_obj);

Local<String> needle = args[1].As<String>();
int64_t offset_i64 = args[2]->IntegerValue();
Expand Down Expand Up @@ -1084,8 +1067,8 @@ void IndexOfBuffer(const FunctionCallbackInfo<Value>& args) {

THROW_AND_RETURN_UNLESS_BUFFER(Environment::GetCurrent(args), args[0]);
THROW_AND_RETURN_UNLESS_BUFFER(Environment::GetCurrent(args), args[1]);
SPREAD_ARG(args[0], ts_obj);
SPREAD_ARG(args[1], buf);
SPREAD_BUFFER_ARG(args[0], ts_obj);
SPREAD_BUFFER_ARG(args[1], buf);
int64_t offset_i64 = args[2]->IntegerValue();
bool is_forward = args[4]->IsTrue();

Expand Down Expand Up @@ -1143,7 +1126,7 @@ void IndexOfNumber(const FunctionCallbackInfo<Value>& args) {
ASSERT(args[3]->IsBoolean());

THROW_AND_RETURN_UNLESS_BUFFER(Environment::GetCurrent(args), args[0]);
SPREAD_ARG(args[0], ts_obj);
SPREAD_BUFFER_ARG(args[0], ts_obj);

uint32_t needle = args[1]->Uint32Value();
int64_t offset_i64 = args[2]->IntegerValue();
Expand Down Expand Up @@ -1171,7 +1154,7 @@ void IndexOfNumber(const FunctionCallbackInfo<Value>& args) {
void Swap16(const FunctionCallbackInfo<Value>& args) {
Environment* env = Environment::GetCurrent(args);
THROW_AND_RETURN_UNLESS_BUFFER(env, args[0]);
SPREAD_ARG(args[0], ts_obj);
SPREAD_BUFFER_ARG(args[0], ts_obj);
SwapBytes16(ts_obj_data, ts_obj_length);
args.GetReturnValue().Set(args[0]);
}
Expand All @@ -1180,7 +1163,7 @@ void Swap16(const FunctionCallbackInfo<Value>& args) {
void Swap32(const FunctionCallbackInfo<Value>& args) {
Environment* env = Environment::GetCurrent(args);
THROW_AND_RETURN_UNLESS_BUFFER(env, args[0]);
SPREAD_ARG(args[0], ts_obj);
SPREAD_BUFFER_ARG(args[0], ts_obj);
SwapBytes32(ts_obj_data, ts_obj_length);
args.GetReturnValue().Set(args[0]);
}
Expand All @@ -1189,7 +1172,7 @@ void Swap32(const FunctionCallbackInfo<Value>& args) {
void Swap64(const FunctionCallbackInfo<Value>& args) {
Environment* env = Environment::GetCurrent(args);
THROW_AND_RETURN_UNLESS_BUFFER(env, args[0]);
SPREAD_ARG(args[0], ts_obj);
SPREAD_BUFFER_ARG(args[0], ts_obj);
SwapBytes64(ts_obj_data, ts_obj_length);
args.GetReturnValue().Set(args[0]);
}
Expand Down
Loading

0 comments on commit 0939edd

Please sign in to comment.