Skip to content

Commit 1384547

Browse files
committed
[js] Basic implementation of new encoding ops
1 parent b5f9716 commit 1384547

File tree

3 files changed

+61
-15
lines changed

3 files changed

+61
-15
lines changed

src/vm/js/Operations.nqp

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -541,7 +541,13 @@ class QAST::OperationsJS {
541541

542542
add_simple_op('encode', $T_OBJ, [$T_STR, $T_STR, $T_OBJ], :side_effects);
543543
add_simple_op('encoderep', $T_OBJ, [$T_STR, $T_STR, $T_STR, $T_OBJ], :side_effects);
544+
545+
add_simple_op('encodeconf', $T_OBJ, [$T_STR, $T_STR, $T_OBJ, $T_INT], :side_effects);
546+
add_simple_op('encoderepconf', $T_OBJ, [$T_STR, $T_STR, $T_STR, $T_OBJ, $T_INT], :side_effects);
547+
544548
add_simple_op('decode', $T_STR, [$T_OBJ, $T_STR]);
549+
add_simple_op('decodeconf', $T_STR, [$T_OBJ, $T_STR, $T_INT]);
550+
add_simple_op('decoderepconf', $T_STR, [$T_OBJ, $T_STR, $T_STR, $T_INT]);
545551

546552
add_simple_op('gethostname', $T_STR, [$T_STR]);
547553

src/vm/js/nqp-runtime/codecs.js

Lines changed: 28 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -22,23 +22,35 @@ class SingleByteCodec {
2222
this.encodeBuf = new Buffer(65536);
2323
this.encodeBuf.fill(0);
2424

25+
this.encodeBufPermissive = new Buffer(65536);
26+
this.encodeBufPermissive.fill(0);
27+
2528
// stored separately so that we can have a unmapped flag in encodeBuf
2629
this.zero = codes.charCodeAt(0);
2730
for (let i = 1; i < codes.length; i++) {
28-
this.encodeBuf[codes.charCodeAt(i)] = i;
31+
if (codes[i] === '�') {
32+
this.encodeBufPermissive[i] = i;
33+
} else {
34+
this.encodeBufPermissive[codes.charCodeAt(i)] = i;
35+
this.encodeBuf[codes.charCodeAt(i)] = i;
36+
}
2937
}
3038

3139
this.decodeBuf = Buffer.from(codes, 'ucs2');
40+
this.decodeBufPermissive = Buffer.from(codes.replace(/\ufffd/g, function(match, offset, string) {
41+
return String.fromCharCode(offset);
42+
}), 'ucs2');
3243
}
3344

34-
encode(str) {
45+
encode(str, permissive) {
3546
const buf = new Buffer(str.length);
47+
const encodeBuf = permissive ? this.encodeBufPermissive : this.encodeBuf;
3648
for (let i = 0; i < str.length; i++) {
3749
const unit = str.charCodeAt(i);
3850
if (unit === this.zero) {
3951
buf[i] = 0;
4052
} else {
41-
const encoded = this.encodeBuf[unit];
53+
const encoded = encodeBuf[unit];
4254
if (encoded === 0) {
4355
throw new NQPException('Error encoding ' + this.name + ' string: could not encode codepoint ' + unit);
4456
} else {
@@ -50,7 +62,9 @@ class SingleByteCodec {
5062
return buf;
5163
}
5264

53-
encodeWithReplacement(str, replacement) {
65+
encodeWithReplacement(str, replacement, permissive) {
66+
const encodeBuf = permissive ? this.encodeBufPermissive : this.encodeBuf;
67+
5468
const replacementBuffer = this.encode(replacement);
5569

5670
let replacementCount = 0;
@@ -60,7 +74,7 @@ class SingleByteCodec {
6074
i++;
6175
replacementCount++;
6276
} else {
63-
if (code !== this.zero && this.encodeBuf[code] === 0) {
77+
if (code !== this.zero && encodeBuf[code] === 0) {
6478
replacementCount++;
6579
}
6680
}
@@ -75,7 +89,7 @@ class SingleByteCodec {
7589
if (unit === this.zero) {
7690
buf[i] = 0;
7791
} else {
78-
const encoded = this.encodeBuf[unit];
92+
const encoded = encodeBuf[unit];
7993
if (encoded === 0) {
8094
offset += replacementBuffer.copy(buf, offset);
8195
} else {
@@ -91,8 +105,8 @@ class SingleByteCodec {
91105
return buf;
92106
}
93107

94-
decode(buf) {
95-
const decodeBuf = this.decodeBuf;
108+
decode(buf, permissive) {
109+
const decodeBuf = permissive ? this.decodeBufPermissive : this.decodeBuf;
96110
const newBuf = new Buffer(buf.length*2);
97111
let idx1 = 0;
98112
let idx2 = 0;
@@ -103,6 +117,10 @@ class SingleByteCodec {
103117
}
104118
return newBuf.toString('ucs2');
105119
}
120+
121+
decodeWithReplacement(buf, replacement, permissive) {
122+
return this.decode(buf, permissive).replace(/\ufffd/g, replacement);
123+
}
106124
};
107125

108126
function withASCII(codes) {
@@ -114,7 +132,8 @@ function withASCII(codes) {
114132
return codes;
115133
}
116134

117-
const windows1252 = new SingleByteCodec('Windows-1252', withASCII('€‚ƒ„…†‡ˆ‰Š‹ŒŽ‘’“”•–—˜™š›œžŸ ¡¢£¤¥¦§¨©ª«¬­®¯°±²³´µ¶·¸¹º»¼½¾¿ÀÁÂÃÄÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖ×ØÙÚÛÜÝÞßàáâãäåæçèéêëìíîïðñòóôõö÷øùúûüýþÿ'));
135+
136+
const windows1252 = new SingleByteCodec('Windows-1252', withASCII('€�‚ƒ„…†‡ˆ‰Š‹Œ�Ž��‘’“”•–—˜™š›œ�žŸ ¡¢£¤¥¦§¨©ª«¬­®¯°±²³´µ¶·¸¹º»¼½¾¿ÀÁÂÃÄÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖ×ØÙÚÛÜÝÞßàáâãäåæçèéêëìíîïðñòóôõö÷øùúûüýþÿ'));
118137

119138
const latin1 = new SingleByteCodec('Latin-1', withASCII('€‚ƒ„…†‡ˆ‰Š‹ŒŽ‘’“”•–—˜™š›œžŸ ¡¢£¤¥¦§¨©ª«¬­®¯°±²³´µ¶·¸¹º»¼½¾¿ÀÁÂÃÄÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖ×ØÙÚÛÜÝÞßàáâãäåæçèéêëìíîïðñòóôõö÷øùúûüýþÿ'));
120139

src/vm/js/nqp-runtime/core.js

Lines changed: 27 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -971,7 +971,7 @@ function writeBuffer(highLevel, lowLevel) {
971971
}
972972
}
973973

974-
op.encode = function(str, encoding_, output) {
974+
op.encodeconf = function(str, encoding_, output, permissive) {
975975
if (output.array.length) {
976976
throw new NQPException('encode requires an empty array');
977977
}
@@ -981,7 +981,7 @@ op.encode = function(str, encoding_, output) {
981981
let buffer;
982982

983983
if (encoding in codecs) {
984-
buffer = codecs[encoding].encode(str);
984+
buffer = codecs[encoding].encode(str, permissive);
985985
} else {
986986
buffer = new Buffer(str, encoding);
987987
}
@@ -992,13 +992,17 @@ op.encode = function(str, encoding_, output) {
992992
return output;
993993
};
994994

995-
op.encoderep = function(str, encoding_, replacement, output) {
995+
op.encode = function(str, encoding, output) {
996+
return op.encodeconf(str, encoding, output, 1);
997+
};
998+
999+
op.encoderepconf = function(str, encoding_, replacement, output, permissive) {
9961000
const encoding = renameEncoding(encoding_);
9971001

9981002
let buffer;
9991003

10001004
if (encoding in codecs) {
1001-
buffer = codecs[encoding].encodeWithReplacement(str, replacement);
1005+
buffer = codecs[encoding].encodeWithReplacement(str, replacement, permissive);
10021006
} else {
10031007
throw new NQPException('encoding unsupported in encoderep');
10041008
}
@@ -1008,6 +1012,10 @@ op.encoderep = function(str, encoding_, replacement, output) {
10081012
return output;
10091013
};
10101014

1015+
op.encoderep = function(str, encoding, replacement, output) {
1016+
return op.encoderepconf(str, encoding, replacement, output, 1);
1017+
};
1018+
10111019
function toRawBuffer(buf) {
10121020
const elementSize = byteSize(buf);
10131021
const isUnsigned = buf._STable.REPR.type._STable.REPR.isUnsigned;
@@ -1040,10 +1048,10 @@ function bufferDifference(a, b) {
10401048
return a.length;
10411049
}
10421050

1043-
op.decode = function(buf, encoding) {
1051+
op.decodeconf = function(buf, encoding, permissive) {
10441052
let rawBuffer = toRawBuffer(buf);
10451053
if (encoding === 'windows-1252' || encoding === 'utf8-c8') {
1046-
return codecs[encoding].decode(rawBuffer);
1054+
return codecs[encoding].decode(rawBuffer, permissive);
10471055
} else if (encoding === 'utf8') {
10481056
const decoded = rawBuffer.toString(renameEncoding(encoding));
10491057
const reencoded = Buffer.from(decoded, renameEncoding(encoding));
@@ -1081,6 +1089,19 @@ op.decode = function(buf, encoding) {
10811089
}
10821090
};
10831091

1092+
op.decode = function(buf, encoding) {
1093+
return op.decodeconf(buf, encoding, 1);
1094+
};
1095+
1096+
op.decoderepconf = function(buf, encoding, replacement, permissive) {
1097+
if (encoding === 'windows-1252') {
1098+
const rawBuffer = toRawBuffer(buf);
1099+
return codecs[encoding].decodeWithReplacement(rawBuffer, replacement, permissive);
1100+
} else {
1101+
return op.decodeconf(buf, encoding, permissive);
1102+
}
1103+
};
1104+
10841105
op.objprimspec = function(obj) {
10851106
if (obj === Null) return 0;
10861107
if (typeof obj === 'object') {

0 commit comments

Comments
 (0)