@@ -41,6 +41,8 @@ const shortid = require('shortid');
41
41
42
42
const stripMarks = require ( './strip-marks.js' ) ;
43
43
44
+ const codecs = require ( './codecs.js' ) ;
45
+
44
46
const foldCase = require ( 'fold-case' ) ;
45
47
46
48
const xregexp = require ( 'xregexp' ) ;
@@ -801,7 +803,7 @@ op.isint = function(value) {
801
803
802
804
/* HACK - utf8-c8 is a different encoding than utf8, but it is mapped to plain utf8 here */
803
805
/**
 * Translates an encoding name into the name used for Node's Buffer API.
 *
 * Known aliases: 'utf8-c8' -> 'utf8', 'utf16' -> 'utf16le',
 * 'iso-8859-1' -> 'latin1'. Any other value is returned unchanged.
 *
 * @param {string} encoding - encoding name as supplied by the caller
 * @returns {string} the aliased name, or `encoding` itself when no alias exists
 */
function renameEncoding(encoding) {
  const aliases = {'utf8-c8': 'utf8', 'utf16': 'utf16le', 'iso-8859-1': 'latin1'};

  // Only own keys count as aliases: a bare `aliases[encoding]` lookup would
  // also resolve inherited members such as 'toString' or 'constructor' and
  // hand back a function instead of an encoding name.
  if (Object.prototype.hasOwnProperty.call(aliases, encoding)) {
    return aliases[encoding];
  }

  return encoding;
}
806
808
exports . renameEncoding = renameEncoding ;
807
809
@@ -824,26 +826,52 @@ function byteSize(buf) {
824
826
825
827
exports . byteSize = byteSize ;
826
828
827
- op . encode = function ( str , encoding_ , buf ) {
828
- if ( buf . array . length ) {
829
/**
 * Fills a high-level integer array from raw bytes, reading one little-endian
 * element at a time.
 *
 * The element width comes from `byteSize(highLevel)` and the signedness from
 * `highLevel._STable.REPR.type._STable.REPR.isUnsigned`. Elements are stored
 * into `highLevel.array` starting at index 0.
 *
 * NOTE(review): Buffer#readUIntLE / readIntLE accept a byteLength of at most
 * 6, so element widths above 6 bytes are rejected by Node with a RangeError.
 *
 * @param {Object} highLevel - destination object whose `array` is written to
 * @param {Buffer} lowLevel - source bytes
 * @throws {RangeError} when `lowLevel.length` is not a whole number of elements
 */
function writeBuffer(highLevel, lowLevel) {
  const elementSize = byteSize(highLevel);
  const isUnsigned = highLevel._STable.REPR.type._STable.REPR.isUnsigned;

  // Fail before touching the destination: a trailing partial element would
  // otherwise trigger an out-of-range read after the array was partly filled.
  if (lowLevel.length % elementSize !== 0) {
    throw new RangeError(
      `buffer of ${lowLevel.length} bytes is not a multiple of the element size ${elementSize}`
    );
  }

  const elementCount = lowLevel.length / elementSize;
  for (let i = 0; i < elementCount; i++) {
    const offset = i * elementSize;
    highLevel.array[i] = isUnsigned
      ? lowLevel.readUIntLE(offset, elementSize)
      : lowLevel.readIntLE(offset, elementSize);
  }
}
839
+
840
/**
 * Encodes a string into an empty high-level integer array.
 *
 * The encoding name is first passed through `renameEncoding`. When `codecs`
 * has an entry for the resulting name, that codec's `encode` is used;
 * otherwise Node's built-in Buffer encodings do the work. The bytes are then
 * copied into `output` by `writeBuffer`.
 *
 * @param {string} str - text to encode
 * @param {string} encoding_ - encoding name as supplied by the caller
 * @param {Object} output - destination; `output.array` must be empty
 * @returns {Object} `output`, after it has been filled
 * @throws {NQPException} when `output.array` already contains elements
 */
op.encode = function(str, encoding_, output) {
  if (output.array.length) {
    throw new NQPException('encode requires an empty array');
  }

  const encoding = renameEncoding(encoding_);

  // Buffer.from replaces the deprecated `new Buffer(str, encoding)`; unlike
  // the constructor it never allocates uninitialised memory when handed a
  // number by mistake.
  const buffer = encoding in codecs
    ? codecs[encoding].encode(str)
    : Buffer.from(str, encoding);

  writeBuffer(output, buffer);

  return output;
};
860
+
861
/**
 * Encodes a string into an empty high-level integer array, passing a
 * replacement string to the codec.
 *
 * Only encodings that have an entry in `codecs` (after `renameEncoding`) are
 * supported; the codec's `encodeWithReplacement(str, replacement)` produces
 * the bytes, which `writeBuffer` then copies into `output`.
 *
 * @param {string} str - text to encode
 * @param {string} encoding_ - encoding name as supplied by the caller
 * @param {string} replacement - replacement handed to the codec
 * @param {Object} output - destination; `output.array` must be empty
 * @returns {Object} `output`, after it has been filled
 * @throws {NQPException} when `output.array` is not empty or the encoding has
 *   no codec
 */
op.encoderep = function(str, encoding_, replacement, output) {
  // Same precondition as op.encode: writeBuffer writes from index 0, so a
  // non-empty destination would end up as a mix of old and new elements.
  if (output.array.length) {
    throw new NQPException('encode requires an empty array');
  }

  const encoding = renameEncoding(encoding_);

  if (!(encoding in codecs)) {
    throw new NQPException('encoding unsupported in encoderep');
  }

  const buffer = codecs[encoding].encodeWithReplacement(str, replacement);

  writeBuffer(output, buffer);

  return output;
};
848
876
849
877
function toRawBuffer ( buf ) {
@@ -868,8 +896,55 @@ function toRawBuffer(buf) {
868
896
869
897
exports . toRawBuffer = toRawBuffer ;
870
898
899
/**
 * Finds the first index at which two byte sequences differ.
 *
 * Only the indices of `a` are scanned. When every element of `a` matches the
 * element of `b` at the same index, `a.length` is returned; when `b` is
 * shorter than `a`, the first index missing from `b` counts as a difference.
 *
 * @param {Buffer|Array} a - sequence that drives the scan
 * @param {Buffer|Array} b - sequence compared against `a`
 * @returns {number} index of the first mismatch, or `a.length` if none
 */
function bufferDifference(a, b) {
  for (let i = 0; i < a.length; i++) {
    // Strict comparison: elements must match without type coercion.
    if (a[i] !== b[i]) {
      return i;
    }
  }

  return a.length;
}
908
+
871
909
/**
 * Decodes bytes with one of Node's encodings and rejects input that does not
 * survive a decode/re-encode round trip.
 *
 * On a mismatch, the bytes before the first differing position are decoded
 * again to work out the line and column reported in the exception.
 *
 * @param {Buffer} rawBuffer - bytes to decode
 * @param {string} nodeEncoding - Node encoding name ('utf8' or 'utf16le')
 * @param {string} label - encoding name shown in the error message
 * @returns {string} the decoded text
 * @throws {NQPException} when re-encoding the result does not reproduce
 *   `rawBuffer`
 */
function decodeStrictly(rawBuffer, nodeEncoding, label) {
  const decoded = rawBuffer.toString(nodeEncoding);
  const reencoded = Buffer.from(decoded, nodeEncoding);

  if (rawBuffer.equals(reencoded)) {
    return decoded;
  }

  // subarray is the non-deprecated equivalent of Buffer#slice (a view, not a copy).
  const correctPart = rawBuffer.subarray(0, bufferDifference(reencoded, rawBuffer));
  const lines = correctPart.toString(nodeEncoding).split(/\r\n|[\n\r\u0085\u2029\f\u000b\u2028]/);
  const column = lines[lines.length - 1].length + 1;

  throw new NQPException(
    `Malformed ${label} at line ${lines.length} col ${column}(or malformed termination)`
  );
}

/**
 * Decodes a high-level buffer into a string.
 *
 * - 'windows-1252' is handled by the matching entry in `codecs`.
 * - 'utf8' and 'utf16' are decoded strictly: malformed input raises an
 *   exception instead of being decoded leniently. For 'utf16' a leading
 *   little-endian BOM (FF FE) is dropped and a big-endian BOM (FE FF) is
 *   rejected as not yet implemented; data without a BOM is read as
 *   little-endian.
 * - Every other encoding goes through `renameEncoding` and Node's own
 *   `Buffer#toString`.
 *
 * @param {Object} buf - high-level buffer, converted with `toRawBuffer`
 * @param {string} encoding - encoding name as supplied by the caller
 * @returns {string} the decoded text
 * @throws {NQPException} on malformed UTF-8/UTF-16 or big-endian UTF-16 input
 */
op.decode = function(buf, encoding) {
  const rawBuffer = toRawBuffer(buf);

  if (encoding === 'windows-1252') {
    return codecs[encoding].decode(rawBuffer);
  }

  if (encoding === 'utf8') {
    return decodeStrictly(rawBuffer, 'utf8', 'UTF-8');
  }

  if (encoding === 'utf16') {
    if (rawBuffer[0] === 0xfe && rawBuffer[1] === 0xff) { // BE BOM
      throw new NQPException('Big-endian UTF16 is NYI');
    }

    const hasLittleEndianBom = rawBuffer[0] === 0xff && rawBuffer[1] === 0xfe;
    const payload = hasLittleEndianBom ? rawBuffer.subarray(2) : rawBuffer;

    return decodeStrictly(payload, 'utf16le', 'UTF-16');
  }

  return rawBuffer.toString(renameEncoding(encoding));
};
874
949
875
950
op . objprimspec = function ( obj ) {
0 commit comments