@@ -262,6 +262,73 @@ strio_initialize(int argc, VALUE *argv, VALUE self)
262
262
return strio_init (argc , argv , ptr , self );
263
263
}
264
264
265
+ static int
266
+ detect_bom (VALUE str , int * bomlen )
267
+ {
268
+ const char * p ;
269
+ long len ;
270
+
271
+ RSTRING_GETMEM (str , p , len );
272
+ if (len < 1 ) return 0 ;
273
+ switch ((unsigned char )p [0 ]) {
274
+ case 0xEF :
275
+ if (len < 2 ) break ;
276
+ if ((unsigned char )p [1 ] == 0xBB && len > 2 ) {
277
+ if ((unsigned char )p [2 ] == 0xBF ) {
278
+ * bomlen = 3 ;
279
+ return rb_utf8_encindex ();
280
+ }
281
+ }
282
+ break ;
283
+
284
+ case 0xFE :
285
+ if (len < 2 ) break ;
286
+ if ((unsigned char )p [1 ] == 0xFF ) {
287
+ * bomlen = 2 ;
288
+ return rb_enc_find_index ("UTF-16BE" );
289
+ }
290
+ break ;
291
+
292
+ case 0xFF :
293
+ if (len < 2 ) break ;
294
+ if ((unsigned char )p [1 ] == 0xFE ) {
295
+ if (len >= 4 && (unsigned char )p [2 ] == 0 && (unsigned char )p [3 ] == 0 ) {
296
+ * bomlen = 4 ;
297
+ return rb_enc_find_index ("UTF-32LE" );
298
+ }
299
+ * bomlen = 2 ;
300
+ return rb_enc_find_index ("UTF-16LE" );
301
+ }
302
+ break ;
303
+
304
+ case 0 :
305
+ if (len < 4 ) break ;
306
+ if ((unsigned char )p [1 ] == 0 && (unsigned char )p [2 ] == 0xFE & (unsigned char )p [3 ] == 0xFF ) {
307
+ * bomlen = 4 ;
308
+ return rb_enc_find_index ("UTF-32BE" );
309
+ }
310
+ break ;
311
+ }
312
+ return 0 ;
313
+ }
314
+
315
+ static rb_encoding *
316
+ set_encoding_by_bom (struct StringIO * ptr )
317
+ {
318
+ int bomlen , idx = detect_bom (ptr -> string , & bomlen );
319
+ rb_encoding * extenc = NULL ;
320
+
321
+ if (idx ) {
322
+ extenc = rb_enc_from_index (idx );
323
+ ptr -> pos = bomlen ;
324
+ if (ptr -> flags & FMODE_WRITABLE ) {
325
+ rb_enc_associate_index (ptr -> string , idx );
326
+ }
327
+ }
328
+ ptr -> enc = extenc ;
329
+ return extenc ;
330
+ }
331
+
265
332
static VALUE
266
333
strio_init (int argc , VALUE * argv , struct StringIO * ptr , VALUE self )
267
334
{
@@ -294,6 +361,7 @@ strio_init(int argc, VALUE *argv, struct StringIO *ptr, VALUE self)
294
361
ptr -> enc = convconfig .enc ;
295
362
ptr -> pos = 0 ;
296
363
ptr -> lineno = 0 ;
364
+ if (ptr -> flags & FMODE_SETENC_BY_BOM ) set_encoding_by_bom (ptr );
297
365
RBASIC (self )-> flags |= (ptr -> flags & FMODE_READWRITE ) * (STRIO_READABLE / FMODE_READABLE );
298
366
return self ;
299
367
}
@@ -1677,6 +1745,18 @@ strio_set_encoding(int argc, VALUE *argv, VALUE self)
1677
1745
return self ;
1678
1746
}
1679
1747
1748
+ static VALUE
1749
+ strio_set_encoding_by_bom (VALUE self )
1750
+ {
1751
+ struct StringIO * ptr = StringIO (self );
1752
+
1753
+ if (ptr -> enc ) {
1754
+ rb_raise (rb_eArgError , "encoding conversion is set" );
1755
+ }
1756
+ if (!set_encoding_by_bom (ptr )) return Qnil ;
1757
+ return rb_enc_from_encoding (ptr -> enc );
1758
+ }
1759
+
1680
1760
/*
1681
1761
* Pseudo I/O on String object.
1682
1762
*
@@ -1778,6 +1858,7 @@ Init_stringio(void)
1778
1858
rb_define_method (StringIO , "external_encoding" , strio_external_encoding , 0 );
1779
1859
rb_define_method (StringIO , "internal_encoding" , strio_internal_encoding , 0 );
1780
1860
rb_define_method (StringIO , "set_encoding" , strio_set_encoding , -1 );
1861
+ rb_define_method (StringIO , "set_encoding_by_bom" , strio_set_encoding_by_bom , 0 );
1781
1862
1782
1863
{
1783
1864
VALUE mReadable = rb_define_module_under (rb_cIO , "generic_readable" );
0 commit comments