// [GitHub page residue captured with this file; not part of the C# source]
// Permalink
// Switch branches/tags
// Nothing to show
// Find file
// Fetching contributors…
// Cannot retrieve contributors at this time
// 1797 lines (1673 sloc) 83.1 KB
using System;
using System.IO;
using System.Diagnostics;
using System.Collections;
using System.Runtime.CompilerServices;
using IronRuby.Builtins;
using IronRuby.Runtime;
using IronRuby.Runtime.Calls;
using Microsoft.Scripting;
using Microsoft.Scripting.Utils;
using Microsoft.Scripting.Runtime;
namespace IronRuby.Hpricot {
using RubyIOReadCallSite = CallSite<Func<CallSite, Object, Object, Object>>;
public class HpricotScanner {
#region fields
// Scanner instance state plus cached Ruby symbols and option keys.
// NOTE(review): none of these fields is assigned in the visible part of the file;
// confirm where each one is initialized (constructor / scan entry point).
// Message for an internal "should never happen" condition; it asks the user to file a bug report.
// Presumably used when raising on an unexpected scanner state — usage is outside this view.
private static readonly String NO_WAY_SERIOUSLY = "*** This should not happen, please send a bug report with the HTML you're parsing to why@whytheluckystiff.net. So sorry!";
// The /*!*/ marker presumably flags these two references as never-null — confirm against project conventions.
private RubyContext/*!*/ _context;
private BlockParam/*!*/ _blockParam;
// Call-site storage objects. Judging by the names: one for converting values to MutableString,
// one for invoking a read operation on an IO-like source — TODO confirm at the use sites.
private ConversionStorage<MutableString> _toMutableString;
private BinaryOpStorage _readIOStorage;
private ScannerState _state;
// Symbols named after markup token kinds (XML declaration, doctype, processing instruction,
// start tag, end tag, empty tag, comment, CDATA, text).
// Presumably the token-type tags reported to the caller's block — verify against the emitting code.
private RubySymbol sym_xmldecl;
private RubySymbol sym_doctype;
private RubySymbol sym_procins;
private RubySymbol sym_stag;
private RubySymbol sym_etag;
private RubySymbol sym_emptytag;
private RubySymbol sym_comment;
private RubySymbol sym_cdata;
private RubySymbol sym_text;
// NOTE(review): upper-case variants; these look like content-model markers rather than token kinds — confirm.
private RubySymbol sym_EMPTY;
private RubySymbol sym_CDATA;
// NOTE(review): allow/deny symbols; their role is not visible in this part of the file.
private RubySymbol symAllow;
private RubySymbol symDeny;
// Presumably the keys of the scanner's option hash (xml, fixup_tags, xhtml_strict) — confirm where they are created.
private RubySymbol _optXml;
private RubySymbol _optFixupTags;
private RubySymbol _optXhtmlStrict;
#endregion
#region fields used by the ragel-generated code
// Working variables shared with the generated state machine. They carry no access
// modifier, so they are private (the C# default for class members).
// Initial value of buffer_size below.
private const int DEFAULT_BUFFER_SIZE = 32768;
// NOTE(review): by Ragel convention cs = current state, act = id of the last matched
// scanner pattern, p = read cursor. have/nread look like buffered-char and total-read
// counters, and curline (starting at 1) a line counter — confirm against the exec code.
int cs, act, have = 0, nread = 0, curline = 1, p = -1;
bool text = false;
// NOTE(review): by Ragel convention ts/te are the start/end offsets of the current token; -1 presumably means "no token".
int ts = -1, te;
// NOTE(review): by Ragel convention the end-of-input position; -1 presumably means "not reached yet".
int eof = -1;
// Character buffer being scanned; allocation is not visible here.
char[] buf;
Object attr;
// Current tag name, attribute key and attribute value holders, with matching mark_* offsets.
// presumably the mark_* values are positions in buf where each item started — verify in the actions.
Object[] tag, akey, aval;
int mark_tag, mark_akey, mark_aval;
bool done = false, ele_open = false;
// Starts at DEFAULT_BUFFER_SIZE; it is not readonly, so other code may change it.
int buffer_size = DEFAULT_BUFFER_SIZE;
bool taint = false;
#endregion
#region fields generated by ragel
// Machine-generated table (this region is marked as generated by ragel); regenerate it
// from the grammar instead of editing values by hand.
// NOTE(review): the data reads as a flat list of length-prefixed groups of action ids
// (count, id, id, ...), which is Ragel's usual actions-table layout. Presumably it is
// indexed by offsets taken from _hpricot_scan_trans_actions — confirm in the exec loop.
static readonly sbyte[] _hpricot_scan_actions = new sbyte[] {
0, 1, 1, 1, 2, 1, 4, 1,
5, 1, 6, 1, 7, 1, 8, 1,
9, 1, 10, 1, 11, 1, 12, 1,
14, 1, 16, 1, 20, 1, 21, 1,
22, 1, 24, 1, 25, 1, 26, 1,
28, 1, 29, 1, 30, 1, 32, 1,
33, 1, 38, 1, 39, 1, 40, 1,
41, 1, 42, 1, 43, 1, 44, 1,
45, 1, 46, 1, 47, 1, 48, 1,
49, 1, 50, 1, 51, 2, 2, 5,
2, 2, 6, 2, 2, 11, 2, 2,
12, 2, 2, 14, 2, 4, 39, 2,
4, 40, 2, 4, 41, 2, 5, 2,
2, 6, 14, 2, 7, 6, 2, 7,
14, 2, 11, 12, 2, 13, 3, 2,
14, 6, 2, 14, 40, 2, 15, 24,
2, 15, 28, 2, 15, 32, 2, 15,
45, 2, 17, 23, 2, 18, 27, 2,
19, 31, 2, 22, 34, 2, 22, 36,
3, 2, 6, 14, 3, 2, 14, 6,
3, 6, 7, 14, 3, 6, 14, 40,
3, 7, 14, 40, 3, 11, 2, 12,
3, 14, 6, 40, 3, 14, 13, 3,
3, 22, 0, 37, 3, 22, 2, 34,
3, 22, 14, 35, 4, 2, 14, 13,
3, 4, 6, 7, 14, 40, 4, 22,
2, 14, 35, 4, 22, 6, 14, 35,
4, 22, 7, 14, 35, 4, 22, 14,
6, 35, 5, 22, 2, 6, 14, 35,
5, 22, 2, 14, 6, 35, 5, 22,
6, 7, 14, 35
};
// Machine-generated table (this region is marked as generated by ragel); do not edit by hand.
// The values are non-decreasing offsets.
// NOTE(review): presumably one entry per state, giving the index in _hpricot_scan_trans_keys
// where that state's keys begin (Ragel's usual layout) — confirm in the exec loop.
static readonly short[] _hpricot_scan_key_offsets = new short[] {
0, 3, 4, 5, 6, 7, 8, 9,
10, 13, 22, 37, 44, 45, 46, 47,
48, 49, 52, 57, 69, 81, 86, 93,
94, 95, 100, 101, 105, 106, 107, 121,
135, 152, 169, 186, 203, 210, 212, 214,
220, 222, 227, 232, 238, 240, 245, 251,
265, 266, 267, 268, 269, 270, 271, 272,
273, 274, 275, 276, 282, 296, 300, 313,
326, 340, 354, 355, 366, 375, 388, 405,
423, 441, 450, 461, 480, 499, 510, 521,
536, 538, 540, 556, 572, 575, 587, 599,
619, 639, 658, 677, 697, 717, 728, 739,
751, 763, 775, 791, 794, 809, 811, 813,
829, 845, 848, 860, 871, 890, 910, 930,
941, 952, 964, 984, 1004, 1016, 1036, 1057,
1074, 1091, 1095, 1098, 1110, 1122, 1142, 1162,
1182, 1194, 1206, 1226, 1242, 1258, 1270, 1291,
1310, 1313, 1328, 1340, 1355, 1358, 1369, 1371,
1373, 1384, 1391, 1404, 1418, 1432, 1445, 1446,
1447, 1448, 1449, 1450, 1451, 1455, 1460, 1469,
1479, 1484, 1491, 1492, 1493, 1494, 1495, 1496,
1497, 1498, 1499, 1503, 1508, 1512, 1522, 1527,
1533, 1534, 1535, 1536, 1537, 1538, 1539, 1540,
1541, 1542, 1546, 1551, 1553, 1554, 1555, 1560,
1561, 1562, 1564, 1565, 1566, 1567, 1568, 1572,
1582, 1591, 1601, 1602, 1603, 1605, 1614, 1615,
1616, 1617, 1619, 1621, 1624, 1627, 1631, 1633,
1634, 1636, 1637, 1640
};
// Machine-generated table (this region is marked as generated by ragel); do not edit by hand.
// Holds the input characters the state machine compares against, written as \uXXXX escapes
// and terminated by a (char) 0 entry.
// NOTE(review): in Ragel's usual layout each state owns a slice starting at its
// _hpricot_scan_key_offsets entry: first the single-character keys, then low/high pairs
// for character ranges — confirm against the exec loop, which is outside this view.
static readonly char[] _hpricot_scan_trans_keys = new char[] {
'\u002d', '\u0044', '\u005b', '\u002d', '\u004f', '\u0043', '\u0054', '\u0059',
'\u0050', '\u0045', '\u0020', '\u0009', '\u000d', '\u0020', '\u003a', '\u005f',
'\u0009', '\u000d', '\u0041', '\u005a', '\u0061', '\u007a', '\u0020', '\u003e',
'\u003f', '\u005b', '\u005f', '\u0009', '\u000d', '\u002d', '\u002e', '\u0030',
'\u003a', '\u0041', '\u005a', '\u0061', '\u007a', '\u0020', '\u003e', '\u0050',
'\u0053', '\u005b', '\u0009', '\u000d', '\u0055', '\u0042', '\u004c', '\u0049',
'\u0043', '\u0020', '\u0009', '\u000d', '\u0020', '\u0022', '\u0027', '\u0009',
'\u000d', '\u0009', '\u0022', '\u003d', '\u005f', '\u0020', '\u0025', '\u0027',
'\u003b', '\u003f', '\u005a', '\u0061', '\u007a', '\u0009', '\u0022', '\u003d',
'\u005f', '\u0020', '\u0025', '\u0027', '\u003b', '\u003f', '\u005a', '\u0061',
'\u007a', '\u0020', '\u003e', '\u005b', '\u0009', '\u000d', '\u0020', '\u0022',
'\u0027', '\u003e', '\u005b', '\u0009', '\u000d', '\u0022', '\u0022', '\u0020',
'\u003e', '\u005b', '\u0009', '\u000d', '\u005d', '\u0020', '\u003e', '\u0009',
'\u000d', '\u0027', '\u0027', '\u0009', '\u0027', '\u003d', '\u005f', '\u0020',
'\u0021', '\u0023', '\u0025', '\u0028', '\u003b', '\u003f', '\u005a', '\u0061',
'\u007a', '\u0009', '\u0027', '\u003d', '\u005f', '\u0020', '\u0021', '\u0023',
'\u0025', '\u0028', '\u003b', '\u003f', '\u005a', '\u0061', '\u007a', '\u0009',
'\u0020', '\u0021', '\u0027', '\u003e', '\u005b', '\u005f', '\u000a', '\u000d',
'\u0023', '\u0025', '\u0028', '\u003b', '\u003d', '\u005a', '\u0061', '\u007a',
'\u0009', '\u0020', '\u0022', '\u0027', '\u003e', '\u005b', '\u005f', '\u000a',
'\u000d', '\u0021', '\u0025', '\u0028', '\u003b', '\u003d', '\u005a', '\u0061',
'\u007a', '\u0009', '\u0020', '\u0021', '\u0027', '\u003e', '\u005b', '\u005f',
'\u000a', '\u000d', '\u0023', '\u0025', '\u0028', '\u003b', '\u003d', '\u005a',
'\u0061', '\u007a', '\u0009', '\u0020', '\u0022', '\u0027', '\u003e', '\u005b',
'\u005f', '\u000a', '\u000d', '\u0021', '\u0025', '\u0028', '\u003b', '\u003d',
'\u005a', '\u0061', '\u007a', '\u0020', '\u0022', '\u0027', '\u003e', '\u005b',
'\u0009', '\u000d', '\u0022', '\u0027', '\u0022', '\u0027', '\u0020', '\u0027',
'\u003e', '\u005b', '\u0009', '\u000d', '\u0027', '\u005d', '\u0020', '\u003e',
'\u005d', '\u0009', '\u000d', '\u0020', '\u0027', '\u003e', '\u0009', '\u000d',
'\u0020', '\u0022', '\u003e', '\u005b', '\u0009', '\u000d', '\u0022', '\u005d',
'\u0020', '\u0022', '\u003e', '\u0009', '\u000d', '\u0020', '\u0027', '\u003e',
'\u005b', '\u0009', '\u000d', '\u0009', '\u0027', '\u003d', '\u005f', '\u0020',
'\u0021', '\u0023', '\u0025', '\u0028', '\u003b', '\u003f', '\u005a', '\u0061',
'\u007a', '\u0059', '\u0053', '\u0054', '\u0045', '\u004d', '\u0043', '\u0044',
'\u0041', '\u0054', '\u0041', '\u005b', '\u003a', '\u005f', '\u0041', '\u005a',
'\u0061', '\u007a', '\u0020', '\u003e', '\u003f', '\u005f', '\u0009', '\u000d',
'\u002d', '\u002e', '\u0030', '\u003a', '\u0041', '\u005a', '\u0061', '\u007a',
'\u0020', '\u003e', '\u0009', '\u000d', '\u0020', '\u002f', '\u003e', '\u003f',
'\u005f', '\u0009', '\u000d', '\u002d', '\u003a', '\u0041', '\u005a', '\u0061',
'\u007a', '\u0020', '\u002f', '\u003e', '\u003f', '\u005f', '\u0009', '\u000d',
'\u002d', '\u003a', '\u0041', '\u005a', '\u0061', '\u007a', '\u0020', '\u002f',
'\u003d', '\u003e', '\u003f', '\u005f', '\u0009', '\u000d', '\u002d', '\u003a',
'\u0041', '\u005a', '\u0061', '\u007a', '\u0020', '\u002f', '\u003d', '\u003e',
'\u003f', '\u005f', '\u0009', '\u000d', '\u002d', '\u003a', '\u0041', '\u005a',
'\u0061', '\u007a', '\u003e', '\u000d', '\u0020', '\u0022', '\u0027', '\u002f',
'\u003c', '\u003e', '\u0009', '\u000a', '\u000b', '\u000c', '\u000d', '\u0020',
'\u002f', '\u003c', '\u003e', '\u0009', '\u000a', '\u000b', '\u000c', '\u0020',
'\u002f', '\u003e', '\u003f', '\u005f', '\u0009', '\u000d', '\u002d', '\u003a',
'\u0041', '\u005a', '\u0061', '\u007a', '\u000d', '\u0020', '\u002f', '\u003c',
'\u003e', '\u003f', '\u005f', '\u0009', '\u000a', '\u000b', '\u000c', '\u002d',
'\u003a', '\u0041', '\u005a', '\u0061', '\u007a', '\u000d', '\u0020', '\u002f',
'\u003c', '\u003d', '\u003e', '\u003f', '\u005f', '\u0009', '\u000a', '\u000b',
'\u000c', '\u002d', '\u003a', '\u0041', '\u005a', '\u0061', '\u007a', '\u000d',
'\u0020', '\u002f', '\u003c', '\u003d', '\u003e', '\u003f', '\u005f', '\u0009',
'\u000a', '\u000b', '\u000c', '\u002d', '\u003a', '\u0041', '\u005a', '\u0061',
'\u007a', '\u000d', '\u0020', '\u002f', '\u003c', '\u003e', '\u0009', '\u000a',
'\u000b', '\u000c', '\u000d', '\u0020', '\u0022', '\u0027', '\u002f', '\u003c',
'\u003e', '\u0009', '\u000a', '\u000b', '\u000c', '\u000d', '\u0020', '\u0022',
'\u0027', '\u002f', '\u003c', '\u003e', '\u003f', '\u005f', '\u0009', '\u000a',
'\u000b', '\u000c', '\u002d', '\u003a', '\u0041', '\u005a', '\u0061', '\u007a',
'\u000d', '\u0020', '\u0022', '\u0027', '\u002f', '\u003c', '\u003e', '\u003f',
'\u005f', '\u0009', '\u000a', '\u000b', '\u000c', '\u002d', '\u003a', '\u0041',
'\u005a', '\u0061', '\u007a', '\u000d', '\u0020', '\u0022', '\u002f', '\u003c',
'\u003e', '\u005c', '\u0009', '\u000a', '\u000b', '\u000c', '\u000d', '\u0020',
'\u0022', '\u002f', '\u003c', '\u003e', '\u005c', '\u0009', '\u000a', '\u000b',
'\u000c', '\u0020', '\u0022', '\u002f', '\u003e', '\u003f', '\u005c', '\u005f',
'\u0009', '\u000d', '\u002d', '\u003a', '\u0041', '\u005a', '\u0061', '\u007a',
'\u0022', '\u005c', '\u0022', '\u005c', '\u0020', '\u0022', '\u002f', '\u003d',
'\u003e', '\u003f', '\u005c', '\u005f', '\u0009', '\u000d', '\u002d', '\u003a',
'\u0041', '\u005a', '\u0061', '\u007a', '\u0020', '\u0022', '\u002f', '\u003d',
'\u003e', '\u003f', '\u005c', '\u005f', '\u0009', '\u000d', '\u002d', '\u003a',
'\u0041', '\u005a', '\u0061', '\u007a', '\u0022', '\u003e', '\u005c', '\u000d',
'\u0020', '\u0022', '\u0027', '\u002f', '\u003c', '\u003e', '\u005c', '\u0009',
'\u000a', '\u000b', '\u000c', '\u000d', '\u0020', '\u0022', '\u0027', '\u002f',
'\u003c', '\u003e', '\u005c', '\u0009', '\u000a', '\u000b', '\u000c', '\u000d',
'\u0020', '\u0022', '\u0027', '\u002f', '\u003c', '\u003e', '\u003f', '\u005c',
'\u005f', '\u0009', '\u000a', '\u000b', '\u000c', '\u002d', '\u003a', '\u0041',
'\u005a', '\u0061', '\u007a', '\u000d', '\u0020', '\u0022', '\u0027', '\u002f',
'\u003c', '\u003e', '\u003f', '\u005c', '\u005f', '\u0009', '\u000a', '\u000b',
'\u000c', '\u002d', '\u003a', '\u0041', '\u005a', '\u0061', '\u007a', '\u000d',
'\u0020', '\u0022', '\u002f', '\u003c', '\u003e', '\u003f', '\u005c', '\u005f',
'\u0009', '\u000a', '\u000b', '\u000c', '\u002d', '\u003a', '\u0041', '\u005a',
'\u0061', '\u007a', '\u000d', '\u0020', '\u0022', '\u002f', '\u003c', '\u003e',
'\u003f', '\u005c', '\u005f', '\u0009', '\u000a', '\u000b', '\u000c', '\u002d',
'\u003a', '\u0041', '\u005a', '\u0061', '\u007a', '\u000d', '\u0020', '\u0022',
'\u002f', '\u003c', '\u003d', '\u003e', '\u003f', '\u005c', '\u005f', '\u0009',
'\u000a', '\u000b', '\u000c', '\u002d', '\u003a', '\u0041', '\u005a', '\u0061',
'\u007a', '\u000d', '\u0020', '\u0022', '\u002f', '\u003c', '\u003d', '\u003e',
'\u003f', '\u005c', '\u005f', '\u0009', '\u000a', '\u000b', '\u000c', '\u002d',
'\u003a', '\u0041', '\u005a', '\u0061', '\u007a', '\u000d', '\u0020', '\u0022',
'\u002f', '\u003c', '\u003e', '\u005c', '\u0009', '\u000a', '\u000b', '\u000c',
'\u000d', '\u0020', '\u0022', '\u002f', '\u003c', '\u003e', '\u005c', '\u0009',
'\u000a', '\u000b', '\u000c', '\u000d', '\u0020', '\u0022', '\u0027', '\u002f',
'\u003c', '\u003e', '\u005c', '\u0009', '\u000a', '\u000b', '\u000c', '\u000d',
'\u0020', '\u0022', '\u0027', '\u002f', '\u003c', '\u003e', '\u005c', '\u0009',
'\u000a', '\u000b', '\u000c', '\u000d', '\u0020', '\u0022', '\u0027', '\u002f',
'\u003c', '\u003e', '\u005c', '\u0009', '\u000a', '\u000b', '\u000c', '\u0020',
'\u0022', '\u0027', '\u002f', '\u003e', '\u003f', '\u005c', '\u005f', '\u0009',
'\u000d', '\u002d', '\u003a', '\u0041', '\u005a', '\u0061', '\u007a', '\u0022',
'\u0027', '\u005c', '\u0020', '\u0027', '\u002f', '\u003e', '\u003f', '\u005c',
'\u005f', '\u0009', '\u000d', '\u002d', '\u003a', '\u0041', '\u005a', '\u0061',
'\u007a', '\u0027', '\u005c', '\u0027', '\u005c', '\u0020', '\u0027', '\u002f',
'\u003d', '\u003e', '\u003f', '\u005c', '\u005f', '\u0009', '\u000d', '\u002d',
'\u003a', '\u0041', '\u005a', '\u0061', '\u007a', '\u0020', '\u0027', '\u002f',
'\u003d', '\u003e', '\u003f', '\u005c', '\u005f', '\u0009', '\u000d', '\u002d',
'\u003a', '\u0041', '\u005a', '\u0061', '\u007a', '\u0027', '\u003e', '\u005c',
'\u000d', '\u0020', '\u0022', '\u0027', '\u002f', '\u003c', '\u003e', '\u005c',
'\u0009', '\u000a', '\u000b', '\u000c', '\u000d', '\u0020', '\u0027', '\u002f',
'\u003c', '\u003e', '\u005c', '\u0009', '\u000a', '\u000b', '\u000c', '\u000d',
'\u0020', '\u0027', '\u002f', '\u003c', '\u003e', '\u003f', '\u005c', '\u005f',
'\u0009', '\u000a', '\u000b', '\u000c', '\u002d', '\u003a', '\u0041', '\u005a',
'\u0061', '\u007a', '\u000d', '\u0020', '\u0027', '\u002f', '\u003c', '\u003d',
'\u003e', '\u003f', '\u005c', '\u005f', '\u0009', '\u000a', '\u000b', '\u000c',
'\u002d', '\u003a', '\u0041', '\u005a', '\u0061', '\u007a', '\u000d', '\u0020',
'\u0027', '\u002f', '\u003c', '\u003d', '\u003e', '\u003f', '\u005c', '\u005f',
'\u0009', '\u000a', '\u000b', '\u000c', '\u002d', '\u003a', '\u0041', '\u005a',
'\u0061', '\u007a', '\u000d', '\u0020', '\u0027', '\u002f', '\u003c', '\u003e',
'\u005c', '\u0009', '\u000a', '\u000b', '\u000c', '\u000d', '\u0020', '\u0027',
'\u002f', '\u003c', '\u003e', '\u005c', '\u0009', '\u000a', '\u000b', '\u000c',
'\u000d', '\u0020', '\u0022', '\u0027', '\u002f', '\u003c', '\u003e', '\u005c',
'\u0009', '\u000a', '\u000b', '\u000c', '\u000d', '\u0020', '\u0022', '\u0027',
'\u002f', '\u003c', '\u003e', '\u003f', '\u005c', '\u005f', '\u0009', '\u000a',
'\u000b', '\u000c', '\u002d', '\u003a', '\u0041', '\u005a', '\u0061', '\u007a',
'\u000d', '\u0020', '\u0022', '\u0027', '\u002f', '\u003c', '\u003e', '\u003f',
'\u005c', '\u005f', '\u0009', '\u000a', '\u000b', '\u000c', '\u002d', '\u003a',
'\u0041', '\u005a', '\u0061', '\u007a', '\u000d', '\u0020', '\u0022', '\u0027',
'\u002f', '\u003c', '\u003e', '\u005c', '\u0009', '\u000a', '\u000b', '\u000c',
'\u000d', '\u0020', '\u0022', '\u0027', '\u002f', '\u003c', '\u003e', '\u003f',
'\u005c', '\u005f', '\u0009', '\u000a', '\u000b', '\u000c', '\u002d', '\u003a',
'\u0041', '\u005a', '\u0061', '\u007a', '\u000d', '\u0020', '\u0022', '\u0027',
'\u002f', '\u003c', '\u003d', '\u003e', '\u003f', '\u005c', '\u005f', '\u0009',
'\u000a', '\u000b', '\u000c', '\u002d', '\u003a', '\u0041', '\u005a', '\u0061',
'\u007a', '\u0020', '\u0022', '\u0027', '\u002f', '\u003d', '\u003e', '\u003f',
'\u005c', '\u005f', '\u0009', '\u000d', '\u002d', '\u003a', '\u0041', '\u005a',
'\u0061', '\u007a', '\u0020', '\u0022', '\u0027', '\u002f', '\u003d', '\u003e',
'\u003f', '\u005c', '\u005f', '\u0009', '\u000d', '\u002d', '\u003a', '\u0041',
'\u005a', '\u0061', '\u007a', '\u0022', '\u0027', '\u003e', '\u005c', '\u0022',
'\u0027', '\u005c', '\u000d', '\u0020', '\u0022', '\u0027', '\u002f', '\u003c',
'\u003e', '\u005c', '\u0009', '\u000a', '\u000b', '\u000c', '\u000d', '\u0020',
'\u0022', '\u0027', '\u002f', '\u003c', '\u003e', '\u005c', '\u0009', '\u000a',
'\u000b', '\u000c', '\u000d', '\u0020', '\u0022', '\u0027', '\u002f', '\u003c',
'\u003e', '\u003f', '\u005c', '\u005f', '\u0009', '\u000a', '\u000b', '\u000c',
'\u002d', '\u003a', '\u0041', '\u005a', '\u0061', '\u007a', '\u000d', '\u0020',
'\u0022', '\u0027', '\u002f', '\u003c', '\u003e', '\u003f', '\u005c', '\u005f',
'\u0009', '\u000a', '\u000b', '\u000c', '\u002d', '\u003a', '\u0041', '\u005a',
'\u0061', '\u007a', '\u000d', '\u0020', '\u0022', '\u0027', '\u002f', '\u003c',
'\u003e', '\u003f', '\u005c', '\u005f', '\u0009', '\u000a', '\u000b', '\u000c',
'\u002d', '\u003a', '\u0041', '\u005a', '\u0061', '\u007a', '\u000d', '\u0020',
'\u0022', '\u0027', '\u002f', '\u003c', '\u003e', '\u005c', '\u0009', '\u000a',
'\u000b', '\u000c', '\u000d', '\u0020', '\u0022', '\u0027', '\u002f', '\u003c',
'\u003e', '\u005c', '\u0009', '\u000a', '\u000b', '\u000c', '\u000d', '\u0020',
'\u0022', '\u0027', '\u002f', '\u003c', '\u003e', '\u003f', '\u005c', '\u005f',
'\u0009', '\u000a', '\u000b', '\u000c', '\u002d', '\u003a', '\u0041', '\u005a',
'\u0061', '\u007a', '\u0020', '\u0022', '\u0027', '\u002f', '\u003e', '\u003f',
'\u005c', '\u005f', '\u0009', '\u000d', '\u002d', '\u003a', '\u0041', '\u005a',
'\u0061', '\u007a', '\u0020', '\u0022', '\u0027', '\u002f', '\u003e', '\u003f',
'\u005c', '\u005f', '\u0009', '\u000d', '\u002d', '\u003a', '\u0041', '\u005a',
'\u0061', '\u007a', '\u000d', '\u0020', '\u0022', '\u0027', '\u002f', '\u003c',
'\u003e', '\u005c', '\u0009', '\u000a', '\u000b', '\u000c', '\u000d', '\u0020',
'\u0022', '\u0027', '\u002f', '\u003c', '\u003d', '\u003e', '\u003f', '\u005c',
'\u005f', '\u0009', '\u000a', '\u000b', '\u000c', '\u002d', '\u003a', '\u0041',
'\u005a', '\u0061', '\u007a', '\u000d', '\u0020', '\u0027', '\u002f', '\u003c',
'\u003e', '\u003f', '\u005c', '\u005f', '\u0009', '\u000a', '\u000b', '\u000c',
'\u002d', '\u003a', '\u0041', '\u005a', '\u0061', '\u007a', '\u0022', '\u0027',
'\u005c', '\u0020', '\u0027', '\u002f', '\u003e', '\u003f', '\u005c', '\u005f',
'\u0009', '\u000d', '\u002d', '\u003a', '\u0041', '\u005a', '\u0061', '\u007a',
'\u000d', '\u0020', '\u0022', '\u0027', '\u002f', '\u003c', '\u003e', '\u005c',
'\u0009', '\u000a', '\u000b', '\u000c', '\u0020', '\u0022', '\u002f', '\u003e',
'\u003f', '\u005c', '\u005f', '\u0009', '\u000d', '\u002d', '\u003a', '\u0041',
'\u005a', '\u0061', '\u007a', '\u0022', '\u0027', '\u005c', '\u000d', '\u0020',
'\u0027', '\u002f', '\u003c', '\u003e', '\u005c', '\u0009', '\u000a', '\u000b',
'\u000c', '\u0022', '\u005c', '\u0027', '\u005c', '\u000d', '\u0020', '\u0022',
'\u0027', '\u002f', '\u003c', '\u003e', '\u0009', '\u000a', '\u000b', '\u000c',
'\u003a', '\u005f', '\u0078', '\u0041', '\u005a', '\u0061', '\u007a', '\u0020',
'\u003f', '\u005f', '\u0009', '\u000d', '\u002d', '\u002e', '\u0030', '\u003a',
'\u0041', '\u005a', '\u0061', '\u007a', '\u0020', '\u003f', '\u005f', '\u006d',
'\u0009', '\u000d', '\u002d', '\u002e', '\u0030', '\u003a', '\u0041', '\u005a',
'\u0061', '\u007a', '\u0020', '\u003f', '\u005f', '\u006c', '\u0009', '\u000d',
'\u002d', '\u002e', '\u0030', '\u003a', '\u0041', '\u005a', '\u0061', '\u007a',
'\u0020', '\u003f', '\u005f', '\u0009', '\u000d', '\u002d', '\u002e', '\u0030',
'\u003a', '\u0041', '\u005a', '\u0061', '\u007a', '\u0065', '\u0072', '\u0073',
'\u0069', '\u006f', '\u006e', '\u0020', '\u003d', '\u0009', '\u000d', '\u0020',
'\u0022', '\u0027', '\u0009', '\u000d', '\u005f', '\u002d', '\u002e', '\u0030',
'\u003a', '\u0041', '\u005a', '\u0061', '\u007a', '\u0022', '\u005f', '\u002d',
'\u002e', '\u0030', '\u003a', '\u0041', '\u005a', '\u0061', '\u007a', '\u0020',
'\u003e', '\u003f', '\u0009', '\u000d', '\u0020', '\u003e', '\u003f', '\u0065',
'\u0073', '\u0009', '\u000d', '\u003e', '\u006e', '\u0063', '\u006f', '\u0064',
'\u0069', '\u006e', '\u0067', '\u0020', '\u003d', '\u0009', '\u000d', '\u0020',
'\u0022', '\u0027', '\u0009', '\u000d', '\u0041', '\u005a', '\u0061', '\u007a',
'\u0022', '\u005f', '\u002d', '\u002e', '\u0030', '\u0039', '\u0041', '\u005a',
'\u0061', '\u007a', '\u0020', '\u003e', '\u003f', '\u0009', '\u000d', '\u0020',
'\u003e', '\u003f', '\u0073', '\u0009', '\u000d', '\u0074', '\u0061', '\u006e',
'\u0064', '\u0061', '\u006c', '\u006f', '\u006e', '\u0065', '\u0020', '\u003d',
'\u0009', '\u000d', '\u0020', '\u0022', '\u0027', '\u0009', '\u000d', '\u006e',
'\u0079', '\u006f', '\u0022', '\u0020', '\u003e', '\u003f', '\u0009', '\u000d',
'\u0065', '\u0073', '\u006e', '\u0079', '\u006f', '\u0027', '\u0065', '\u0073',
'\u0041', '\u005a', '\u0061', '\u007a', '\u0027', '\u005f', '\u002d', '\u002e',
'\u0030', '\u0039', '\u0041', '\u005a', '\u0061', '\u007a', '\u005f', '\u002d',
'\u002e', '\u0030', '\u003a', '\u0041', '\u005a', '\u0061', '\u007a', '\u0027',
'\u005f', '\u002d', '\u002e', '\u0030', '\u003a', '\u0041', '\u005a', '\u0061',
'\u007a', '\u003e', '\u003e', '\u000a', '\u003c', '\u0021', '\u002f', '\u003a',
'\u003f', '\u005f', '\u0041', '\u005a', '\u0061', '\u007a', '\u0027', '\u005d',
'\u0022', '\u0022', '\u005c', '\u0027', '\u005c', '\u0022', '\u0027', '\u005c',
'\u0020', '\u0009', '\u000d', '\u0020', '\u0076', '\u0009', '\u000d', '\u000a',
'\u002d', '\u002d', '\u000a', '\u005d', '\u005d', '\u000a', '\u003e', '\u003f',
'\u003e', (char) 0
};
// Machine-generated table (this region is marked as generated by ragel); do not edit by hand.
// NOTE(review): presumably one entry per state, giving how many single-character keys
// that state has in _hpricot_scan_trans_keys (Ragel's usual layout) — confirm in the exec loop.
static readonly sbyte[] _hpricot_scan_single_lengths = new sbyte[] {
3, 1, 1, 1, 1, 1, 1, 1,
1, 3, 5, 5, 1, 1, 1, 1,
1, 1, 3, 4, 4, 3, 5, 1,
1, 3, 1, 2, 1, 1, 4, 4,
7, 7, 7, 7, 5, 2, 2, 4,
2, 3, 3, 4, 2, 3, 4, 4,
1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 2, 4, 2, 5, 5,
6, 6, 1, 7, 5, 5, 7, 8,
8, 5, 7, 9, 9, 7, 7, 7,
2, 2, 8, 8, 3, 8, 8, 10,
10, 9, 9, 10, 10, 7, 7, 8,
8, 8, 8, 3, 7, 2, 2, 8,
8, 3, 8, 7, 9, 10, 10, 7,
7, 8, 10, 10, 8, 10, 11, 9,
9, 4, 3, 8, 8, 10, 10, 10,
8, 8, 10, 8, 8, 8, 11, 9,
3, 7, 8, 7, 3, 7, 2, 2,
7, 3, 3, 4, 4, 3, 1, 1,
1, 1, 1, 1, 2, 3, 1, 2,
3, 5, 1, 1, 1, 1, 1, 1,
1, 1, 2, 3, 0, 2, 3, 4,
1, 1, 1, 1, 1, 1, 1, 1,
1, 2, 3, 2, 1, 1, 3, 1,
1, 2, 1, 1, 1, 1, 0, 2,
1, 2, 1, 1, 2, 5, 1, 1,
1, 2, 2, 3, 1, 2, 2, 1,
2, 1, 3, 1
};
// Machine-generated table (this region is marked as generated by ragel); do not edit by hand.
// NOTE(review): presumably one entry per state, giving how many low/high range pairs
// that state has in _hpricot_scan_trans_keys (Ragel's usual layout) — confirm in the exec loop.
static readonly sbyte[] _hpricot_scan_range_lengths = new sbyte[] {
0, 0, 0, 0, 0, 0, 0, 0,
1, 3, 5, 1, 0, 0, 0, 0,
0, 1, 1, 4, 4, 1, 1, 0,
0, 1, 0, 1, 0, 0, 5, 5,
5, 5, 5, 5, 1, 0, 0, 1,
0, 1, 1, 1, 0, 1, 1, 5,
0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 2, 5, 1, 4, 4,
4, 4, 0, 2, 2, 4, 5, 5,
5, 2, 2, 5, 5, 2, 2, 4,
0, 0, 4, 4, 0, 2, 2, 5,
5, 5, 5, 5, 5, 2, 2, 2,
2, 2, 4, 0, 4, 0, 0, 4,
4, 0, 2, 2, 5, 5, 5, 2,
2, 2, 5, 5, 2, 5, 5, 4,
4, 0, 0, 2, 2, 5, 5, 5,
2, 2, 5, 4, 4, 2, 5, 5,
0, 4, 2, 4, 0, 2, 0, 0,
2, 2, 5, 5, 5, 5, 0, 0,
0, 0, 0, 0, 1, 1, 4, 4,
1, 1, 0, 0, 0, 0, 0, 0,
0, 0, 1, 1, 2, 4, 1, 1,
0, 0, 0, 0, 0, 0, 0, 0,
0, 1, 1, 0, 0, 0, 1, 0,
0, 0, 0, 0, 0, 0, 2, 4,
4, 4, 0, 0, 0, 2, 0, 0,
0, 0, 0, 0, 1, 1, 0, 0,
0, 0, 0, 0
};
// Machine-generated table (this region is marked as generated by ragel); do not edit by hand.
// The values are strictly increasing offsets.
// NOTE(review): presumably one entry per state, giving the index of that state's first
// transition in the transition tables (_hpricot_scan_trans_targs / _hpricot_scan_trans_actions)
// — confirm in the exec loop, which is outside this view.
static readonly short[] _hpricot_scan_index_offsets = new short[] {
0, 4, 6, 8, 10, 12, 14, 16,
18, 21, 28, 39, 46, 48, 50, 52,
54, 56, 59, 64, 73, 82, 87, 94,
96, 98, 103, 105, 109, 111, 113, 123,
133, 146, 159, 172, 185, 192, 195, 198,
204, 207, 212, 217, 223, 226, 231, 237,
247, 249, 251, 253, 255, 257, 259, 261,
263, 265, 267, 269, 274, 284, 288, 298,
308, 319, 330, 332, 342, 350, 360, 373,
387, 401, 409, 419, 434, 449, 459, 469,
481, 484, 487, 500, 513, 517, 528, 539,
555, 571, 586, 601, 617, 633, 643, 653,
664, 675, 686, 699, 703, 715, 718, 721,
734, 747, 751, 762, 772, 787, 803, 819,
829, 839, 850, 866, 882, 893, 909, 926,
940, 954, 959, 963, 974, 985, 1001, 1017,
1033, 1044, 1055, 1071, 1084, 1097, 1108, 1125,
1140, 1144, 1156, 1167, 1179, 1183, 1193, 1196,
1199, 1209, 1215, 1224, 1234, 1244, 1253, 1255,
1257, 1259, 1261, 1263, 1265, 1269, 1274, 1280,
1287, 1292, 1299, 1301, 1303, 1305, 1307, 1309,
1311, 1313, 1315, 1319, 1324, 1327, 1334, 1339,
1345, 1347, 1349, 1351, 1353, 1355, 1357, 1359,
1361, 1363, 1367, 1372, 1375, 1377, 1379, 1384,
1386, 1388, 1391, 1393, 1395, 1397, 1399, 1402,
1409, 1415, 1422, 1424, 1426, 1429, 1437, 1439,
1441, 1443, 1446, 1449, 1453, 1456, 1460, 1463,
1465, 1468, 1470, 1474
};
// Machine-generated table (this region is marked as generated by ragel); do not edit by hand.
// Every value fits in a byte (the largest entry is 219).
// NOTE(review): presumably the target state for each transition, addressed through
// _hpricot_scan_index_offsets (Ragel's usual layout) — confirm in the exec loop,
// which is outside this view.
static readonly byte[] _hpricot_scan_trans_targs = new byte[] {
1, 2, 53, 204, 204, 204, 3, 204,
4, 204, 5, 204, 6, 204, 7, 204,
8, 204, 9, 9, 204, 9, 10, 10,
9, 10, 10, 204, 11, 204, 10, 26,
10, 11, 10, 10, 10, 10, 204, 11,
204, 12, 48, 26, 11, 204, 13, 204,
14, 204, 15, 204, 16, 204, 17, 204,
18, 18, 204, 18, 19, 30, 18, 204,
20, 21, 20, 20, 20, 20, 20, 20,
204, 20, 21, 20, 20, 20, 20, 20,
20, 204, 22, 204, 26, 22, 204, 22,
23, 28, 204, 26, 22, 204, 25, 24,
25, 24, 25, 204, 26, 25, 204, 27,
26, 27, 204, 27, 204, 25, 29, 25,
29, 31, 32, 31, 31, 31, 31, 31,
31, 31, 204, 31, 32, 31, 31, 31,
31, 31, 31, 31, 204, 33, 33, 31,
32, 204, 26, 31, 22, 31, 31, 31,
31, 204, 33, 33, 23, 34, 204, 26,
31, 22, 31, 31, 31, 31, 204, 35,
35, 47, 32, 206, 40, 47, 36, 47,
47, 47, 47, 29, 35, 35, 37, 34,
206, 40, 47, 36, 47, 47, 47, 47,
29, 36, 37, 46, 206, 40, 36, 29,
39, 43, 38, 39, 43, 38, 39, 25,
206, 40, 39, 29, 41, 42, 40, 41,
207, 27, 41, 26, 42, 25, 206, 42,
29, 43, 25, 208, 44, 43, 24, 41,
45, 44, 45, 25, 208, 45, 24, 39,
25, 206, 40, 39, 29, 47, 32, 47,
47, 47, 47, 47, 47, 47, 29, 49,
204, 50, 204, 51, 204, 52, 204, 21,
204, 54, 204, 55, 204, 56, 204, 57,
204, 58, 204, 204, 204, 60, 60, 60,
60, 204, 61, 204, 60, 60, 61, 60,
60, 60, 60, 204, 61, 204, 61, 204,
63, 66, 204, 62, 62, 63, 62, 62,
62, 204, 63, 66, 204, 64, 64, 63,
64, 64, 64, 204, 65, 66, 67, 204,
64, 64, 65, 64, 64, 64, 204, 65,
66, 67, 204, 64, 64, 65, 64, 64,
64, 204, 204, 204, 144, 144, 142, 143,
73, 204, 204, 144, 74, 68, 69, 69,
73, 204, 204, 69, 70, 68, 69, 66,
204, 64, 64, 69, 64, 64, 64, 204,
69, 69, 73, 204, 204, 71, 71, 69,
70, 71, 71, 71, 68, 65, 65, 73,
204, 74, 204, 71, 71, 65, 72, 71,
71, 71, 68, 65, 65, 73, 204, 74,
204, 71, 71, 65, 72, 71, 71, 71,
68, 69, 69, 73, 204, 204, 69, 70,
68, 75, 75, 77, 141, 73, 204, 204,
75, 76, 68, 75, 75, 142, 143, 73,
204, 204, 71, 71, 75, 76, 71, 71,
71, 68, 75, 75, 77, 141, 73, 204,
204, 71, 71, 75, 76, 71, 71, 71,
68, 79, 79, 70, 93, 80, 209, 94,
79, 90, 78, 79, 79, 70, 93, 80,
209, 94, 79, 90, 78, 79, 69, 84,
209, 82, 81, 82, 79, 82, 82, 82,
80, 69, 81, 80, 79, 81, 80, 83,
69, 84, 85, 209, 82, 81, 82, 83,
82, 82, 82, 80, 83, 69, 84, 85,
209, 82, 81, 82, 83, 82, 82, 82,
80, 69, 209, 81, 80, 86, 86, 139,
140, 93, 80, 209, 94, 86, 95, 78,
87, 87, 139, 140, 93, 80, 209, 94,
87, 88, 78, 87, 87, 139, 140, 93,
80, 209, 91, 94, 91, 87, 88, 91,
91, 91, 78, 87, 87, 89, 96, 93,
80, 209, 91, 94, 91, 87, 88, 91,
91, 91, 78, 79, 79, 70, 93, 80,
209, 91, 94, 91, 79, 90, 91, 91,
91, 78, 79, 79, 70, 93, 80, 209,
91, 94, 91, 79, 90, 91, 91, 91,
78, 83, 83, 70, 93, 80, 95, 209,
91, 94, 91, 83, 92, 91, 91, 91,
78, 83, 83, 70, 93, 80, 95, 209,
91, 94, 91, 83, 92, 91, 91, 91,
78, 79, 79, 70, 93, 80, 209, 94,
79, 90, 78, 79, 79, 90, 93, 80,
209, 94, 79, 90, 78, 87, 87, 89,
96, 93, 80, 209, 94, 87, 88, 78,
98, 98, 108, 90, 128, 99, 211, 129,
98, 117, 97, 98, 98, 108, 90, 128,
99, 211, 129, 98, 117, 97, 98, 100,
79, 121, 211, 120, 122, 120, 98, 120,
120, 120, 99, 100, 79, 122, 99, 100,
69, 105, 210, 103, 102, 103, 100, 103,
103, 103, 101, 69, 102, 101, 100, 102,
101, 104, 69, 105, 106, 210, 103, 102,
103, 104, 103, 103, 103, 101, 104, 69,
105, 106, 210, 103, 102, 103, 104, 103,
103, 103, 101, 69, 210, 102, 101, 138,
138, 136, 137, 111, 101, 210, 112, 138,
113, 107, 100, 100, 70, 111, 101, 210,
112, 100, 108, 107, 100, 100, 70, 111,
101, 210, 109, 112, 109, 100, 108, 109,
109, 109, 107, 104, 104, 70, 111, 101,
113, 210, 109, 112, 109, 104, 110, 109,
109, 109, 107, 104, 104, 70, 111, 101,
113, 210, 109, 112, 109, 104, 110, 109,
109, 109, 107, 100, 100, 70, 111, 101,
210, 112, 100, 108, 107, 100, 100, 108,
111, 101, 210, 112, 100, 108, 107, 114,
114, 116, 135, 111, 101, 210, 112, 114,
115, 107, 114, 114, 136, 137, 111, 101,
210, 109, 112, 109, 114, 115, 109, 109,
109, 107, 114, 114, 116, 135, 111, 101,
210, 109, 112, 109, 114, 115, 109, 109,
109, 107, 98, 98, 108, 90, 128, 99,
211, 129, 98, 117, 97, 98, 98, 108,
90, 128, 99, 211, 118, 129, 118, 98,
117, 118, 118, 118, 97, 119, 119, 108,
90, 128, 99, 133, 211, 118, 129, 118,
119, 134, 118, 118, 118, 97, 119, 100,
79, 121, 123, 211, 120, 122, 120, 119,
120, 120, 120, 99, 119, 100, 79, 121,
123, 211, 120, 122, 120, 119, 120, 120,
120, 99, 100, 79, 211, 122, 99, 98,
98, 122, 99, 124, 124, 131, 132, 128,
99, 211, 129, 124, 133, 97, 125, 125,
131, 132, 128, 99, 211, 129, 125, 126,
97, 125, 125, 131, 132, 128, 99, 211,
118, 129, 118, 125, 126, 118, 118, 118,
97, 125, 125, 127, 130, 128, 99, 211,
118, 129, 118, 125, 126, 118, 118, 118,
97, 98, 98, 108, 90, 128, 99, 211,
118, 129, 118, 98, 117, 118, 118, 118,
97, 98, 98, 108, 90, 128, 99, 211,
129, 98, 117, 97, 98, 98, 117, 117,
128, 99, 211, 129, 98, 117, 97, 98,
98, 108, 90, 128, 99, 211, 118, 129,
118, 98, 117, 118, 118, 118, 97, 98,
100, 79, 121, 211, 120, 122, 120, 98,
120, 120, 120, 99, 98, 100, 79, 121,
211, 120, 122, 120, 98, 120, 120, 120,
99, 125, 125, 127, 130, 128, 99, 211,
129, 125, 126, 97, 119, 119, 108, 90,
128, 99, 133, 211, 118, 129, 118, 119,
134, 118, 118, 118, 97, 100, 100, 70,
111, 101, 210, 109, 112, 109, 100, 108,
109, 109, 109, 107, 100, 79, 122, 99,
100, 69, 105, 210, 103, 102, 103, 100,
103, 103, 103, 101, 114, 114, 136, 137,
111, 101, 210, 112, 114, 115, 107, 79,
69, 84, 209, 82, 81, 82, 79, 82,
82, 82, 80, 100, 79, 122, 99, 100,
100, 70, 111, 101, 210, 112, 100, 108,
107, 69, 81, 80, 69, 102, 101, 75,
75, 142, 143, 73, 204, 204, 75, 76,
68, 146, 146, 147, 146, 146, 204, 212,
146, 146, 212, 146, 146, 146, 146, 204,
212, 146, 146, 148, 212, 146, 146, 146,
146, 204, 212, 146, 146, 149, 212, 146,
146, 146, 146, 204, 213, 146, 146, 213,
146, 146, 146, 146, 204, 151, 204, 152,
204, 153, 204, 154, 204, 155, 204, 156,
204, 156, 157, 156, 204, 157, 158, 200,
157, 204, 159, 159, 159, 159, 159, 204,
160, 159, 159, 159, 159, 159, 204, 161,
204, 162, 161, 204, 161, 204, 162, 163,
176, 161, 204, 204, 204, 164, 204, 165,
204, 166, 204, 167, 204, 168, 204, 169,
204, 170, 204, 170, 171, 170, 204, 171,
172, 198, 171, 204, 173, 173, 204, 174,
173, 173, 173, 173, 173, 204, 175, 204,
162, 175, 204, 175, 204, 162, 176, 175,
204, 177, 204, 178, 204, 179, 204, 180,
204, 181, 204, 182, 204, 183, 204, 184,
204, 185, 204, 185, 186, 185, 204, 186,
187, 193, 186, 204, 188, 191, 204, 189,
204, 190, 204, 190, 204, 162, 190, 204,
192, 204, 189, 204, 194, 196, 204, 195,
204, 190, 204, 197, 204, 195, 204, 199,
199, 204, 174, 199, 199, 199, 199, 199,
204, 201, 201, 201, 201, 201, 204, 160,
201, 201, 201, 201, 201, 204, 214, 214,
216, 216, 204, 205, 204, 0, 59, 62,
145, 62, 62, 62, 204, 25, 29, 27,
26, 25, 24, 69, 81, 80, 69, 102,
101, 100, 79, 122, 99, 212, 212, 204,
213, 150, 213, 204, 214, 215, 214, 202,
214, 216, 217, 216, 203, 216, 218, 218,
219, 218, 218, 218, 204, 204, 204, 204,
204, 204, 204, 204, 204, 204, 204, 204,
204, 204, 204, 204, 204, 204, 204, 204,
204, 204, 204, 204, 204, 204, 204, 204,
204, 204, 204, 204, 204, 204, 204, 204,
204, 204, 204, 204, 204, 204, 204, 204,
204, 204, 204, 204, 204, 204, 204, 204,
204, 204, 204, 204, 204, 204, 204, 204,
204, 204, 204, 204, 204, 204, 204, 204,
204, 204, 204, 204, 204, 204, 204, 204,
204, 204, 204, 204, 204, 204, 204, 204,
204, 204, 204, 204, 204, 204, 204, 204,
204, 204, 204, 204, 204, 204, 204, 204,
204, 204, 204, 204, 204, 204, 204, 204,
204, 204, 204, 204, 204, 204, 204, 204,
204, 204, 204, 204, 204, 204, 204, 204,
204, 204, 204, 204, 204, 204, 204, 204,
204, 204, 204, 204, 204, 204, 204, 204,
204, 204, 204, 204, 204, 204, 204, 204,
204, 204, 204, 204, 204, 204, 204, 204,
204, 204, 204, 204, 204, 204, 204, 204,
204, 204, 204, 204, 204, 204, 204, 204,
204, 204, 204, 204, 204, 204, 204, 204,
204, 204, 204, 204, 204, 204, 204, 204,
204, 204, 204, 204, 204, 204, 214, 216,
204, 204, 204, 204, 204, 204, 204, 204,
204, 214, 216, 218, 0
};
static readonly byte[] _hpricot_scan_trans_actions = new byte[] {
0, 0, 0, 73, 59, 73, 0, 73,
0, 73, 0, 73, 0, 73, 0, 73,
0, 73, 0, 0, 73, 0, 1, 1,
0, 1, 1, 73, 5, 92, 0, 5,
0, 5, 0, 0, 0, 0, 73, 0,
51, 0, 0, 0, 0, 73, 0, 73,
0, 73, 0, 73, 0, 73, 0, 73,
0, 0, 73, 0, 0, 0, 0, 73,
3, 83, 3, 3, 3, 3, 3, 3,
73, 0, 19, 0, 0, 0, 0, 0,
0, 73, 0, 51, 0, 0, 73, 0,
0, 0, 51, 0, 0, 73, 86, 3,
21, 0, 0, 51, 0, 0, 75, 0,
0, 0, 51, 0, 75, 86, 3, 21,
0, 3, 83, 3, 3, 3, 3, 3,
3, 3, 73, 0, 19, 0, 0, 0,
0, 0, 0, 0, 73, 0, 0, 0,
19, 51, 0, 0, 0, 0, 0, 0,
0, 73, 0, 0, 0, 19, 51, 0,
0, 0, 0, 0, 0, 0, 73, 3,
3, 3, 172, 188, 3, 3, 3, 3,
3, 3, 3, 3, 0, 0, 0, 113,
146, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 21, 146, 0, 0, 0,
86, 86, 3, 21, 21, 0, 0, 21,
146, 0, 0, 0, 21, 0, 0, 0,
146, 0, 0, 0, 0, 21, 146, 0,
0, 0, 21, 146, 0, 0, 0, 21,
0, 0, 0, 21, 146, 0, 0, 3,
86, 188, 3, 3, 3, 0, 113, 0,
0, 0, 0, 0, 0, 0, 0, 0,
73, 0, 73, 0, 73, 0, 73, 0,
73, 0, 73, 0, 73, 0, 73, 0,
73, 0, 73, 61, 73, 1, 1, 1,
1, 73, 5, 98, 0, 0, 5, 0,
0, 0, 0, 73, 0, 55, 0, 73,
5, 5, 95, 0, 0, 5, 0, 0,
0, 73, 0, 0, 53, 116, 116, 0,
116, 116, 116, 73, 11, 110, 11, 168,
0, 0, 11, 0, 0, 0, 75, 0,
23, 0, 122, 180, 180, 0, 180, 180,
180, 75, 57, 75, 3, 3, 0, 0,
89, 75, 122, 3, 3, 3, 9, 9,
104, 75, 164, 9, 9, 0, 0, 23,
122, 180, 180, 0, 180, 180, 180, 75,
9, 9, 119, 75, 176, 180, 180, 9,
9, 180, 180, 180, 0, 107, 107, 160,
75, 11, 201, 0, 0, 107, 107, 0,
0, 0, 0, 9, 9, 119, 75, 0,
176, 180, 180, 9, 9, 180, 180, 180,
0, 9, 9, 104, 75, 164, 9, 9,
0, 80, 80, 0, 0, 152, 75, 164,
80, 80, 3, 3, 3, 0, 0, 89,
75, 122, 196, 196, 3, 3, 196, 196,
196, 3, 80, 80, 0, 0, 156, 75,
176, 196, 196, 80, 80, 196, 196, 196,
3, 80, 80, 77, 152, 3, 226, 3,
80, 80, 3, 9, 9, 7, 104, 0,
211, 0, 9, 9, 0, 0, 7, 23,
192, 180, 0, 180, 0, 180, 180, 180,
0, 7, 0, 0, 7, 0, 0, 11,
7, 110, 11, 216, 0, 0, 0, 11,
0, 0, 0, 0, 0, 7, 23, 0,
192, 180, 0, 180, 0, 180, 180, 180,
0, 7, 149, 0, 0, 3, 3, 7,
0, 89, 0, 192, 3, 3, 3, 3,
3, 3, 7, 0, 89, 0, 192, 3,
3, 3, 3, 3, 3, 7, 0, 89,
0, 192, 196, 3, 196, 3, 3, 196,
196, 196, 3, 80, 80, 7, 0, 156,
0, 221, 196, 3, 196, 80, 80, 196,
196, 196, 3, 80, 80, 77, 156, 3,
232, 196, 3, 196, 80, 80, 196, 196,
196, 3, 9, 9, 7, 119, 0, 221,
180, 0, 180, 9, 9, 180, 180, 180,
0, 107, 107, 7, 160, 0, 11, 238,
0, 0, 0, 107, 107, 0, 0, 0,
0, 9, 9, 7, 119, 0, 0, 221,
180, 0, 180, 9, 9, 180, 180, 180,
0, 9, 9, 7, 104, 0, 211, 0,
9, 9, 0, 9, 9, 7, 104, 0,
211, 0, 9, 9, 0, 80, 80, 7,
0, 152, 0, 211, 3, 80, 80, 3,
80, 80, 101, 77, 152, 3, 226, 3,
80, 80, 3, 9, 9, 7, 7, 104,
0, 211, 0, 9, 9, 0, 0, 7,
7, 23, 192, 180, 0, 180, 0, 180,
180, 180, 0, 7, 7, 0, 0, 0,
7, 23, 192, 180, 0, 180, 0, 180,
180, 180, 0, 7, 0, 0, 7, 0,
0, 11, 7, 110, 11, 216, 0, 0,
0, 11, 0, 0, 0, 0, 0, 7,
23, 0, 192, 180, 0, 180, 0, 180,
180, 180, 0, 7, 149, 0, 0, 3,
3, 0, 7, 89, 0, 192, 3, 3,
3, 3, 9, 9, 7, 104, 0, 211,
0, 9, 9, 0, 9, 9, 7, 119,
0, 221, 180, 0, 180, 9, 9, 180,
180, 180, 0, 107, 107, 7, 160, 0,
11, 238, 0, 0, 0, 107, 107, 0,
0, 0, 0, 9, 9, 7, 119, 0,
0, 221, 180, 0, 180, 9, 9, 180,
180, 180, 0, 9, 9, 7, 104, 0,
211, 0, 9, 9, 0, 9, 9, 7,
104, 0, 211, 0, 9, 9, 0, 80,
80, 0, 7, 152, 0, 211, 3, 80,
80, 3, 3, 3, 0, 7, 89, 0,
192, 196, 3, 196, 3, 3, 196, 196,
196, 3, 80, 80, 0, 7, 156, 0,
221, 196, 3, 196, 80, 80, 196, 196,
196, 3, 80, 80, 77, 77, 152, 3,
226, 3, 80, 80, 3, 9, 9, 7,
7, 119, 0, 221, 180, 0, 180, 9,
9, 180, 180, 180, 0, 107, 107, 7,
7, 160, 0, 11, 238, 0, 0, 0,
107, 107, 0, 0, 0, 0, 0, 7,
7, 23, 0, 192, 180, 0, 180, 0,
180, 180, 180, 0, 11, 7, 7, 110,
11, 216, 0, 0, 0, 11, 0, 0,
0, 0, 7, 7, 149, 0, 0, 7,
7, 0, 0, 3, 3, 7, 7, 89,
0, 192, 3, 3, 3, 3, 3, 3,
7, 7, 89, 0, 192, 3, 3, 3,
3, 3, 3, 7, 7, 89, 0, 192,
196, 3, 196, 3, 3, 196, 196, 196,
3, 80, 80, 7, 7, 156, 0, 221,
196, 3, 196, 80, 80, 196, 196, 196,
3, 80, 80, 77, 77, 156, 3, 232,
196, 3, 196, 80, 80, 196, 196, 196,
3, 9, 9, 7, 7, 104, 0, 211,
0, 9, 9, 0, 9, 9, 7, 7,
104, 0, 211, 0, 9, 9, 0, 80,
80, 101, 77, 156, 3, 232, 196, 3,
196, 80, 80, 196, 196, 196, 3, 3,
77, 77, 89, 206, 196, 3, 196, 3,
196, 196, 196, 3, 3, 101, 77, 89,
206, 196, 3, 196, 3, 196, 196, 196,
3, 80, 80, 7, 7, 152, 0, 211,
3, 80, 80, 3, 9, 9, 7, 7,
119, 0, 0, 221, 180, 0, 180, 9,
9, 180, 180, 180, 0, 80, 80, 77,
156, 3, 232, 196, 3, 196, 80, 80,
196, 196, 196, 3, 77, 77, 3, 3,
3, 77, 89, 206, 196, 3, 196, 3,
196, 196, 196, 3, 3, 3, 0, 7,
89, 0, 192, 3, 3, 3, 3, 3,
77, 89, 206, 196, 3, 196, 3, 196,
196, 196, 3, 101, 77, 3, 3, 80,
80, 77, 152, 3, 226, 3, 80, 80,
3, 77, 3, 3, 77, 3, 3, 3,
3, 0, 0, 89, 75, 122, 3, 3,
3, 25, 25, 25, 25, 25, 73, 0,
0, 0, 0, 0, 0, 0, 0, 73,
0, 0, 0, 0, 0, 0, 0, 0,
0, 73, 0, 0, 0, 0, 0, 0,
0, 0, 0, 73, 31, 0, 0, 31,
0, 0, 0, 0, 73, 0, 71, 0,
71, 0, 71, 0, 71, 0, 71, 0,
71, 0, 0, 0, 71, 0, 0, 0,
0, 71, 3, 3, 3, 3, 3, 71,
13, 0, 0, 0, 0, 0, 71, 0,
49, 0, 0, 71, 0, 49, 0, 0,
0, 0, 71, 49, 71, 0, 71, 0,
71, 0, 71, 0, 71, 0, 71, 0,
71, 0, 71, 0, 0, 0, 71, 0,
0, 0, 0, 71, 3, 3, 71, 15,
0, 0, 0, 0, 0, 71, 0, 49,
0, 0, 71, 0, 49, 0, 0, 0,
71, 0, 71, 0, 71, 0, 71, 0,
71, 0, 71, 0, 71, 0, 71, 0,
71, 0, 71, 0, 0, 0, 71, 0,
0, 0, 0, 71, 3, 3, 71, 0,
71, 17, 71, 0, 49, 0, 0, 71,
0, 71, 0, 71, 3, 3, 71, 0,
71, 17, 71, 0, 71, 0, 71, 3,
3, 71, 15, 0, 0, 0, 0, 0,
71, 3, 3, 3, 3, 3, 71, 13,
0, 0, 0, 0, 0, 71, 137, 37,
140, 43, 134, 184, 63, 0, 0, 1,
0, 1, 1, 1, 69, 21, 0, 0,
0, 21, 0, 7, 0, 0, 7, 0,
0, 7, 7, 0, 0, 0, 0, 67,
31, 0, 31, 67, 125, 31, 33, 0,
35, 128, 31, 39, 0, 41, 131, 143,
0, 45, 143, 47, 73, 73, 73, 73,
73, 73, 73, 73, 73, 73, 73, 73,
73, 73, 73, 73, 73, 73, 73, 73,
73, 73, 73, 73, 75, 75, 75, 75,
73, 75, 73, 73, 73, 73, 73, 73,
73, 73, 73, 73, 73, 73, 73, 73,
73, 73, 73, 73, 73, 73, 73, 73,
73, 73, 73, 73, 73, 73, 73, 73,
73, 73, 73, 73, 75, 75, 75, 75,
75, 75, 75, 75, 75, 75, 75, 75,
75, 75, 75, 75, 75, 75, 75, 75,
75, 75, 75, 75, 75, 75, 75, 75,
75, 75, 75, 75, 75, 75, 75, 75,
75, 75, 75, 75, 75, 75, 75, 75,
75, 75, 75, 75, 75, 75, 75, 75,
75, 75, 75, 75, 75, 75, 75, 75,
75, 75, 75, 75, 75, 75, 75, 75,
75, 75, 75, 75, 75, 75, 75, 75,
75, 75, 75, 75, 75, 73, 73, 73,
73, 73, 71, 71, 71, 71, 71, 71,
71, 71, 71, 71, 71, 71, 71, 71,
71, 71, 71, 71, 71, 71, 71, 71,
71, 71, 71, 71, 71, 71, 71, 71,
71, 71, 71, 71, 71, 71, 71, 71,
71, 71, 71, 71, 71, 71, 71, 71,
71, 71, 71, 71, 71, 71, 37, 43,
69, 65, 65, 65, 75, 75, 75, 67,
67, 35, 41, 47, 0
};
static readonly byte[] _hpricot_scan_to_state_actions = new byte[] {
0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 27, 0, 0, 0,
0, 0, 0, 0, 0, 0, 27, 0,
27, 0, 27, 0
};
static readonly byte[] _hpricot_scan_from_state_actions = new byte[] {
0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 29, 0, 0, 0,
0, 0, 0, 0, 0, 0, 29, 0,
29, 0, 29, 0
};
static readonly short[] _hpricot_scan_eof_trans = new short[] {
1626, 1626, 1626, 1626, 1626, 1626, 1626, 1626,
1626, 1626, 1626, 1626, 1626, 1626, 1626, 1626,
1626, 1626, 1626, 1626, 1626, 1626, 1626, 1626,
1687, 1687, 1687, 1687, 1626, 1687, 1626, 1626,
1626, 1626, 1626, 1626, 1626, 1626, 1626, 1626,
1626, 1626, 1626, 1626, 1626, 1626, 1626, 1626,
1626, 1626, 1626, 1626, 1626, 1626, 1626, 1626,
1626, 1626, 1626, 1626, 1626, 1626, 1626, 1626,
1687, 1687, 1687, 1687, 1687, 1687, 1687, 1687,
1687, 1687, 1687, 1687, 1687, 1687, 1687, 1687,
1687, 1687, 1687, 1687, 1687, 1687, 1687, 1687,
1687, 1687, 1687, 1687, 1687, 1687, 1687, 1687,
1687, 1687, 1687, 1687, 1687, 1687, 1687, 1687,
1687, 1687, 1687, 1687, 1687, 1687, 1687, 1687,
1687, 1687, 1687, 1687, 1687, 1687, 1687, 1687,
1687, 1687, 1687, 1687, 1687, 1687, 1687, 1687,
1687, 1687, 1687, 1687, 1687, 1687, 1687, 1687,
1687, 1687, 1687, 1687, 1687, 1687, 1687, 1687,
1687, 1626, 1626, 1626, 1626, 1626, 1678, 1678,
1678, 1678, 1678, 1678, 1678, 1678, 1678, 1678,
1678, 1678, 1678, 1678, 1678, 1678, 1678, 1678,
1678, 1678, 1678, 1678, 1678, 1678, 1678, 1678,
1678, 1678, 1678, 1678, 1678, 1678, 1678, 1678,
1678, 1678, 1678, 1678, 1678, 1678, 1678, 1678,
1678, 1678, 1678, 1678, 1678, 1678, 1678, 1678,
1678, 1678, 1679, 1680, 0, 1681, 1684, 1684,
1684, 1687, 1687, 1687, 1689, 1689, 0, 1690,
0, 1691, 0, 1692
};
// Ragel-generated state numbers for the hpricot_scan machine. Do not edit by hand.
// Initial value assigned to cs at the top of Scan.
const int hpricot_scan_start = 204;
// Value of cs that Scan reports as a parse error.
const int hpricot_scan_error = -1;
// Entry states of the named sub-machines. The same numeric values are hard-coded
// in the generated actions inside Scan (cs = 214 / 216 / 218 / 204).
// NOTE(review): names suggest comment / CDATA / processing-instruction bodies and
// the main scanner respectively - confirm against the .rl grammar.
const int hpricot_scan_en_html_comment = 214;
const int hpricot_scan_en_html_cdata = 216;
const int hpricot_scan_en_html_procins = 218;
const int hpricot_scan_en_main = 204;
#endregion
/// <summary>
/// Creates a scanner bound to the Ruby context of <paramref name="toMutableString"/>.
/// </summary>
/// <param name="toMutableString">Conversion storage used to cast string-like sources; also supplies the context.</param>
/// <param name="readIOStorage">Call-site storage used to invoke <c>read</c> on IO-like sources.</param>
/// <param name="block">Optional block that receives tokens; stored as given (may be null).</param>
public HpricotScanner(ConversionStorage<MutableString>/*!*/ toMutableString, BinaryOpStorage/*!*/ readIOStorage, BlockParam block) {
    _toMutableString = toMutableString;
    _readIOStorage = readIOStorage;
    _blockParam = block;

    // The context comes from the conversion storage; the symbols are interned in it.
    _context = _toMutableString.Context;
    InitializeHpricotSymbols(_context);
}
#region InitializeHpricotSymbols
/// <summary>
/// Interns every symbol the scanner compares against (token kinds, element-content
/// markers and option names) in the supplied context and caches them in fields.
/// </summary>
/// <param name="context">Ruby context in which the symbols are created.</param>
private void InitializeHpricotSymbols(RubyContext context) {
    Func<String, RubySymbol> intern = delegate(String name) {
        return context.CreateAsciiSymbol(name);
    };

    // Token kinds.
    sym_xmldecl = intern("xmldecl");
    sym_doctype = intern("doctype");
    sym_procins = intern("procins");
    sym_stag = intern("stag");
    sym_etag = intern("etag");
    sym_emptytag = intern("emptytag");
    sym_comment = intern("comment");
    sym_cdata = intern("cdata");
    sym_text = intern("text");

    // Element-content markers.
    sym_EMPTY = intern("EMPTY");
    sym_CDATA = intern("CDATA");
    symAllow = intern("allow");
    symDeny = intern("deny");

    // Option keys.
    _optXml = intern("xml");
    _optFixupTags = intern("fixup_tags");
    _optXhtmlStrict = intern("xhtml_strict");
}
#endregion
/// <summary>
/// Looks up <paramref name="key"/> in <paramref name="hash"/>.
/// </summary>
/// <returns>The stored value, or null when the key is absent (the hash default is ignored).</returns>
private static Object rb_hash_lookup(Hash hash, Object key) {
    Object found;
    if (hash.TryGetValue(key, out found)) {
        return found;
    }
    return null;
}
/// <summary>
/// Looks up <paramref name="key"/> in <paramref name="hash"/>.
/// </summary>
/// <returns>The stored value, or <c>hash.DefaultValue</c> when the key is absent.</returns>
private static Object rb_hash_aref(Hash hash, Object key) {
    Object found;
    if (hash.TryGetValue(key, out found)) {
        return found;
    }
    return hash.DefaultValue;
}
/// <summary>
/// Reads a boolean option.
/// </summary>
/// <returns>
/// The stored value when <paramref name="opts"/> is non-null and holds a <c>bool</c>
/// under <paramref name="key"/>; false in every other case.
/// </returns>
private static bool OPT(Hash opts, RubySymbol key) {
    if (opts == null) {
        return false;
    }

    Object stored;
    bool isFlag = opts.TryGetValue(key, out stored) && stored is bool;
    return isFlag ? (bool)stored : false;
}
/// <summary>
/// Fills in the data of a freshly created node and records it as <c>state.Last</c>.
/// </summary>
/// <param name="ele">Node to populate; also the return value.</param>
/// <param name="state">Scanner state whose <c>Last</c> is updated.</param>
/// <param name="sym">Token kind; decides whether raw text is kept for elements.</param>
/// <param name="tag">Value stored as the node's tag.</param>
/// <param name="attr">Attributes stored on elements and on non end-tag attribute nodes.</param>
/// <param name="ec">Element-content value stored on elements.</param>
/// <param name="raw">Offset of the raw token text in the buffer, or -1 when there is none.</param>
/// <param name="rawlen">Length of the raw token text.</param>
/// <returns><paramref name="ele"/>.</returns>
private IHpricotDataContainer H_ELE(IHpricotDataContainer ele, ScannerState state, RubySymbol sym, MutableString tag, Object attr, Object ec, Int32 raw, Int32 rawlen) {
    bool hasRaw = raw > -1;

    if (ele is Element) {
        ElementData elementData = ele.GetData<ElementData>();
        elementData.Name = 0;
        elementData.Tag = tag;
        elementData.Attr = attr;
        elementData.EC = ec;
        if (hasRaw) {
            // Raw markup is only retained for tag-like tokens and doctypes.
            bool keepsRaw = sym_emptytag.Equals(sym) || sym_stag.Equals(sym) || sym_etag.Equals(sym) || sym_doctype.Equals(sym);
            if (keepsRaw) {
                elementData.Raw = Utilities.CreateMutableStringFromBuffer(buf, raw, rawlen);
            }
        }
    }
    else if (ele is DocumentType || ele is ProcedureInstruction || ele is XmlDeclaration || ele is ETag || ele is BogusETag) {
        AttributeData attributeData = ele.GetData<AttributeData>();
        attributeData.Tag = tag;

        bool isEndTag = ele is ETag || ele is BogusETag;
        if (!isEndTag) {
            attributeData.Attr = attr;
        }
        else if (hasRaw) {
            // End tags store their raw markup in Attr instead of attributes.
            attributeData.Attr = Utilities.CreateMutableStringFromBuffer(buf, raw, rawlen);
        }
    }
    else {
        ele.GetData<BasicData>().Tag = tag;
    }

    state.Last = ele;
    return ele;
}
/// <summary>
/// Appends <paramref name="ele"/> to the children of <paramref name="focus"/>
/// (creating the child list on first use) and sets the node's parent to <paramref name="focus"/>.
/// </summary>
private static void rb_hpricot_add(IHpricotDataContainer focus, IHpricotDataContainer ele) {
    ElementData parentData = focus.GetData<ElementData>();

    var siblings = parentData.Children;
    if (siblings == null) {
        // Lazily allocate the child list.
        siblings = new RubyArray(1);
        parentData.Children = siblings;
    }
    siblings.Add(ele);

    BasicData childData = ele.GetData<BasicData>();
    childData.Parent = focus;
}
/// <summary>
/// Yields a four-element array <c>[sym, tag, attr, raw]</c> to the block.
/// For text tokens the fourth element is the tag itself.
/// </summary>
/// <param name="taint">When true, the array and its tag/attr/raw members are marked tainted.</param>
private void rb_yield_tokens(Object sym, Object tag, Object attr, Object raw, bool taint) {
    // Text tokens carry their content in both the tag and raw slots.
    Object payload = sym_text.Equals(sym) ? tag : raw;

    var token = new RubyArray(4);
    foreach (Object part in new Object[] { sym, tag, attr, payload }) {
        token.Add(part);
    }

    if (taint) {
        foreach (Object target in new Object[] { token, tag, attr, payload }) {
            _context.SetObjectTaint(target, true);
        }
    }

    // The block's return value is not used.
    Object ignored = null;
    _blockParam.Yield(token, out ignored);
}
/// <summary>
/// Turns one scanned token into a node and attaches it to the document tree held by
/// <paramref name="state"/>, updating <c>state.Focus</c> and <c>state.Last</c> as it goes.
/// </summary>
/// <param name="state">Tree-building state (document, focus, last node, options, element-content table).</param>
/// <param name="sym">Token kind (one of the cached sym_* symbols).</param>
/// <param name="tag">Tag name or text content of the token.</param>
/// <param name="attr">Attributes of the token; a Hash or null.</param>
/// <param name="raw">Offset of the raw token text in the buffer, or -1.</param>
/// <param name="rawlen">Length of the raw token text.</param>
/// <param name="taint">Not used by this method.</param>
private void rb_hpricot_token(ScannerState state, RubySymbol sym, MutableString tag, Object attr, int raw, int rawlen, bool taint) {
    Object ec = null;
    if (!state.Xml) {
        ElementData last = state.Focus.GetData<ElementData>();
        if (sym_emptytag.Equals(sym) || sym_stag.Equals(sym) || sym_etag.Equals(sym)) {
            Debug.Assert(state.EC is Hash, "state.EC is not an instance of Hash");
            if (state.EC.ContainsKey(tag)) {
                ec = rb_hash_lookup(state.EC, tag);
            }
            else {
                // Unknown as written: retry with the lower-cased name (falls back to the hash default).
                tag = MutableStringOps.DownCase(tag as MutableString);
                ec = rb_hash_aref(state.EC, tag);
            }
        }
        // TODO: tag.GetHashCode() == last.name.GetHashCode() ??
        // Inside an element whose content is CDATA, anything that is not the matching
        // end tag (or a procins/comment/cdata/text token) is demoted to plain text.
        if (sym_CDATA.Equals(last.EC) &&
            (!sym_procins.Equals(sym) && !sym_comment.Equals(sym) && !sym_cdata.Equals(sym) && !sym_text.Equals(sym)) &&
            !(sym_etag.Equals(sym) && tag.GetHashCode() == last.Name.GetHashCode())) {
            sym = sym_text;
            tag = Utilities.CreateMutableStringFromBuffer(buf, raw, rawlen);
        }
        if (ec != null) {
            // Reconcile the written form with the element-content table.
            if (sym_emptytag.Equals(sym)) {
                if (!sym_EMPTY.Equals(ec)) {
                    sym = sym_stag;
                }
            }
            else if (sym_stag.Equals(sym)) {
                if (sym_EMPTY.Equals(ec)) {
                    sym = sym_emptytag;
                }
            }
        }
    }
    if (sym_emptytag.Equals(sym) || sym_stag.Equals(sym)) {
        var ele = H_ELE(new Element(state), state, sym, tag, attr, ec, raw, rawlen);
        ElementData he = ele.GetData<ElementData>();
        he.Name = tag.GetHashCode();
        if (!state.Xml) {
            // Walk up from the focus looking for an ancestor whose element-content
            // table has an entry for this tag, and re-focus accordingly.
            IHpricotDataContainer match = null;
            IHpricotDataContainer e = state.Focus;
            while (e != state.Doc) {
                ElementData hee = e.GetData<ElementData>();
                if (hee.EC is Hash) {
                    Object has;
                    if ((hee.EC as Hash).TryGetValue(he.Name, out has)) {
                        if (has is bool && (bool) has == true) {
                            if (match == null) {
                                match = e;
                            }
                        }
                        else if (symAllow.Equals(has)) {
                            match = state.Focus;
                        }
                        else if (symDeny.Equals(has)) {
                            match = null;
                        }
                    }
                }
                e = hee.Parent;
            }
            if (match == null) {
                match = state.Focus;
            }
            state.Focus = match;
        }
        rb_hpricot_add(state.Focus, ele);
        //
        // in the case of a start tag that should be empty, just
        // skip the step that focuses the element. focusing moves
        // us deeper into the document.
        //
        if (sym_stag.Equals(sym)) {
            if (state.Xml || !sym_EMPTY.Equals(ec)) {
                state.Focus = ele;
                state.Last = null;
            }
        }
    }
    else if (sym_etag.Equals(sym)) {
        int name;
        IHpricotDataContainer match = null;
        IHpricotDataContainer e = state.Focus;
        if (state.Strict) {
            Debug.Assert(state.EC is Hash, "state.EC is not an instance of Hash");
            if (!state.EC.ContainsKey(tag)) {
                tag = MutableString.CreateAscii("div");
            }
        }
        //
        // another optimization will be to improve this very simple
        // O(n) tag search, where n is the depth of the focused tag.
        //
        // (see also: the search above for fixups)
        //
        name = tag.GetHashCode();
        while (e != state.Doc) {
            ElementData he = e.GetData<ElementData>();
            if (he != null && he.Name == name) {
                match = e;
                break;
            }
            e = he.Parent;
        }
        if (match == null) {
            // No open element with this name: keep the stray end tag as a bogus node.
            rb_hpricot_add(state.Focus, H_ELE(new BogusETag(state), state, sym, tag, attr, ec, raw, rawlen));
        }
        else {
            var ele = H_ELE(new ETag(state), state, sym, tag, attr, ec, raw, rawlen);
            ElementData he = match.GetData<ElementData>();
            // TODO: couldn't find this in the original implementation but it still sounds right.
            he.ETag = ele;
            state.Focus = he.Parent;
            state.Last = null;
        }
    }
    else if (sym_cdata.Equals(sym)) {
        rb_hpricot_add(state.Focus, H_ELE(new CData(state), state, sym, tag, attr, ec, raw, rawlen));
    }
    else if (sym_comment.Equals(sym)) {
        rb_hpricot_add(state.Focus, H_ELE(new Comment(state), state, sym, tag, attr, ec, raw, rawlen));
    }
    else if (sym_doctype.Equals(sym)) {
        if (state.Strict) {
            // A DOCTYPE without identifiers arrives with no attribute hash at all,
            // so create one on demand instead of dereferencing null.
            if (attr == null) {
                attr = new Hash(_context);
            }
            Hash doctypeAttributes = (Hash)attr;
            // Assign through the indexer so that identifiers already present on the
            // DOCTYPE are overwritten; Add would throw on an existing key.
            doctypeAttributes[state.Context.CreateAsciiSymbol("system_id")] = MutableString.CreateAscii("http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd");
            doctypeAttributes[state.Context.CreateAsciiSymbol("public_id")] = MutableString.CreateAscii("-//W3C//DTD XHTML 1.0 Strict//EN");
        }
        rb_hpricot_add(state.Focus, H_ELE(new DocumentType(state), state, sym, tag, attr, ec, raw, rawlen));
    }
    else if (sym_procins.Equals(sym)) {
        Debug.Assert(tag is MutableString, "tag is not an instance of MutableString");
        MatchData match = Utilities.ProcessInstructionParser.Match(RubyEncoding.Binary, tag as MutableString);
        Debug.Assert(match.GroupSuccess(0) && match.GroupCount == 3, "ProcInsParse failed to parse procins");
        // Group 1 becomes the tag, group 2 the attribute payload.
        tag = match.GetGroupValue(1);
        attr = match.GetGroupValue(2);
        rb_hpricot_add(state.Focus, H_ELE(new ProcedureInstruction(state), state, sym, tag, attr, ec, raw, rawlen));
    }
    else if (sym_text.Equals(sym)) {
        // TODO: add raw_string as well?
        if (state.Last != null && state.Last is Text) {
            // Consecutive text tokens are merged into the previous text node.
            BasicData he = state.Last.GetData<BasicData>();
            Debug.Assert(tag is MutableString, "tag is not an instance of MutableString");
            Debug.Assert(he.Tag is MutableString, "he.Tag is not an instance of MutableString");
            (he.Tag as MutableString).Append(tag as MutableString);
        }
        else {
            rb_hpricot_add(state.Focus, H_ELE(new Text(state), state, sym, tag, attr, ec, raw, rawlen));
        }
    }
    else if (sym_xmldecl.Equals(sym)) {
        rb_hpricot_add(state.Focus, H_ELE(new XmlDeclaration(state), state, sym, tag, attr, ec, raw, rawlen));
    }
}
/// <summary>
/// Emits the token currently delimited by ts/te (or pending text) as kind <paramref name="N"/>.
/// With a block the token is yielded; otherwise it is added to the document tree.
/// Does nothing when there is no token span and no pending text.
/// </summary>
/// <param name="N">Token kind symbol.</param>
private void ELE(RubySymbol N) {
    if (te > ts || text) {
        int raw = -1;
        int rawlen = 0;
        ele_open = false;
        text = false;
        // Raw markup is tracked only for tokens that are not cdata/text/procins/comment.
        if (ts != -1 && N != sym_cdata && N != sym_text && N != sym_procins && N != sym_comment) {
            raw = ts;
            rawlen = te - ts;
        }
        if (_blockParam != null) {
            // NOTE: right before v0.7 the fourth argument of rb_yield_tokens was yielding raw_string
            //       but now it yields null (hardcoded). The raw string is therefore no longer
            //       materialised here: it was allocated and then discarded.
            rb_yield_tokens(N, tag[0], attr, null, taint);
        }
        else {
            rb_hpricot_token(_state, N, (MutableString)tag[0], attr, raw, rawlen, taint);
        }
    }
}
/// <summary>
/// Stores in slot <paramref name="N"/> (one of tag, akey, aval) the buffer text between
/// that slot's mark and <paramref name="E"/>. An unset mark (-1) or an empty span stores
/// an empty string; an end before the mark leaves the slot untouched.
/// </summary>
/// <param name="N">The tag, akey or aval holder; any other array is ignored.</param>
/// <param name="E">Exclusive end offset in the buffer.</param>
private void SET(Object[] N, int E) {
    int start;
    if (N == tag) {
        start = mark_tag;
    }
    else if (N == akey) {
        start = mark_akey;
    }
    else if (N == aval) {
        start = mark_aval;
    }
    else {
        return;
    }

    if (start == -1 || E == start) {
        N[0] = MutableString.CreateEmpty();
        return;
    }
    if (E > start) {
        N[0] = Utilities.CreateMutableStringFromBuffer(buf, start, E - start);
    }
}
/// <summary>
/// Appends the buffer text between the slot's mark and <paramref name="E"/> to the string
/// already held in <paramref name="N"/>; when the slot is still empty it behaves like SET.
/// </summary>
/// <param name="N">The tag, akey or aval holder (any other array uses a mark of 0).</param>
/// <param name="E">Exclusive end offset in the buffer.</param>
private void CAT(Object[] N, int E) {
    if (N[0] == null) {
        SET(N, E);
        return;
    }

    int start = N == tag ? mark_tag
              : N == akey ? mark_akey
              : N == aval ? mark_aval
              : 0;

    MutableString target = N[0] as MutableString;
    target.Append(new String(buf, start, E - start));
}
/// <summary>
/// Rebases the mark belonging to <paramref name="N"/> (tag, akey or aval) by subtracting ts,
/// but only when that mark lies beyond ts. Other arguments are ignored.
/// </summary>
private void SLIDE(Object N) {
    if (N == tag) {
        if (mark_tag > ts) {
            mark_tag -= ts;
        }
    }
    else if (N == akey) {
        if (mark_akey > ts) {
            mark_akey -= ts;
        }
    }
    else if (N == aval) {
        if (mark_aval > ts) {
            mark_aval -= ts;
        }
    }
}
/// <summary>
/// Records attribute <paramref name="K"/> = <paramref name="V"/> on the current token,
/// creating the attribute hash on first use. A null key is ignored.
/// </summary>
private void ATTR(Object K, Object V) {
    if (K == null) {
        return;
    }
    if (attr == null) {
        attr = new Hash(_context);
    }
    Hash attributes = attr as Hash;
    attributes[K] = V;
}
/// <summary>Overload taking the key from a one-element holder.</summary>
private void ATTR(Object[] K, Object V) {
    Object key = K[0];
    ATTR(key, V);
}
/// <summary>Overload taking the value from a one-element holder.</summary>
private void ATTR(Object K, Object[] V) {
    Object value = V[0];
    ATTR(K, value);
}
/// <summary>Overload taking both key and value from one-element holders.</summary>
private void ATTR(Object[] K, Object[] V) {
    Object key = K[0];
    Object value = V[0];
    ATTR(key, value);
}
/// <summary>
/// Switches the scanner into text-accumulation mode if it is not already in it:
/// picks the start mark, clears the current tag and attributes, and sets the text flag.
/// </summary>
private void TEXT_PASS() {
    if (text) {
        return;
    }

    if (!ele_open) {
        mark_tag = p;
    }
    else {
        // An element was being opened: the text starts where that token started (if known).
        ele_open = false;
        if (ts > -1) {
            mark_tag = ts;
        }
    }

    attr = null;
    tag[0] = null;
    text = true;
}
/// <summary>
/// Closes a block token: appends the text up to <c>p - T + 1</c> to the tag and emits it as <paramref name="N"/>.
/// </summary>
/// <param name="N">Token kind symbol.</param>
/// <param name="T">Number of characters subtracted from the current position (plus one) to find the end of the content.</param>
private void EBLK(RubySymbol N, int T) {
    int contentEnd = p - T + 1;
    CAT(tag, contentEnd);
    ELE(N);
}
/// <summary>
/// Scans <paramref name="source"/> with the Ragel-generated state machine. Tokens are either
/// yielded to the block given at construction time or assembled into a document tree.
/// </summary>
/// <param name="source">An object responding to <c>read</c>, or one convertible via <c>to_str</c>.</param>
/// <param name="options">Parser options (xml, xhtml_strict, fixup_tags); stored on the document as @options.</param>
/// <param name="elementContent">Element-content table used while building the tree.</param>
/// <returns>The document of the scanner state when one exists; otherwise null.</returns>
/// <remarks>
/// Raises a Ruby ArgumentError when the source is neither readable nor string-like, and
/// throws ParserException when the state machine ends up in the error state.
/// </remarks>
public IHpricotDataContainer Scan(Object/*!*/ source, Hash/*!*/ options, Hash/*!*/ elementContent) {
    tag = new Object[1];
    akey = new Object[1];
    aval = new Object[1];
    taint = _context.IsObjectTainted(source);
    bool sourceRespondsToRead = _context.RespondTo(source, "read");
    RubyIOReadCallSite readIOCallSite = null;
    if (sourceRespondsToRead) {
        readIOCallSite = _readIOStorage.GetCallSite("read", 1);
    }
    else if (_context.RespondTo(source, "to_str")) {
        source = Protocols.CastToString(_toMutableString, source);
    }
    else {
        throw RubyExceptions.CreateArgumentError("bad Hpricot argument, String or IO only please.");
    }
    // A document tree is only built when no block consumes the tokens.
    if (_blockParam == null) {
        var state = new ScannerState(_context);
        state.Doc = new Document(state);
        state.Focus = state.Doc;
        state.Xml = OPT(options, _optXml);
        state.Strict = OPT(options, _optXhtmlStrict);
        state.Fixup = state.Strict ? true : OPT(options, _optFixupTags);
        state.EC = elementContent;
        _context.SetInstanceVariable(state.Doc, "@options", options);
        _state = state;
    }
    Int32? rubyBufferSize = Utilities.GetBufferSize(_context);
    buffer_size = rubyBufferSize.HasValue ? rubyBufferSize.Value : DEFAULT_BUFFER_SIZE;
    buf = new char[buffer_size];
    {
        cs = hpricot_scan_start;
        ts = -1;
        te = -1;
        act = 0;
    }
    while (!done) {
        p = have;
        int pe;
        int space = buffer_size - have;
        if (space == 0) {
            // The unfinished token fills the whole buffer: grow it.
            buffer_size += DEFAULT_BUFFER_SIZE;
            Array.Resize<char>(ref buf, buffer_size);
            space = buffer_size - have;
        }
        char[] chars;
        if (sourceRespondsToRead) {
            // read returns nil once the stream is exhausted; treat that as an empty chunk
            // so the end-of-input handling below runs instead of a null dereference.
            MutableString chunk = readIOCallSite.Target(readIOCallSite, source, space) as MutableString;
            chars = chunk != null ? BinaryEncoding.Instance.GetChars(chunk.ToByteArray()) : new char[0];
        }
        else {
            MutableString str = source as MutableString;
            int end = Math.Min(str.Length, nread + space);
            chars = str.Encoding.Encoding.GetChars(str.GetBinarySlice(nread, end - nread));
        }
        Array.Copy(chars, 0, buf, p, chars.Length);
        int len = chars.Length;
        nread += len;
        if (len < space) {
            // Short read: this is the last pass; one extra position is scanned past the data.
            len++;
            done = true;
        }
        pe = p + len;
        char[] data = buf;
        #region code generated by ragel
        {
            sbyte _klen;
            short _trans;
            byte _acts;
            sbyte _nacts;
            short _keys;
            if (p == pe)
                goto _test_eof;
        _resume:
            _acts = _hpricot_scan_from_state_actions[cs];
            _nacts = _hpricot_scan_actions[_acts++];
            while (_nacts-- > 0) {
                switch (_hpricot_scan_actions[_acts++]) {
                    case 21: { ts = p; }
                        break;
                    default:
                        break;
                }
            }
            _keys = _hpricot_scan_key_offsets[cs];
            _trans = (short)_hpricot_scan_index_offsets[cs];
            _klen = _hpricot_scan_single_lengths[cs];
            if (_klen > 0) {
                short _lower = _keys;
                short _mid;
                short _upper = (short)(_keys + _klen - 1);
                while (true) {
                    if (_upper < _lower)
                        break;
                    _mid = (short)(_lower + ((_upper - _lower) >> 1));
                    if (data[p] < _hpricot_scan_trans_keys[_mid])
                        _upper = (short)(_mid - 1);
                    else if (data[p] > _hpricot_scan_trans_keys[_mid])
                        _lower = (short)(_mid + 1);
                    else {
                        _trans += (short)(_mid - _keys);
                        goto _match;
                    }
                }
                _keys += (short)_klen;
                _trans += (short)_klen;
            }
            _klen = _hpricot_scan_range_lengths[cs];
            if (_klen > 0) {
                short _lower = _keys;
                short _mid;
                short _upper = (short)(_keys + (_klen << 1) - 2);
                while (true) {
                    if (_upper < _lower)
                        break;
                    _mid = (short)(_lower + (((_upper - _lower) >> 1) & ~1));
                    if (data[p] < _hpricot_scan_trans_keys[_mid])
                        _upper = (short)(_mid - 2);
                    else if (data[p] > _hpricot_scan_trans_keys[_mid + 1])
                        _lower = (short)(_mid + 2);
                    else {
                        _trans += (short)((_mid - _keys) >> 1);
                        goto _match;
                    }
                }
                _trans += (short)_klen;
            }
        _match:
        _eof_trans:
            cs = _hpricot_scan_trans_targs[_trans];
            if (_hpricot_scan_trans_actions[_trans] == 0)
                goto _again;
            _acts = _hpricot_scan_trans_actions[_trans];
            _nacts = _hpricot_scan_actions[_acts++];
            while (_nacts-- > 0) {
                switch (_hpricot_scan_actions[_acts++]) {
                    case 0: {
                            if (text) {
                                CAT(tag, p);
                                ELE(sym_text);
                                text = false;
                            }
                            attr = null;
                            tag[0] = null;
                            mark_tag = -1;
                            ele_open = true;
                        }
                        break;
                    case 1: { mark_tag = p; }
                        break;
                    case 2: { mark_aval = p; }
                        break;
                    case 3: { mark_akey = p; }
                        break;
                    case 4: { SET(tag, p); }
                        break;
                    case 5: { SET(aval, p); }
                        break;
                    case 6: {
                            if (buf[p - 1] == '"' || buf[p - 1] == '\'') { SET(aval, p - 1); }
                            else { SET(aval, p); }
                        }
                        break;
                    case 7: { SET(akey, p); }
                        break;
                    // Actions 8-12 intern their keys through _context: _state only exists
                    // when no block was supplied, so it cannot be relied on here.
                    case 8: { SET(aval, p); ATTR(_context.CreateAsciiSymbol("version"), aval); }
                        break;
                    case 9: { SET(aval, p); ATTR(_context.CreateAsciiSymbol("encoding"), aval); }
                        break;
                    case 10: { SET(aval, p); ATTR(_context.CreateAsciiSymbol("standalone"), aval); }
                        break;
                    case 11: { SET(aval, p); ATTR(_context.CreateAsciiSymbol("public_id"), aval); }
                        break;
                    case 12: { SET(aval, p); ATTR(_context.CreateAsciiSymbol("system_id"), aval); }
                        break;
                    case 13: {
                            akey[0] = null;
                            aval[0] = null;
                            mark_akey = -1;
                            mark_aval = -1;
                        }
                        break;
                    case 14: {
                            ATTR(akey, aval);
                        }
                        break;
                    case 15: { curline += 1; }
                        break;
                    case 16: { TEXT_PASS(); }
                        break;
                    case 17: { EBLK(sym_comment, 3); { cs = 204; if (true) goto _again; } }
                        break;
                    case 18: { EBLK(sym_cdata, 3); { cs = 204; if (true) goto _again; } }
                        break;
                    case 19: { EBLK(sym_procins, 2); { cs = 204; if (true) goto _again; } }
                        break;
                    case 22: { te = p + 1; }
                        break;
                    case 23: { te = p + 1; }
                        break;
                    case 24: { te = p + 1; { TEXT_PASS(); } }
                        break;
                    case 25: { te = p; p--; { TEXT_PASS(); } }
                        break;
                    case 26: { { p = ((te)) - 1; } { TEXT_PASS(); } }
                        break;
                    case 27: { te = p + 1; }
                        break;
                    case 28: { te = p + 1; { TEXT_PASS(); } }
                        break;
                    case 29: { te = p; p--; { TEXT_PASS(); } }
                        break;
                    case 30: { { p = ((te)) - 1; } { TEXT_PASS(); } }
                        break;
                    case 31: { te = p + 1; }
                        break;
                    case 32: { te = p + 1; { TEXT_PASS(); } }
                        break;
                    case 33: { te = p; p--; { TEXT_PASS(); } }
                        break;
                    case 34: { act = 8; }
                        break;
                    case 35: { act = 10; }
                        break;
                    case 36: { act = 12; }
                        break;
                    case 37: { act = 15; }
                        break;
                    case 38: { te = p + 1; { ELE(sym_xmldecl); } }
                        break;
                    case 39: { te = p + 1; { ELE(sym_doctype); } }
                        break;
                    case 40: { te = p + 1; { ELE(sym_stag); } }
                        break;
                    case 41: { te = p + 1; { ELE(sym_etag); } }
                        break;
                    case 42: { te = p + 1; { ELE(sym_emptytag); } }
                        break;
                    case 43: { te = p + 1; { { cs = 214; if (true) goto _again; } } }
                        break;
                    case 44: { te = p + 1; { { cs = 216; if (true) goto _again; } } }
                        break;
                    case 45: { te = p + 1; { TEXT_PASS(); } }
                        break;
                    case 46: { te = p; p--; { ELE(sym_doctype); } }
                        break;
                    case 47: { te = p; p--; { { cs = 218; if (true) goto _again; } } }
                        break;
                    case 48: { te = p; p--; { TEXT_PASS(); } }
                        break;
                    case 49: { { p = ((te)) - 1; } { { cs = 218; if (true) goto _again; } } }
                        break;
                    case 50: { { p = ((te)) - 1; } { TEXT_PASS(); } }
                        break;
                    case 51: {
                            switch (act) {
                                case 8: { { p = ((te)) - 1; } ELE(sym_doctype); }
                                    break;
                                case 10: { { p = ((te)) - 1; } ELE(sym_stag); }
                                    break;
                                case 12: { { p = ((te)) - 1; } ELE(sym_emptytag); }
                                    break;
                                case 15: { { p = ((te)) - 1; } TEXT_PASS(); }
                                    break;
                            }
                        }
                        break;
                    default:
                        break;
                }
            }
        _again:
            _acts = _hpricot_scan_to_state_actions[cs];
            _nacts = _hpricot_scan_actions[_acts++];
            while (_nacts-- > 0) {
                switch (_hpricot_scan_actions[_acts++]) {
                    case 20: { ts = -1; }
                        break;
                    default:
                        break;
                }
            }
            if (++p != pe)
                goto _resume;
        _test_eof: { }
            if (p == eof) {
                if (_hpricot_scan_eof_trans[cs] > 0) {
                    _trans = (short)(_hpricot_scan_eof_trans[cs] - 1);
                    goto _eof_trans;
                }
            }
        }
        #endregion
        if (cs == hpricot_scan_error) {
            String exceptionMessage;
            if (tag[0] != null) {
                // Report the tag text itself; tag is the one-element holder array.
                exceptionMessage = String.Format("parse error on element <{0}>, starting on line {1}.\n{2}", tag[0], curline, NO_WAY_SERIOUSLY);
            }
            else {
                exceptionMessage = String.Format("parse error on line {0}.\n{1}", curline, NO_WAY_SERIOUSLY);
            }
            throw new ParserException(exceptionMessage);
        }
        if (done && ele_open) {
            // Input ended inside an unfinished element: treat what was read as text.
            ele_open = false;
            if (ts > -1) {
                mark_tag = ts;
                ts = -1;
                text = true;
            }
        }
        if (ts == -1) {
            have = 0;
            /* text nodes have no ts because each byte is parsed alone */
            if (mark_tag != -1 && text) {
                if (done) {
                    if (mark_tag < p - 1) {
                        CAT(tag, p - 1);
                        ELE(sym_text);
                    }
                }
                else {
                    CAT(tag, p);
                }
            }
            mark_tag = 0;
        }
        else {
            // A token is still open: move it to the front of the buffer and rebase the marks.
            have = pe - ts;
            Array.Copy(buf, ts, buf, 0, have);
            SLIDE(tag);
            SLIDE(akey);
            SLIDE(aval);
            te = (te - ts);
            ts = 0;
        }
    }
    if (_state != null) {
        return _state.Doc;
    }
    return null;
}
}
}