Skip to content

Commit c84753e

Browse files
author
Daniel Herzog
committed
Initial implementation of HTTP header tokenizer
1 parent 4f337cf commit c84753e

File tree

3 files changed

+228
-0
lines changed

3 files changed

+228
-0
lines changed

src/client-en.xml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -353,6 +353,7 @@ window.load_screen_timeout = window.setTimeout(function()
353353
<script src="./syntaxhighlight/markup/syntax.js"/>
354354
<script src="./syntaxhighlight/css/tokenizer.js"/>
355355
<script src="./syntaxhighlight/css/syntax.js"/>
356+
<script src="./syntaxhighlight/http-header/tokenizer.js"/>
356357

357358

358359

Lines changed: 120 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,120 @@
1+
"use strict";

window.cls = window.cls || {};

/**
 * Character-by-character tokenizer for a raw HTTP message head (the
 * request/status line followed by "Name: value" header lines).
 *
 * The tokenizer is a small state machine. Each state handler consumes one
 * character per call and may switch the current state:
 *
 *   FIRST_LINE_PART -> NAME   on the first LF
 *   NAME            -> VALUE  on ":"
 *   VALUE           -> NAME   on an LF that is not followed by SP / TAB
 *
 * Whitespace that separates parts of the first line, and the LF that ends a
 * header value, are kept as the leading characters of the following token
 * rather than emitted as tokens of their own. Only the LF that ends the
 * first line is emitted as a LINE_SEPARATOR token. CR is not treated
 * specially; it simply ends up inside whichever token is being collected.
 *
 * @constructor
 */
cls.HTTPHeaderTokenizer = function()
{
  var LF = "\n";
  var PUNCTUATOR = ":";
  var WHITESPACE_CHARS =
  {
    '\u0009': 1, // Tab <TAB>
    '\u0020': 1  // Space <SP>
  };

  this._buffer = "";
  this._current_pos = 0;
  this._token_buffer = "";
  this._token_type = 0;
  this._emitToken = null;
  this._state_handler = {};

  /**
   * Tokenize input_buffer and report every token through ontoken.
   *
   * All per-run state is reset on entry, so one tokenizer instance can be
   * used for any number of inputs.
   *
   * @param {String} input_buffer The raw header text. null / undefined is
   *     treated as the empty string, any other value is converted to a string.
   * @param {Function} ontoken Called as ontoken(token_type, token_string) for
   *     each token, in input order. token_type is one of
   *     cls.HTTPHeaderTokenizer.types.
   */
  this.tokenize = function(input_buffer, ontoken)
  {
    this._buffer = input_buffer == null ? "" : String(input_buffer);
    this._current_pos = 0;
    this._token_buffer = "";
    this._token_type = 0;
    this._emitToken = ontoken;
    this._state_handler = this._state_handlers.FIRST_LINE_PART;

    while (this._state_handler !== this._state_handlers.EOF)
    {
      this._state_handler.apply(this);
    }

    this._state_handlers.EOF.apply(this);
  };

  // State handlers. Each one is invoked with the tokenizer as `this`.
  this._state_handlers =
  {
    // Collects the whitespace separated parts of the first line.
    FIRST_LINE_PART: function()
    {
      if (this._is_EOF())
      {
        return false;
      }
      var c = this._buffer.charAt(this._current_pos++);
      this._token_type = cls.HTTPHeaderTokenizer.types.FIRST_LINE_PART;
      if (c in WHITESPACE_CHARS)
      {
        this._emitToken(this._token_type, this._token_buffer);
        this._token_buffer = "";
        // No return here: the whitespace char becomes the first char of the
        // next token. Visually that makes no difference.
      }
      else if (c === LF)
      {
        this._emitToken(this._token_type, this._token_buffer);
        this._token_buffer = "";
        this._emitToken(cls.HTTPHeaderTokenizer.types.LINE_SEPARATOR, c); // todo: don't emit your own token.
        this._state_handler = this._state_handlers.NAME;
        return false;
      }
      this._token_buffer += c;
    },

    // Collects a header name, up to (not including) the ":".
    NAME: function()
    {
      if (this._is_EOF())
      {
        return false;
      }
      var c = this._buffer.charAt(this._current_pos++);
      this._token_type = cls.HTTPHeaderTokenizer.types.NAME;
      if (c === PUNCTUATOR)
      {
        this._emitToken(this._token_type, this._token_buffer);
        this._emitToken(cls.HTTPHeaderTokenizer.types.PUNCTUATOR, c);
        this._token_buffer = "";
        this._state_handler = this._state_handlers.VALUE;
        return false;
      }
      this._token_buffer += c;
    },

    // Collects a header value, including folded continuation lines.
    VALUE: function()
    {
      if (this._is_EOF())
      {
        return false;
      }
      var c = this._buffer.charAt(this._current_pos++);
      this._token_type = cls.HTTPHeaderTokenizer.types.VALUE;
      // LF only means switching to header when the following char is not whitespace.
      if (c === LF && !(this._buffer.charAt(this._current_pos) in WHITESPACE_CHARS))
      {
        this._emitToken(this._token_type, this._token_buffer);
        this._token_buffer = "";
        this._state_handler = this._state_handlers.NAME;
        // No return here: the LF becomes the first char of the next token.
        // Visually that makes no difference.
      }
      this._token_buffer += c;
    },

    // Flushes whatever is still buffered once the input is exhausted.
    EOF: function()
    {
      // An empty buffer here means the input was empty or ended right after
      // a separator. _token_type then still belongs to the previous token
      // (or was never set), so there is nothing meaningful to report.
      if (this._token_buffer !== "")
      {
        this._emitToken(this._token_type, this._token_buffer);
        this._token_buffer = "";
      }
    }
  };

  /**
   * Checks whether the read position has reached the end of the buffer and,
   * if so, switches the state machine to the EOF state.
   *
   * @returns {Boolean} true when there is no more input to read.
   */
  this._is_EOF = function()
  {
    if (this._current_pos >= this._buffer.length)
    {
      this._state_handler = this._state_handlers.EOF;
      return true;
    }
    return false;
  };
};

// Token types passed as the first argument to the ontoken callback.
cls.HTTPHeaderTokenizer.types = {
  FIRST_LINE_PART : 1,
  NAME : 2,
  VALUE : 3,
  PUNCTUATOR : 4,
  LINE_SEPARATOR : 5
};
Lines changed: 107 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,107 @@
1+
<!DOCTYPE html>
2+
<html>
3+
4+
<link rel="stylesheet" href="../../src/ui-style/ui.css"/>
5+
<link rel="stylesheet" href="../../src/network/network_style.css"/>
6+
<link rel="stylesheet" href="../../src/ui-scripts/tooltip/tooltip.css"/>
7+
8+
<style>
9+
10+
html
11+
{
12+
margin: 5px;
13+
}
14+
15+
.type-1 {
16+
color: blue;
17+
}
18+
19+
.type-2 {
20+
color: red;
21+
}
22+
23+
.type-3 {
24+
color: green;
25+
}
26+
27+
.type-4 {
28+
color: #bada55;
29+
}
30+
31+
</style>
32+
33+
<script src="../../src/scripts/dom.js"></script>
34+
<script src="../../src/syntaxhighlight/http/tokenizer.js"></script>
35+
36+
<script>
37+
38+
/**
 * Builds the template for one token: a span holding the token string,
 * with a class derived from the token type ("type-" + type).
 *
 * @param {Array} token A [token_type, token_string] pair.
 * @returns {Array} ["span", token_string, "class", "type-<token_type>"]
 */
var token_template = function(token)
{
  var text = token[1];
  var class_name = "type-" + token[0];
  return ["span", text, "class", class_name];
};
44+
45+
/**
 * Runs a fresh cls.HTTPHeaderTokenizer over raw_headers and gathers
 * everything it reports.
 *
 * @param {String} raw_headers The header text to tokenize.
 * @returns {Array} One [token_type, token_string] pair per reported token,
 *     in the order the tokenizer reported them.
 */
var get_tokens = function(raw_headers)
{
  var collected = [];
  var collect = function(token_type, token)
  {
    collected.push([token_type, token]);
  };

  new cls.HTTPHeaderTokenizer().tokenize(raw_headers, collect);

  return collected;
};
57+
58+
// Sample request head, "Name: value" style (a space after each colon) and a
// trailing space on the request line.
// Built from one array entry per line so that no line can silently lose its
// "\n" terminator and fuse with the next header.
var raw_headers = [
  "GET /dherzog/ HTTP/1.1 ",
  "User-Agent: Opera/9.80 (Macintosh; Intel Mac OS X 10.7.4; U; en) Presto/2.10.289 Version/12.00",
  "Host: homes.oslo.osa",
  "Accept: text/html, application/xml;q=0.9, application/xhtml+xml, image/png, image/webp, image/jpeg, image/gif, image/x-xbitmap, */*;q=0.1",
  "Accept-Language: en,en-US;q=0.9,de;q=0.8,ja;q=0.7,fr;q=0.6,es;q=0.5,it;q=0.4,pt;q=0.3,pt-PT;q=0.2,nl;q=0.1,sv;q=0.1,nb;q=0.1,da;q=0.1,fi;q=0.1,ru;q=0.1,pl;q=0.1,zh-CN;q=0.1,zh-TW;q=0.1,ko;q=0.1,ar;q=0.1,cs;q=0.1,hu;q=0.1,tr;q=0.1,ca;q=0.1,el;q=0.1,he;q=0.1,hr;q=0.1,ro;q=0.1,sk;q=0.1,th;q=0.1,uk;q=0.1",
  "Accept-Encoding: gzip, deflate",
  "Authorization: Basic XXX==",
  "Referer: https://homes.oslo.osa/dherzog/",
  "Cache-Control: no-cache",
  "Connection: Keep-Alive"
].join("\n");
68+
69+
// Sample request head, "Name:value" style (no space after the colon).
// Built from one array entry per line so that no line can silently lose its
// "\n" terminator and fuse with the next header.
var raw_headers2 = [
  "GET /dherzog/ HTTP/1.1",
  "User-Agent:Opera/9.80 (Macintosh; Intel Mac OS X 10.7.4; U; en) Presto/2.10.289 Version/12.00",
  "Host:homes.oslo.osa",
  "Accept:text/html, application/xml;q=0.9, application/xhtml+xml, image/png, image/webp, image/jpeg, image/gif, image/x-xbitmap, */*;q=0.1",
  "Accept-Language:en,en-US;q=0.9,de;q=0.8,ja;q=0.7,fr;q=0.6,es;q=0.5,it;q=0.4,pt;q=0.3,pt-PT;q=0.2,nl;q=0.1,sv;q=0.1,nb;q=0.1,da;q=0.1,fi;q=0.1,ru;q=0.1,pl;q=0.1,zh-CN;q=0.1,zh-TW;q=0.1,ko;q=0.1,ar;q=0.1,cs;q=0.1,hu;q=0.1,tr;q=0.1,ca;q=0.1,el;q=0.1,he;q=0.1,hr;q=0.1,ro;q=0.1,sk;q=0.1,th;q=0.1,uk;q=0.1",
  "Accept-Encoding:gzip, deflate",
  "Authorization:Basic XXX==",
  "Referer:https://homes.oslo.osa/dherzog/",
  "Cache-Control:no-cache",
  "Connection:Keep-Alive"
].join("\n");
79+
80+
// Sample request head with folded (continued) header values: a line that
// starts with a space or tab continues the value of the header above it.
// Built from one array entry per line so that no line can silently lose its
// "\n" terminator and fuse with the next header.
var raw_headers3 = [
  "GET /dherzog/ HTTP/1.1",
  "User-Agent:Opera/9.80 (Macintosh; Intel Mac OS X 10.7.4; U; en) Presto/2.10.289 Version/12.00",
  "Host:homes.oslo.osa",
  "Accept:text/html, application/xml;q=0.9,",
  // The leading space is what makes this line a continuation of Accept.
  " ooh-this-is-continued, application/xhtml+xml, image/png, image/webp, image/jpeg, image/gif, image/x-xbitmap, */*;q=0.1",
  "Accept-Language:en,en-US;q=0.9,de;q=0.8,ja;q=0.7,fr;q=0.6,es;q=0.5,it;q=0.4,pt;q=0.3,pt-PT;q=0.2,nl;q=0.1,sv;q=0.1,nb;q=0.1,da;q=0.1,fi;q=0.1,ru;q=0.1,pl;q=0.1,zh-CN;q=0.1,zh-TW;q=0.1,ko;q=0.1,ar;q=0.1,cs;q=0.1,hu;q=0.1,tr;q=0.1,ca;q=0.1,el;q=0.1,he;q=0.1,hr;q=0.1,ro;q=0.1,sk;q=0.1,th;q=0.1,uk;q=0.1",
  "Accept-Encoding:gzip, ",
  "\t or even more continued",
  "Authorization:Basic XXX==",
  "Referer:https://homes.oslo.osa/dherzog/",
  "Cache-Control:no-cache",
  "Connection:Keep-Alive"
].join("\n");
92+
93+
// On load, render the tokens of the three sample header blocks into the
// first element of the body, with two <br> elements between consecutive
// samples.
window.onload = function()
{
  var show_sample = function(raw)
  {
    document.body.firstElementChild.render(get_tokens(raw).map(token_template));
  };
  var show_gap = function()
  {
    document.body.firstElementChild.render([["br"], ["br"]]);
  };

  show_sample(raw_headers);
  show_gap();
  show_sample(raw_headers2);
  show_gap();
  show_sample(raw_headers3);
};
101+
102+
</script>
103+
<body>
104+
<pre class="mono"></pre>
105+
</body>
106+
</html>
107+

0 commit comments

Comments
 (0)