-
Notifications
You must be signed in to change notification settings - Fork 1.1k
/
xmlCom.ml
139 lines (125 loc) · 4.55 KB
/
xmlCom.ml
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
(*
* XML parsing keeping everything (comments and declarations)
*
* Copyright (C) 2008, Cyril Allignol, Pascal Brisset
*
* This file is part of paparazzi.
*
* paparazzi is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2, or (at your option)
* any later version.
*
* paparazzi is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with paparazzi; see the file COPYING. If not, write to
* the Free Software Foundation, 59 Temple Place - Suite 330,
* Boston, MA 02111-1307, USA.
*
*)
type state = A | B | C | D | D' | D'' | E
let children = function
Nethtml.Element (_tag, _params, children) -> children
| _ -> invalid_arg "XmlCom.children"
(** Translate <tag .../> to <tag ...></tag> and parse *)
let parse_file = fun file ->
ignore (ExtXml.parse_file file);
let buff = Buffer.create 5
and lookup = Buffer.create 5
and name = Buffer.create 5
and chin = open_in file in
let rec automata = fun state ->
let char = input_char chin in
let mem_and_continue = fun state ->
Buffer.add_char lookup char;
automata state
and copy_and_continue = fun state ->
Buffer.add_string buff (Buffer.contents lookup);
Buffer.clear lookup;
Buffer.add_char buff char;
automata state
and replace_and_continue = fun state ->
Buffer.add_string buff "></";
Buffer.add_string buff (Buffer.contents name);
Buffer.add_char buff '>';
Buffer.clear name; Buffer.clear lookup;
automata state
in
match state, char with
A, '<' -> copy_and_continue B
| A, _ -> copy_and_continue A
| B, '!' -> copy_and_continue A
| B, (' ' | '\t' | '\n') -> copy_and_continue B
| B, _ -> Buffer.add_char name char; copy_and_continue C
| C, (' ' | '\t' | '\n') -> copy_and_continue D
| C, '>' -> Buffer.clear name; copy_and_continue A
| C, '/' -> mem_and_continue E
| C, _ -> Buffer.add_char name char; copy_and_continue C
| D, '/' -> mem_and_continue E
| D, '>' -> Buffer.clear name; copy_and_continue A
| D, '"' -> copy_and_continue D'
| D, _ -> copy_and_continue D
(* Inside a quoted string *)
| D', '"' -> copy_and_continue D
| D', '\\' -> automata D''
| D', _ -> copy_and_continue D'
(* Inside a quoted string, just after a \ (backslash) *)
| D'', '"' -> Buffer.add_string buff """; automata D'
| D'', _ -> Buffer.add_char buff '\\'; copy_and_continue D'
| E, '>' -> replace_and_continue A
| E, _ -> copy_and_continue D
in
try
ignore (automata A); failwith "Fichier sans fin"
with End_of_file ->
let buff = Buffer.contents buff in
let lexbuf = Lexing.from_string buff in
Nethtml.Element ("root", [], Nethtml.parse_document ~return_comments:true ~return_declarations:true lexbuf)
(** Translate <tag ...></tag> to <tag .../> *)
let ugly2nice = fun file ->
let buff = Buffer.create 5
and lookup = Buffer.create 5
and chin = open_in file in
let rec automata = fun state ->
let char = input_char chin in
let mem_and_continue = fun state ->
Buffer.add_char lookup char;
automata state
and copy_and_continue = fun state ->
Buffer.add_string buff (Buffer.contents lookup);
Buffer.clear lookup;
Buffer.add_char buff char;
automata state
and replace_and_continue = fun state ->
Buffer.add_string buff "/>";
Buffer.clear lookup;
automata state
in
match state, char with
A, '>' -> mem_and_continue B
| A, _ -> copy_and_continue A
| B, '<' -> mem_and_continue C
| B, _ -> copy_and_continue A
| C, '/' -> mem_and_continue D
| C, _ -> copy_and_continue A
| D, '>' -> replace_and_continue A
| D, _ -> mem_and_continue D
| _ -> failwith "This shouldn't occur..."
in
try
ignore (automata A); failwith "Fichier sans fin"
with End_of_file ->
let s = Buffer.contents buff
and chout = open_out file in
Printf.fprintf chout "%s" s;
close_out chout
(** Write XML and translate elements with no children *)
let to_file = fun xml filename ->
let chout = new Netchannels.output_channel (open_out filename) in
Nethtml.write ~dtd:[] chout (children xml);
chout#close_out ();
ugly2nice filename