-
Notifications
You must be signed in to change notification settings - Fork 0
/
xmlToHeader.pl
469 lines (398 loc) · 17.1 KB
/
xmlToHeader.pl
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
#!/usr/bin/env perl
#Copyright (c) 2012, Stargazy Studios
#All Rights Reserved
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are met:
# * Redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer.
# * Redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in the
# documentation and/or other materials provided with the distribution.
# * Neither the name of Stargazy Studios nor the
# names of its contributors may be used to endorse or promote products
# derived from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
# WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE FOR ANY
# DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
# (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
# LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
# ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#xmlToHeader will search an input XSD file for ComplexTypes containing Processing
#Instructions with a target the same as a specified keyword (default 'headerGenerator').
#Elements of this ComplexType will be checked for in an input XML file. Those found will
#have a specified child Element's text value paired with an enumeration in a C header file
# (by default a header file is generated per ComplexType). The text value of the specified
# Element must be filtered to ensure that it's unique, and only contain valid
#characters for an enumeration name. The enumeration constitutes a (name,uid) pair.
#It is possible to manually set an Element's uid in an attribute. These take
#precedence over automatically generated uids, and collisions between manually set uids
#are caught. Gaps in the enumeration, caused by manually set uids not being continuous are
# also catered for when outputting the header file.
#Processing Instruction optional parameters:
# syntax: name=[value[|alt_value]*]
# prepName=["ComplexType"]: Prepend string to all enumeration names.
# -ComplexType: use type name of the Element.
# prepCase=["snakeUpper"]: Set the case of preposition.
# -snakeUpper: all upper case, with snake
# formatted spacing.
# scope=["global"]: Used to group Elements into different header files,
# depending on the scope specified.
# -global: group all Elements in the whole
# document of a single ComplexType.
#TODO: implement different scope and other PI options.
use warnings;
use strict;
use Getopt::Long;
use XML::LibXML;
use String::CamelCase qw(camelize decamelize wordsplit);
use Data::Dumper;
sub checkTypeAndExpandElement{
    #Record $element's path if its "type" attribute names one of the keyword
    #complexTypes, then recurse into the child elements declared by that type.
    #
    #Parameters:
    # $element             - XSD element node (must offer hasAttribute/getAttribute)
    # $elementPath         - "/"-separated path of element names leading to $element
    # $xmlData             - parsed XSD document (must offer findnodes)
    # $uidTypesHashRef     - hash ref keyed by the complexType names to look for
    # $uidElementsHashRef  - hash ref filled in as { element path => complexType name }
    # $typesInProgressRef  - optional hash ref of the complexType names currently being
    #                        expanded further up the recursion; callers normally omit it
    #
    #Returns nothing useful; results are delivered through $uidElementsHashRef.
    my ($element,$elementPath,$xmlData,$uidTypesHashRef,$uidElementsHashRef,$typesInProgressRef) = @_;
    $typesInProgressRef ||= {};

    #elements without a type attribute can neither match nor be expanded
    return unless $element->hasAttribute("type");
    my $elementType = $element->getAttribute("type");

    #if the element's complexType matches a uid keyword
    if (exists $$uidTypesHashRef{$elementType}){
        #check if this element has already been expanded, and if so terminate
        return if exists $$uidElementsHashRef{$elementPath};
        #otherwise, add the element path to the hash
        $$uidElementsHashRef{$elementPath} = $elementType;
    }

    #A type that (directly or indirectly) contains an element of its own type would
    #otherwise be expanded forever, because the path grows on every level and so never
    #repeats. Stop at the first repetition of a type along the current branch.
    if ($typesInProgressRef->{$elementType}){
        print STDERR "WARNING: recursive use of type \"$elementType\" at \"$elementPath\". ".
            "Not expanding further\n";
        return;
    }
    $typesInProgressRef->{$elementType} = 1;

    #process child elements
    foreach my $complexType ($xmlData->findnodes('/xs:schema/xs:complexType[@name="'.$elementType.'"]')){
        foreach my $childElement ($complexType->findnodes("./xs:sequence/xs:element")){
            next unless $childElement->hasAttribute("name");
            my $childElementPath = $elementPath."/".$childElement->getAttribute("name");
            checkTypeAndExpandElement($childElement,$childElementPath,$xmlData,
                $uidTypesHashRef,$uidElementsHashRef,$typesInProgressRef);
        }
    }

    #this type is no longer on the current branch; siblings may expand it again
    delete $typesInProgressRef->{$elementType};
    return;
}
sub searchElements{
    #Entry point of the schema search. Every top-level xs:element of the XSD that has a
    #"name" attribute is handed to checkTypeAndExpandElement, which records the elements
    #whose type is one of the keyword complexTypes and descends into their children.
    #Because an element name may occur inside several different types, elements are
    #identified by their full path: the element names along the hierarchy joined by "/".
    #
    #Parameters:
    # $xmlData            - parsed XSD document
    # $uidTypesHashRef    - hash ref keyed by keyword complexType names
    # $uidElementsHashRef - hash ref receiving { element path => complexType name }
    my ($xmlData,$uidTypesHashRef,$uidElementsHashRef) = @_;

    my @topLevelElements = $xmlData->findnodes("/xs:schema/xs:element");
    for my $topElement (@topLevelElements){
        #anonymous elements cannot start a path
        next unless $topElement->hasAttribute("name");
        #DEBUG
        #print "Processing ".$topElement->getAttribute("name")."\n";
        my $rootPath = "/".$topElement->getAttribute("name");
        checkTypeAndExpandElement($topElement,$rootPath,$xmlData,$uidTypesHashRef,$uidElementsHashRef);
    }
}
sub checkValidCVariableName{
    #Turn $name into something usable as a C identifier and return it.
    #
    #Parameters:
    # $name             - candidate identifier text
    # $cReservedWordsRE - regex alternation ("int|if|...") of words that may not be used
    #
    #Returns the sanitised name:
    # - every character outside [A-Za-z_0-9] is dropped;
    # - "_" is prepended if the result is empty or starts with a digit;
    # - "_" is prepended if the result is one of the reserved words.
    #A WARNING line is printed to STDOUT whenever the name had to be changed.
    my ($name,$cReservedWordsRE) = @_;

    #\A and \z are used rather than ^ and $, because $ also matches before a trailing
    #newline and would let a name such as "foo\n" through unchanged
    if ($name !~ /\A[A-Za-z_][A-Za-z_0-9]*\z/){
        #invalid characters in the variable name
        print "WARNING: Invalid characters dropped in name: $name. ";
        $name =~ s/[^A-Za-z_0-9]//g; #drop all non-valid characters
        #prepend with _ if nothing is left, or if 0-9 appears 1st
        if($name eq '' || $name =~ /\A[0-9]/){$name = "_" . $name;}
        print "Sanitised to: $name\n";
    }

    if ($name =~ /\A(?:$cReservedWordsRE)\z/) {
        #the variable name matches a reserved word
        print "WARNING: Matched name with reserved word: $name. ";
        $name = "_" . $name; #prepend with _
        print "Trying sanitised name: $name\n";
        $name = checkValidCVariableName($name,$cReservedWordsRE); #check again
    }

    return $name;
}
sub makeFreeHashKey{
    #Return a key that is not yet present in the hash behind $hashRef: $key itself when
    #it is unused, otherwise $key with as many "_" prepended as it takes to make it
    #unused. The hash is only inspected, never modified.
    my ($hashRef, $key) = @_;
    my $candidate = $key;
    until (!exists $hashRef->{$candidate}){
        $candidate = "_".$candidate; #prepend with _
    }
    return $candidate;
}
sub findNextFreeArrayIndex{
    #Starting at $index, walk upwards through the array behind $arrayRef and return the
    #first position whose element is false (undef, '', 0 or past the end of the array).
    #The array is only read.
    my ($arrayRef,$index) = @_;
    my $slot = $index;
    for (;;){
        last unless $arrayRef->[$slot];
        $slot++;
    }
    return $slot;
}
#---- settings, each of which can be overridden from the command line ----
my $nameKey = 'name';           #Keyword to denote name element.
my $uidKey = 'uid';             #Keyword to denote uid attribute.
my $piName = 'headerGenerator'; #Keyword to denote uid Processing Instruction.

#When set, a named entry is added at the end of each enumeration block whose value is
#the total number of enumeration indices. Its name follows the enumeration prepend
#string convention, or the ifdef naming convention if no prepend string is used.
my $addCount = 0;

my $xsdIn = '';          #schema file to search for the Processing Instruction
my $xmlIn = '';          #document file holding the Elements to enumerate
my $outDir = '';         #directory prefix for the generated header files
my $outPreFileName = ''; #string put in front of every generated header file name

GetOptions(
    'nameKey=s'        => \$nameKey,
    'uidKey=s'         => \$uidKey,
    'piName=s'         => \$piName,
    'addCount'         => \$addCount,
    'xmlIn=s'          => \$xmlIn,
    'xsdIn=s'          => \$xsdIn,
    'outDir=s'         => \$outDir,
    'outPreFileName=s' => \$outPreFileName,
);

#reserved words for checking validity of enumeration variable names
#via http://www.lemoda.net/c/variable-names/
#(longest words first, then joined into one regex alternation)
my @cReservedWords = sort { length($b) <=> length($a) } qw(
    auto if break
    int case long char register continue return default short do sizeof
    double static else struct entry switch extern typedef float union for
    unsigned goto while enum void const signed volatile
);
my $cReservedWordsRE = join('|', @cReservedWords);

#check outDir finishes with a slash if it contains one; the kind of slash already
#used in the path (forward first, then backward) decides which one is appended
if ($outDir =~ /^.*[\/].*[^\/]$/){
    $outDir = $outDir."/";
}
elsif ($outDir =~ /^.*[\\].*[^\\]$/){
    $outDir = $outDir."\\";
}
#-----------------------------------------------------------------------------------------
#Main program.
# 1. Parse the XSD and collect every complexType that carries the Processing
#    Instruction named $piName, together with the PI's options.
# 2. Search the XSD for the element paths whose type is one of those complexTypes.
# 3. Parse the XML document, validate it against the XSD, and for every path that has
#    instances write (name,uid) enumeration entries to one header file per complexType.
#    Automatically generated uids are stored back in the XML document.
# 4. Close the enumeration and the include guard of every header file that was created.
#
#%uidTypes layout, per complexType name:
# [0] enumeration count; -1 until the header file for the type has been created
# [1] hash ref of the PI options (plus "prepFinal" when --addCount is used)
# [2] true once at least one enumeration entry has been written to the header file
#-----------------------------------------------------------------------------------------
my $parserLibXML = XML::LibXML->new();

if(-e $xmlIn && -e $xsdIn){
    #parse xsd schema to find keywords, storing the Type names that contain the uid key
    my $xmlData = $parserLibXML->parse_file($xsdIn);

    if($xmlData){
        my %uidTypes;

        #iterate through all complexTypes in the schema with the processing instruction
        foreach my $type ($xmlData->findnodes('/xs:schema/xs:complexType[processing-instruction("'.$piName.'")]')){
            if($type->hasAttribute("name")){
                foreach my $childNode ($type->getChildNodes){
                    if( $childNode->nodeType == XML_PI_NODE &&
                        $childNode->nodeName eq $piName){
                        my $nodeDataString = $childNode->getData();
                        $nodeDataString =~ s/"//g; #remove quotation marks
                        $uidTypes{$type->getAttribute("name")}[0] = -1; #Element count
                        #PI data "a=b c=d" becomes the option hash { a => b, c => d }
                        $uidTypes{$type->getAttribute("name")}[1] = {split(/[ =]/,$nodeDataString)};
                    }
                }
            }
            else{
                print STDERR "ERROR: missing \"name\" attribute for XSD complexType. EXIT\n";
                exit 1;
            }
        }

        #DEBUG
        #print Dumper(%uidTypes);

        #on a second pass, identify which element names are of a Type requiring a uid
        my %uidElements;
        my $uidElementsHashRef = \%uidElements;

        #recursively search for elements with keyword types and store hierarchy paths
        searchElements($xmlData,\%uidTypes,$uidElementsHashRef);

        #DEBUG check uidElements for correctness
        #print Dumper($uidElementsHashRef);

        #parse xml in file to find Types, counting them and creating enumeration keys
        $xmlData = $parserLibXML->parse_file($xmlIn);

        #validate xmlIn with xsdIn
        my $xmlSchema = XML::LibXML::Schema->new('location' => $xsdIn);
        eval {$xmlSchema->validate($xmlData);};
        die $@ if $@;

        #output either generated or manually set uids to header file per complexType
        if($xmlData){
            foreach my $elementPath (keys %uidElements){
                my $uidElementType = $uidElements{$elementPath};
                my @uidElementInstances = $xmlData->findnodes($elementPath);
                next unless @uidElementInstances > 0;

                my $headerFileName = "$outPreFileName$uidElementType.h";
                my $headerFilePath = "$outDir$headerFileName";

                #uid-indexed list of [name, manually-set flag, Element]
                #NOTE(review): this list and %enumerationNames are rebuilt for every
                #element path, so uid/name collisions between two paths of the same
                #complexType are not detected here.
                my @enumerations;
                my %enumerationNames; #name => uid, for the names already taken

                #store string to prepend all enumeration names, depending on
                #processing instruction parameters
                my $prepFinal = '';
                if(exists $uidTypes{$uidElementType}[1]{"prepName"}){
                    if($uidTypes{$uidElementType}[1]{"prepName"} eq "ComplexType"){
                        $prepFinal = $uidElementType . "_";
                        if(exists $uidTypes{$uidElementType}[1]{"prepCase"}){
                            if($uidTypes{$uidElementType}[1]{"prepCase"} eq "snakeUpper"){
                                $prepFinal = uc(decamelize($prepFinal));
                                $prepFinal =~ s/ /_/g;
                            }
                        }
                    }
                }

                #store the final prepend string to name count if required
                if($addCount && $prepFinal){
                    $uidTypes{$uidElementType}[1]{"prepFinal"} = $prepFinal;
                }

                #DEBUG
                #print STDOUT "prependNameString: $prepFinal\n";

                #open new file if this is the first element of its type, otherwise
                #continue the file that was started for an earlier path of this type
                my $headerFh;
                if($uidTypes{$uidElementType}[0] < 0){
                    $uidTypes{$uidElementType}[0] = 0;
                    my $date = localtime();
                    open($headerFh,">",$headerFilePath)
                        or die "ERROR: unable to create \"$headerFilePath\": $!\n";
                    print {$headerFh} "\n",
                        "#ifndef INC_\U$outPreFileName$uidElementType\E_H\n",
                        "#define INC_\U$outPreFileName$uidElementType\E_H\n",
                        "/*\n",
                        "* $headerFileName\n",
                        "*\n",
                        "* $date\n",
                        "*/\n",
                        "enum{\n";
                }
                else{
                    #the full path is needed here too, or the remainder of the
                    #enumeration ends up in the current directory instead of outDir
                    open($headerFh,">>",$headerFilePath)
                        or die "ERROR: unable to append to \"$headerFilePath\": $!\n";
                }

                my $enumerationCount = $uidTypes{$uidElementType}[0];

                foreach my $uidElement (@uidElementInstances){
                    #add enumeration key at the correct index, filtering name
                    my $enumName = '';
                    foreach my $nameElement ($uidElement->getChildrenByTagName($nameKey)){
                        my $saveUidFlag = 1; #can choose not to store the uid mapping
                        my $elementName = $nameElement->textContent;

                        #check if name is valid, and not a C reserved word
                        if($elementName){
                            $enumName = checkValidCVariableName($elementName,
                                $cReservedWordsRE);
                            if($prepFinal){$enumName = $prepFinal.$enumName;}
                        }
                        else{
                            print STDERR "ERROR: missing \"$nameKey\" element content for ".
                                "element of type \"$uidElementType\". EXIT\n";
                            exit 1;
                        }

                        my $uidManualFlag = 0;
                        my $uidCandidate = '';

                        #check if the uid has been manually set for the element
                        if($uidElement->hasAttribute($uidKey)){
                            #check the stored uid is a valid, positive integer
                            if($uidElement->getAttribute($uidKey) =~ /^\d+$/){
                                $uidCandidate = $uidElement->getAttribute($uidKey);
                                $uidManualFlag = 1;
                            }
                        }

                        #if no valid manual uid has been set, then use the enumeration
                        #count
                        if(!$uidManualFlag){$uidCandidate = $enumerationCount;}

                        #check for collision with selected uid
                        if($enumerations[$uidCandidate]){
                            if($uidManualFlag){
                                #if there is a manually set uid mapping in place, exit
                                #if the names do not match
                                if($enumerations[$uidCandidate][1]){
                                    #exact string comparison: a pattern match would
                                    #treat "foo" and "foobar" as the same name
                                    if($enumName ne $enumerations[$uidCandidate][0]){
                                        print STDERR "ERROR: \"$uidCandidate\" uid candidate ".
                                            "for element named \"$enumName\", has already been".
                                            " set for $enumerations[$uidCandidate][0]. EXIT\n";
                                        exit 1;
                                    }
                                    else{
                                        print STDERR "WARNING: \"$uidCandidate\" uid candidate ".
                                            "for element named \"$enumName\", has already been".
                                            " set for the same name. IGNORING\n";
                                        $saveUidFlag = 0;
                                    }
                                }
                                else{
                                    #displace the element with the automatically set uid
                                    my $newUid = findNextFreeArrayIndex(\@enumerations,$uidCandidate);
                                    $enumerations[$newUid] = $enumerations[$uidCandidate];
                                    $enumerationCount = ($newUid + 1); #try next index next time
                                    #update the enumerationNames hash for displaced mapping
                                    $enumerationNames{$enumerations[$newUid][0]} = $newUid;
                                    #keep the uid attribute of the displaced Element in
                                    #step with its new enumeration index
                                    $enumerations[$newUid][2]->setAttribute($uidKey,$newUid);
                                }
                            }
                            else{
                                #automatically increment enumerationCount, until no
                                #collision is found for those Elements without a
                                #manually set uid
                                $uidCandidate = findNextFreeArrayIndex(\@enumerations,$uidCandidate);
                                $enumerationCount = ($uidCandidate + 1);
                            }
                        }

                        #store the data
                        if($saveUidFlag){
                            #check name wanted has not already been used and sanitise
                            if(exists $enumerationNames{$enumName}){
                                print STDERR "WARNING: \"$enumName\" name has already been ".
                                    "used for element with uid \"$enumerationNames{$enumName}\". ";
                                $enumName = makeFreeHashKey(\%enumerationNames,$enumName);
                                print STDERR "Changing element name to \"$enumName\"\n";
                            }
                            if(!$uidManualFlag){
                                #if auto generated, store uid in attribute to create
                                #linkage between Element and enumeration index
                                $uidElement->setAttribute($uidKey,$uidCandidate);
                                #DEBUG
                                #print STDOUT "DEBUG: $enumName Element, Attribute set: $uidKey=$uidCandidate\n";
                            }
                            $enumerations[$uidCandidate] = [$enumName,$uidManualFlag,$uidElement];
                            $enumerationNames{$enumName} = $uidCandidate;
                        }
                    }
                    $uidTypes{$uidElementType}[0]=$enumerationCount;
                }

                #DEBUG check enumerations and enumerationNames for correctness
                #print Dumper(@enumerations);
                #print Dumper(%enumerationNames);

                #output enumeration information to the header file
                #When entries of an earlier path are already in the file, the first entry
                #written now needs a separating comma in front of it and an explicit
                #value, as it does not simply continue the previous numbering.
                my $continuesEarlierEntries = $uidTypes{$uidElementType}[2] ? 1 : 0;
                my $enumerationGapFlag = $continuesEarlierEntries;
                for(my $i = 0;$i<scalar(@enumerations);$i++){
                    if($enumerations[$i]){
                        if($continuesEarlierEntries){
                            print {$headerFh} ",\n";
                            $continuesEarlierEntries = 0;
                        }
                        print {$headerFh} "$enumerations[$i][0]";
                        $uidTypes{$uidElementType}[2] = 1;
                        if($enumerationGapFlag){
                            print {$headerFh} " = $i"; #reset enumeration count after gap
                            $enumerationGapFlag = 0;
                        }
                        #add comma and carriage return if not the last enumeration
                        if($i != (scalar(@enumerations)-1)){print {$headerFh} ",\n"}
                    }
                    else{$enumerationGapFlag = 1;}
                }
                close($headerFh)
                    or die "ERROR: unable to write \"$headerFilePath\": $!\n";

                #overwrite XML document file in case any auto generated uids have been
                #used
                $xmlData->toFile($xmlIn);
            }

            #DEBUG
            #print Dumper(%uidTypes);

            #go through the uidTypes hash, and for all entries whose header file was
            #created (count no longer -1) add the lines that close the enumeration and
            #the "ifdef" header guard
            foreach my $uidElementType (keys %uidTypes){
                next unless $uidTypes{$uidElementType}[0] >= 0;

                my $headerFilePath = "$outDir$outPreFileName$uidElementType.h";
                open(my $headerFh,">>",$headerFilePath)
                    or die "ERROR: unable to append to \"$headerFilePath\": $!\n";
                if($addCount){ #inject count entry in enumeration if requested
                    my $prepCount = '';
                    if(exists $uidTypes{$uidElementType}[1]{"prepFinal"}){
                        $prepCount = $uidTypes{$uidElementType}[1]{"prepFinal"};
                    }
                    else{$prepCount = "\U$outPreFileName$uidElementType\E_";}
                    #a separating comma is only valid after an existing entry
                    my $separator = $uidTypes{$uidElementType}[2] ? ",\n" : "";
                    print {$headerFh} $separator.$prepCount."COUNT";
                }
                print {$headerFh} "\n};\n\n#endif\n";
                close($headerFh)
                    or die "ERROR: unable to write \"$headerFilePath\": $!\n";
            }

            #DEBUG check uidElements for correctness
            #print Dumper($uidElementsHashRef);
        }
        else{
            print STDERR "xmlIn($xmlIn) is not a valid xml file. EXIT\n";
            exit 1;
        }
    }
    else{
        print STDERR "xsdIn($xsdIn) is not a valid xml file. EXIT\n";
        exit 1;
    }
}
else{
    print STDERR "Options --xsdIn --xmlIn are required. EXIT\n";
    exit 1;
}