Browse files

Don't end XHTML IDs with punctuation

An ID ending with a punctuation character means that a URL with a fragment
ID linking to that section ends with a punctuation character. Such URLs
are awkward to extract from plain text, for example when one appears in a
plain-text e-mail and is converted to a link by a mailing list archive or
e-mail client: a punctuation character at the end of a URL may be taken
for punctuation of the surrounding message rather than part of the URL.

Avoid the problem by ensuring that IDs never end with punctuation
characters. (Underscore is allowed, since it isn't used for punctuation in
human text.)
  • Loading branch information...
1 parent cfd7345 commit c58317fd9c3a2190423c1814584c036629d46cba @Smylers Smylers committed Oct 8, 2012
Showing with 16 additions and 6 deletions.
  1. +6 −0 lib/Pod/Simple/XHTML.pm
  2. +9 −5 t/xhtml01.t
  3. +1 −1 t/xhtml10.t
@@ -733,6 +733,11 @@ underscores (_), colons (:), and periods (.).
=item *
+The final character can't be a hyphen, colon, or period. URLs ending with these
+characters, while allowed by XHTML, can be awkward to extract from plain text.
+=item *
Each id must be unique within the document.
@@ -754,6 +759,7 @@ sub idify {
s/^([^a-zA-Z]+)$/pod$1/; # Prepend "pod" if no valid chars.
s/^[^a-zA-Z]+//; # First char must be a letter.
s/[^-a-zA-Z0-9_:.]+/-/g; # All other chars must be valid.
+ s/[-:.]+$//; # Strip trailing punctuation.
return $t if $not_unique;
my $i = '';
@@ -23,29 +23,33 @@ my $MANURL = "";
initialize($parser, $results);
$parser->parse_string_document( "=head1 Poit!" );
-is($results, qq{<h1 id="Poit-">Poit!</h1>\n\n}, "head1 level output");
+is($results, qq{<h1 id="Poit">Poit!</h1>\n\n}, "head1 level output");
initialize($parser, $results);
$parser->parse_string_document( "=head2 Yada Yada Operator
X<...> X<... operator> X<yada yada operator>" );
is($results, qq{<h2 id="Yada-Yada-Operator">Yada Yada Operator </h2>\n\n}, "head ID with X<>");
initialize($parser, $results);
+$parser->parse_string_document( "=head2 Platforms with no supporting programmers:");
+is($results, qq{<h2 id="Platforms-with-no-supporting-programmers">Platforms with no supporting programmers:</h2>\n\n}, "head ID ending in colon");
+initialize($parser, $results);
$parser->parse_string_document( "=head1 Poit!" );
-is($results, qq{<h2 id="Poit-">Poit!</h2>\n\n}, "head1 level output h_level 2");
+is($results, qq{<h2 id="Poit">Poit!</h2>\n\n}, "head1 level output h_level 2");
initialize($parser, $results);
$parser->parse_string_document( "=head2 I think so Brain." );
-is($results, qq{<h2 id="I-think-so-Brain.">I think so Brain.</h2>\n\n}, "head2 level output");
+is($results, qq{<h2 id="I-think-so-Brain">I think so Brain.</h2>\n\n}, "head2 level output");
initialize($parser, $results);
$parser->parse_string_document( "=head3 I say, Brain..." );
-is($results, qq{<h3 id="I-say-Brain...">I say, Brain...</h3>\n\n}, "head3 level output");
+is($results, qq{<h3 id="I-say-Brain">I say, Brain...</h3>\n\n}, "head3 level output");
initialize($parser, $results);
$parser->parse_string_document( "=head4 Zort & Zog!" );
-is($results, qq{<h4 id="Zort-Zog-">Zort &amp; Zog!</h4>\n\n}, "head4 level output");
+is($results, qq{<h4 id="Zort-Zog">Zort &amp; Zog!</h4>\n\n}, "head4 level output");
sub x ($;&) {
my $code = $_[1];
@@ -23,7 +23,7 @@ for my $spec (
[ 'fo$bar' => 'fo-bar', 'fo-bar' ],
[ 'f12' => 'f12', 'f12' ],
[ '13' => 'pod13', 'pod13' ],
- [ '**.:' => 'pod-.:', 'pod-.:' ],
+ [ '**.:' => 'pod', 'pod' ],
) {
is $parser->idify( $spec->[0] ), $spec->[1],
qq{ID for "$spec->[0]" should be "$spec->[1]"};

0 comments on commit c58317f

Please sign in to comment.