Skip to content
This repository

HTTPS clone URL

Subversion checkout URL

You can clone with HTTPS or Subversion.

Download ZIP
Browse code

Big nasty HTML handling update

  • Loading branch information...
commit 83ddfab81e193fec3b8c2368b95ce1d2cb0f4abe 1 parent 83ecec7
Chris Nandor authored April 13, 2005
39  Slash/DB/MySQL/MySQL.pm
@@ -8974,14 +8974,13 @@ sub getSlashConf {
8974 8974
 						# See <http://www.iana.org/assignments/uri-schemes>
8975 8975
 		anonymous_coward_uids =>	[ $conf{anonymous_coward_uid} ],
8976 8976
 		approved_url_schemes =>		[qw( ftp http gopher mailto news nntp telnet wais https )],
8977  
-		approvedtags =>			[qw( B I P A LI OL UL EM BR TT STRONG BLOCKQUOTE DIV ECODE DL DT DD)],
8978  
-		approvedtags_break =>		[qw( P LI OL UL BR BLOCKQUOTE DIV HR DL DT DD)],
  8977
+		approvedtags =>			[qw( b i p br a ol ul li dl dt dd em strong tt blockquote div ecode)],
  8978
+		approvedtags_break =>		[qw(     p br   ol ul li dl dt dd              blockquote div       img hr)],
8979 8979
 		charrefs_bad_entity =>		[qw( zwnj zwj lrm rlm )],
8980 8980
 		charrefs_bad_numeric =>		[qw( 8204 8205 8206 8207 8236 8237 8238 )],
8981 8981
 		charrefs_good_entity =>		[qw( amp lt gt euro pound yen )],
8982 8982
 		charrefs_good_numeric =>	[ ],
8983 8983
 		cur_performance_stat_ops =>	[ ],
8984  
-		lonetags =>			[qw( P LI BR IMG DT DD)],
8985 8984
 		fixhrefs =>			[ ],
8986 8985
 		hc_possible_fonts =>		[ ],
8987 8986
 		lonetags =>			[ ],
@@ -9073,25 +9072,29 @@ sub getSlashConf {
9073 9072
 		$conf{$regex} = qr{$conf{$regex}};
9074 9073
 	}
9075 9074
 
  9075
+	for my $var (qw(approvedtags approvedtags_break lonetags)) {
  9076
+		$conf{$var} = [ map lc, @{$conf{$var}} ];
  9077
+	}
  9078
+
9076 9079
 	if ($conf{approvedtags_attr}) {
9077 9080
 		my $approvedtags_attr = $conf{approvedtags_attr};
9078 9081
 		$conf{approvedtags_attr} = {};
9079  
-		my @tags = split(/\s+/, $approvedtags_attr);
9080  
-		foreach my $tag(@tags){
9081  
-			my ($tagname,$attr_info) = $tag=~/([^:]*):(.*)$/;
9082  
-			my @attrs = split( ",", $attr_info );
9083  
-			my $ord=1;
9084  
-			foreach my $attr(@attrs){
9085  
-				my($at,$extra) = split( /_/, $attr );
9086  
-				$at = uc($at);
9087  
-				$tagname = uc($tagname);
9088  
-				$conf{approvedtags_attr}->{$tagname}{$at}{ord}=$ord;
9089  
-				$conf{approvedtags_attr}->{$tagname}{$at}{req}=1 if $extra=~/R/;
9090  
-				$conf{approvedtags_attr}->{$tagname}{$at}{url}=1 if $extra=~/U/;
9091  
-				$ord++
  9082
+		my @tags = split /\s+/, $approvedtags_attr;
  9083
+		foreach my $tag (@tags){
  9084
+			my($tagname, $attr_info) = $tag =~ /([^:]*):(.*)$/;
  9085
+			my @attrs = split ',', $attr_info;
  9086
+			my $ord = 1;
  9087
+			foreach my $attr (@attrs){
  9088
+				my($at, $extra) = split /_/, $attr;
  9089
+				$at = lc $at;
  9090
+				$tagname = lc $tagname;
  9091
+				$conf{approvedtags_attr}{$tagname}{$at}{ord} = $ord;
  9092
+				$conf{approvedtags_attr}{$tagname}{$at}{req} = 1 if $extra =~ /R/;
  9093
+				$conf{approvedtags_attr}{$tagname}{$at}{req} = 2 if $extra =~ /N/; # "necessary"
  9094
+				$conf{approvedtags_attr}{$tagname}{$at}{url} = 1 if $extra =~ /U/;
  9095
+				$ord++;
9092 9096
 			}
9093  
-		}   
9094  
-
  9097
+		}
9095 9098
 	}
9096 9099
 
9097 9100
 	# We only need to do this on startup.
2  Slash/Slash.pm
@@ -695,7 +695,7 @@ sub printComments {
695 695
 			# add them back at the last step.  In-between, we chop
696 696
 			# the comment down to size, then massage it to make sure
697 697
 			# we still have good HTML after the chop.
698  
-			$more_comment_text->{$cid} =~ s{</A[^>]+>}{</A>}gi;
  698
+			$more_comment_text->{$cid} =~ s{</a[^>]+>}{</a>}gi;
699 699
 			my $text = chopEntity($more_comment_text->{$cid},
700 700
 				$user->{maxcommentsize});
701 701
 			$text = strip_html($text);
519  Slash/Utility/Data/Data.pm
@@ -1023,8 +1023,8 @@ my %actions = (
1023 1023
 	space_between_tags => sub {
1024 1024
 			${$_[0]} =~ s/></> </g;				},
1025 1025
 	whitespace_tagify => sub {
1026  
-			${$_[0]} =~ s/\n/<BR>/gi;  # pp breaks
1027  
-			${$_[0]} =~ s/(?:<BR>\s*){2,}<BR>/<BR><BR>/gi;
  1026
+			${$_[0]} =~ s/\n/<br>/gi;  # pp breaks
  1027
+			${$_[0]} =~ s/(?:<br>\s*){2,}<br>/<br><br>/gi;
1028 1028
 			# Preserve leading indents / spaces
1029 1029
 			# can mess up internal tabs, oh well
1030 1030
 			${$_[0]} =~ s/\t/    /g;			},
@@ -1033,10 +1033,10 @@ my %actions = (
1033 1033
 				("&nbsp; " x (length($1)/2)) .
1034 1034
 				(defined($2) ? "&nbsp;$2" : "")
1035 1035
 			}eg;
1036  
-			${$_[0]} = "<TT>${$_[0]}</TT>";			},
  1036
+			${$_[0]} = "<tt>${$_[0]}</tt>";			},
1037 1037
 	newline_indent => sub {
1038  
-			${$_[0]} =~ s{<BR>\n?( +)} {
1039  
-				"<BR>\n" . ('&nbsp; ' x length($1))
  1038
+			${$_[0]} =~ s{<br>\n?( +)} {
  1039
+				"<br>\n" . ('&nbsp; ' x length($1))
1040 1040
 			}ieg;						},
1041 1041
 	remove_tags => sub {
1042 1042
 			${$_[0]} =~ s/<.*?>//gs;			},
@@ -1348,7 +1348,8 @@ sub processCustomTags {
1348 1348
 	## -- pudge
1349 1349
 
1350 1350
 	# ECODE must be in approvedtags
1351  
-	if (grep /^ECODE$/, @{$constants->{approvedtags}}) {
  1351
+	if (grep /^ecode$/i, @{$constants->{approvedtags}}) {
  1352
+		$str =~ s|<(/?)literal>|<${1}ecode>|gi;  # we used to accept "literal" too
1352 1353
 		my $ecode   = 'ecode';
1353 1354
 		my $open    = qr[\n* <\s* (?:$ecode) (?: \s+ END="(\w+)")? \s*> \n*]xsio;
1354 1355
 		my $close_1 = qr[($open (.*?) \n* <\s* /\2    \s*> \n*)]xsio;  # if END is used
@@ -1683,33 +1684,35 @@ sub approveTag {
1683 1684
 	# Build the hash of approved tags.
1684 1685
 	my $approvedtags = getCurrentStatic('approvedtags');
1685 1686
 	my %approved =
1686  
-		map  { (uc($_), 1)   }
1687  
-		grep { $_ ne 'ECODE' }
  1687
+		map  { (lc, 1)   }
  1688
+		grep { !/^ecode$/i }
1688 1689
 		@$approvedtags;
1689 1690
 
1690 1691
 	# We can do some checks at this point.  $t is the tag minus its
1691  
-	# properties, e.g. for "<A HREF=foo>", $t will be "A".
  1692
+	# properties, e.g. for "<a href=foo>", $t will be "a".
1692 1693
 	my($taglead, $slash, $t) = $wholetag =~ m{^(\s*(/?)\s*(\w+))};
1693  
-	my $t_uc = uc $t;
1694  
-	if (!$approved{$t_uc}) {
  1694
+	my $t_lc = lc $t;
  1695
+	if (!$approved{$t_lc}) {
  1696
+		$Slash::Data::Utility::approveTag::removed->{$t_lc}++
  1697
+			if getCurrentStatic('approveTag_debug');
1695 1698
 		return '';
1696 1699
 	}
1697 1700
 	
1698 1701
 	# These are now stored in a var approvedtags_attr
1699 1702
 	#
1700 1703
 	# A string in the format below:
1701  
-	# a:href_RU img:src_RU,alt,width,height,longdesc_U
  1704
+	# a:href_RU img:src_RU,alt_N,width,height,longdesc_U
1702 1705
 	# 
1703 1706
 	# Is decoded into the following data structure for attribute
1704 1707
 	# approval
1705 1708
 	#
1706 1709
 	# {
1707  
-	#	A =>	{ HREF =>	{ ord => 1, req => 1, url => 1 } },
1708  
-	#	IMG =>	{ SRC =>	{ ord => 1, req => 1, url => 1 },
1709  
-	#		  ALT =>	{ ord => 2                     },
1710  
-	#		  WIDTH =>	{ ord => 3                     },
1711  
-	#		  HEIGHT =>	{ ord => 4                     },
1712  
-	#		  LONGDESC =>	{ ord => 5,           url => 1 }, },
  1710
+	#	a =>	{ href =>	{ ord => 1, req => 1, url => 1 } },
  1711
+	#	img =>	{ src =>	{ ord => 1, req => 1, url => 1 },
  1712
+	#		  alt =>	{ ord => 2, req => 2           },
  1713
+	#		  width =>	{ ord => 3                     },
  1714
+	#		  height =>	{ ord => 4                     },
  1715
+	#		  longdesc =>	{ ord => 5,           url => 1 }, },
1713 1716
 	# }
1714 1717
 	# this is decoded in Slash/DB/MySQL.pm getSlashConf
1715 1718
 
@@ -1719,13 +1722,13 @@ sub approveTag {
1719 1722
 	if ($slash) {
1720 1723
 
1721 1724
 		# Close-tags ("</A>") never get attributes.
1722  
-		$wholetag = "/$t";
  1725
+		$wholetag = "/$t_lc";
1723 1726
 
1724  
-	} elsif ($attr->{$t_uc}) {
  1727
+	} elsif ($attr->{$t_lc}) {
1725 1728
 
1726 1729
 		# This is a tag with attributes, verify them.
1727 1730
 
1728  
-		my %allowed = %{$attr->{$t_uc}};
  1731
+		my %allowed = %{$attr->{$t_lc}};
1729 1732
 		my %required =
1730 1733
 			map  { $_, $allowed{$_}  }
1731 1734
 			grep { $allowed{$_}{req} }
@@ -1736,29 +1739,40 @@ sub approveTag {
1736 1739
 		# look_down() can return a string for some kinds of bogus data
1737 1740
 		return "" unless $elem && ref($elem) eq 'HTML::Element';
1738 1741
 		my @attr_order =
1739  
-			sort { $allowed{uc $a}{ord} <=> $allowed{uc $b}{ord} }
1740  
-			grep { !/^_/ && exists $allowed{uc $_} }
  1742
+			sort { $allowed{lc $a}{ord} <=> $allowed{lc $b}{ord} }
  1743
+			grep { !/^_/ && exists $allowed{lc $_} }
1741 1744
 			$elem->all_attr_names;
1742 1745
 		my %attr_data  = map { ($_, $elem->attr($_)) } @attr_order;
1743  
-		my $num_req_found = 0;
1744  
-		$wholetag = "$t_uc";
  1746
+		my %found;
  1747
+		$wholetag = $t_lc;
  1748
+
1745 1749
 		for my $a (@attr_order) {
1746  
-			my $a_uc = uc $a;
1747  
-			next unless $allowed{$a_uc};
1748  
-			my $data = $attr_data{$a};
1749  
-			$data = fudgeurl($data) if $allowed{$a_uc}{url};
  1750
+			my $a_lc = lc $a;
  1751
+			next unless $allowed{$a_lc};
  1752
+			my $data = $attr_data{$a_lc};
  1753
+			$data = fudgeurl($data) if $allowed{$a_lc}{url};
1750 1754
 			next unless $data;
1751  
-			$wholetag .= qq{ $a_uc="$data"};
1752  
-			++$num_req_found if $required{$a_uc};
  1755
+			$wholetag .= qq{ $a_lc="$data"};
  1756
+			++$found{$a_lc} if $required{$a_lc};
1753 1757
 		}
  1758
+
1754 1759
 		# If the required attributes were not all present, the whole
1755  
-		# tag is invalid.
1756  
-		return '' unless $num_req_found == scalar(keys %required);
  1760
+		# tag is invalid, unless req == 2, in which case we fudge it
  1761
+		for my $a (keys %required) {
  1762
+			my $a_lc = lc $a;
  1763
+			next if $found{$a_lc};
  1764
+			if ($required{$a}{req} == 2) {
  1765
+				# is there some better default than "*"?
  1766
+				$wholetag .= qq{ $a_lc="*"};
  1767
+			} else {
  1768
+				return '';
  1769
+			}
  1770
+		}
1757 1771
 
1758 1772
 	} else {
1759 1773
 
1760 1774
 		# No attributes allowed.
1761  
-		$wholetag = $t;
  1775
+		$wholetag = $t_lc;
1762 1776
 
1763 1777
 	}
1764 1778
 
@@ -1823,6 +1837,9 @@ sub approveCharref {
1823 1837
 			# Unknown, assume flawed.
1824 1838
 			$ok = 0;
1825 1839
 		}
  1840
+
  1841
+		# NB: 1114111/10FFFF is highest allowed by Unicode spec,
  1842
+		# but 917631/E007F is highest with actual glyph
1826 1843
 		$ok = 0 if $decimal <= 0 || $decimal > 65534; # sanity check
1827 1844
 		if ($constants->{draconian_charrefs}) {
1828 1845
 			if (!$constants->{good_numeric}{$decimal}) {
@@ -1840,7 +1857,8 @@ sub approveCharref {
1840 1857
 				$ok = $latin1_to_ascii{$decimal} ? 2 : 0;
1841 1858
 			}
1842 1859
 		} else {
1843  
-			$ok = 0 if $constants->{bad_entity}{$entity};
  1860
+			$ok = 0 if $constants->{bad_entity}{$entity}
  1861
+				|| ($constants->{draconian_charset} && ! exists $entity2char{$entity});
1844 1862
 		}
1845 1863
 	} elsif ($ok == 1) {
1846 1864
 		# Unknown character reference type, assume flawed.
@@ -2246,7 +2264,7 @@ sub HTML::FormatText::AddRefs::get_refs {
2246 2264
 
2247 2265
 #========================================================================
2248 2266
 
2249  
-=head2 balanceTags(HTML [, DEEP_NESTING])
  2267
+=head2 balanceTags(HTML [, OPTIONS])
2250 2268
 
2251 2269
 Balances HTML tags; if tags are not closed, close them; if they are not
2252 2270
 open, remove close tags; if they are in the wrong order, reorder them
@@ -2262,10 +2280,23 @@ open, remove close tags; if they are in the wrong order, reorder them
2262 2280
 
2263 2281
 The HTML to balance.
2264 2282
 
2265  
-=item DEEP_NESTING
  2283
+=item OPTIONS
  2284
+
  2285
+A hashref for various options.
  2286
+
  2287
+=over 4
  2288
+
  2289
+=item deep_nesting
  2290
+
  2291
+Integer for how deep to allow nesting indenting tags, 0 means no limit, 1 means
  2292
+to use var (nesting_maxdepth).  Default is 0.
2266 2293
 
2267  
-Integer for how deep to allow nesting indenting tags, 0 means
2268  
-no limit.
  2294
+=item deep_su
  2295
+
  2296
+Integer for how deep to allow nesting sup/sub tags, 0 means no limit, 1 means
  2297
+to use var (nest_su_maxdepth).  Default is 0.
  2298
+
  2299
+=back
2269 2300
 
2270 2301
 =back
2271 2302
 
@@ -2275,95 +2306,381 @@ The balanced HTML.
2275 2306
 
2276 2307
 =item Dependencies
2277 2308
 
2278  
-The 'approvedtags' and 'lonetags' entries in the vars table.
  2309
+The 'approvedtags' entry in the vars table.
2279 2310
 
2280 2311
 =back
2281 2312
 
2282 2313
 =cut
2283 2314
 
  2315
+{
  2316
+	# these are the tags we know about.
  2317
+	# they are hardcoded because the code must know about each one at 
  2318
+	# a fairly low level; if you want to add more, then we need to
  2319
+	# change the code for them.  in theory we could generalize it more,
  2320
+	# using vars for all this, but that is a low priority.
  2321
+	my %known_tags	= map { ( lc, 1 ) } qw(
  2322
+		b i p br a ol ul li dl dt dd em strong tt blockquote div ecode
  2323
+		img hr big small sub sup cite code
  2324
+		h1 h2 h3 h4 h5 h6
  2325
+	);
  2326
+	# NB: ECODE is excluded because it is handled elsewhere.
  2327
+	
  2328
+	# tags that are indented, so we can make sure indentation level is not too great
  2329
+	my %is_nesting  = map { ( lc, 1 ) } qw(ol ul dl blockquote);
  2330
+
  2331
+	# or sub-super level
  2332
+	my %is_suscript = map { ( lc, 1 ) } qw(sub sup);
  2333
+
  2334
+	# block elements cannot be inside certain other elements; this defines which are which
  2335
+	my %is_block    = map { ( lc, 1 ) } qw(p ol ul li dl dt dd blockquote div hr h1 h2 h3 h4 h5 h6);
  2336
+	my %no_block    = map { ( lc, 1 ) } qw(p dt b i strong em tt cite code big small sub sup a h1 h2 h3 h4 h5 h6);
  2337
+
  2338
+	# when a style tag is cut off prematurely because of a newly introduced block
  2339
+	# element, we want to re-start the style inside the block; it is not perfect,
  2340
+	# but that's why we're here, innit?
  2341
+	my %is_style    = map { ( lc, 1 ) } qw( b i strong em tt cite code big small);
  2342
+
  2343
+	# tags that CAN be empty
  2344
+	my %empty	= map { ( lc, 1 ) } qw(p br img hr);
  2345
+	# tags that HAVE to be empty
  2346
+	my %really_empty = %empty;
  2347
+	# for now p is the only one ... var?
  2348
+	delete $really_empty{'p'};
  2349
+
  2350
+
  2351
+	# define the lists, and the content elements in the lists, in both directions
  2352
+	my %lists = (
  2353
+		dl	=> ['dd', 'dt'],
  2354
+		ul	=> ['li'],
  2355
+		ol	=> ['li'],
  2356
+	);
  2357
+	my %needs_list = (
  2358
+		dd	=> qr/dl/,
  2359
+		dt	=> qr/dl/,
  2360
+		li	=> qr/ul|ol/,
  2361
+	);
  2362
+
  2363
+	# regexes to use later
  2364
+	my $list_re = join '|', keys %lists;
  2365
+	my %lists_re;
  2366
+	for my $list (keys %lists) {
  2367
+		my $re = join '|', @{$lists{$list}};
  2368
+		$lists_re{$list} = qr/$re/;
  2369
+	}
  2370
+
2284 2371
 sub balanceTags {
2285  
-	my($html, $max_nest_depth) = @_;
2286  
-	my(%tags, @stack, $match, %lone, $tag, $close, $whole);
  2372
+	my($html, $options) = @_;
2287 2373
 	my $constants = getCurrentStatic();
  2374
+	my $cache = getCurrentCache();
2288 2375
 
2289  
-	# set up / get preferences
2290  
-	if (@{$constants->{lonetags}}) {
2291  
-		# ECODE is an exception, to be handled elsewhere
2292  
-		$match = join '|', grep !/^ECODE$/,
2293  
-			@{$constants->{approvedtags}};
  2376
+	my($max_nest_depth, $max_su_depth) = (0, 0);
  2377
+	if (ref $options) {
  2378
+		$max_nest_depth = $options->{deep_nesting} == 1 ? $constants->{nesting_maxdepth} : $options->{deep_nesting};
  2379
+		$max_su_depth   = $options->{deep_su}      == 1 ? $constants->{nest_su_maxdepth} : $options->{deep_su};
2294 2380
 	} else {
2295  
-		$constants->{lonetags} = [qw(P LI BR IMG)];
2296  
-		$match = join '|', grep !/^(?:P|LI|BR|ECODE)$/,
2297  
-			@{$constants->{approvedtags}};
  2381
+		# deprecated
  2382
+		$max_nest_depth = $options == 1 ? $constants->{nesting_maxdepth} : $options;
  2383
+	}
  2384
+
  2385
+	my(%tags, @stack, $tag, $close, $whole, $both, @list, $nesting_level, $su_level);
  2386
+
  2387
+	# cache this regex
  2388
+	# if $options->{admin} then allow different regex ... also do in approveTag
  2389
+	my $match = $cache->{balanceTags}{match};
  2390
+	if (!$match) {
  2391
+		$match = join '|', grep $known_tags{$_},
  2392
+			map lc, @{$constants->{approvedtags}};
  2393
+		$cache->{balanceTags}{match} = $match = qr/$match/;
2298 2394
 	}
2299  
-	%lone = map { ($_, 1) } @{$constants->{lonetags}};
2300  
-	my %is_breaking = map { ( $_, 1 ) } @{$constants->{approvedtags_break}};
2301 2395
 
2302  
-	while ($html =~ /(<(\/?)($match)\b[^>]*>)/igo) { # loop over tags
2303  
-		($tag, $close, $whole) = (uc($3), $2, $1);
  2396
+	## this is the main loop.  it finds a tag, any tag
  2397
+	while ($html =~ /(<(\/?)($match)\b[^>]*?( \/)?>)/sig) { # loop over tags
  2398
+		($tag, $close, $whole, $both) = (lc($3), $2, $1, $4);
  2399
+#		printf "DEBUG:%d:%s:%s: %d:%s\n%s\n\n", pos($html), $tag, $whole, scalar(@stack), "@stack", $html;
2304 2400
 
  2401
+		# this is a closing tag (note: not an opening AND closing tag,
  2402
+		# like <br /> ... that is handled with opening tags)
2305 2403
 		if ($close) {
2306  
-			if (@stack && $tags{$tag}) {
2307  
-				# Close the tag on the top of the stack
  2404
+			# we have opened this tag already, handle closing of it
  2405
+			if (!$really_empty{$tag} && @stack && $tags{$tag}) {
  2406
+				# the tag is the one on the top of the stack,
  2407
+				# remove from stack and counter, and move on
2308 2408
 				if ($stack[-1] eq $tag) {
2309  
-					$tags{$tag}--;
2310 2409
 					pop @stack;
  2410
+					$tags{$tag}--;
2311 2411
 
2312  
-				# Close tag somewhere else in stack
2313  
-				} else {
2314  
-					my $p = pos($html) - length($whole);
2315  
-					if (exists $lone{$stack[-1]}) {
2316  
-						pop @stack;
2317  
-					} else {
2318  
-						substr($html, $p, 0) = "</$stack[-1]>";
  2412
+					# we keep track of lists in an add'l stack,
  2413
+					# so pop off that one too
  2414
+					if ($lists{$tag}) {
  2415
+						my $pop = pop @list;
  2416
+						# this should always be equal, else why
  2417
+						# would it be bottom of @stack too?
  2418
+						# so warn if it isn't ...
  2419
+						warn "huh?  $tag ne $pop?" if $tag ne $pop;
2319 2420
 					}
2320  
-					pos($html) = $p;  # don't remove this from stack, go again
  2421
+
  2422
+				# Close tag somewhere else in stack; add it to the
  2423
+				# text and then loop back to catch it properly
  2424
+				# XXX we could optimize here so we don't need to loop back
  2425
+				} else {
  2426
+					_substitute(\$html, $whole, "</$stack[-1]>", 1);
2321 2427
 				}
2322 2428
 
  2429
+			# Close tag not on stack; just delete it, since it is
  2430
+			# obviously not needed
2323 2431
 			} else {
2324  
-				# Close tag not on stack; just delete it
2325  
-				my $p = pos($html) - length($whole);
2326  
-				substr($html, $p, length($whole)) = '';
2327  
-				pos($html) = $p;
  2432
+				_substitute(\$html, $whole, '');
2328 2433
 			}
2329 2434
 
2330  
-		} else {
2331  
-			$tags{$tag}++;
2332  
-			push @stack, $tag;
2333 2435
 
2334  
-			# No <A>...</A> tag is allowed to stretch over a
2335  
-			# breaking tag.  If we're currently in <A> text
2336  
-			# and this is a breaking tag, insert a </A> before
2337  
-			# it, and yank the <A> out of the middle of the
2338  
-			# stack so we don't try to close it later.
2339  
-			# Actually, do that as many times as we have
2340  
-			# nested <A>s (which we shouldn't have anyway).
2341  
-			if (!$constants->{anchortags_bridge_breaks}
2342  
-				&& $is_breaking{$tag}
2343  
-				&& $tags{A}) {
2344  
-				my $p = pos($html) - length($whole);
2345  
-				substr($html, $p, 0) = ("</A>" x $tags{A});
2346  
-				@stack = grep !/^A$/, @stack;
2347  
-				$tags{A} = 0;
  2436
+		# this is an open tag (or combined, like <br />)
  2437
+		} else {
  2438
+			# the tag nests, and we don't want to nest too deeply,
  2439
+			# so just remove it if we are in too deep already
  2440
+			if ($is_nesting{$tag} && $max_nest_depth) {
  2441
+				my $cur_depth = 0;
  2442
+				$cur_depth += $tags{$_} for keys %is_nesting;
  2443
+				if ($cur_depth >= $max_nest_depth) {
  2444
+					_substitute(\$html, $whole, '');
  2445
+					next;
  2446
+				}
2348 2447
 			}
2349 2448
 
2350  
-			if ($max_nest_depth) {
  2449
+			# the tag is a sub/sup tag, and we don't want to nest those too deeply,
  2450
+			# so just remove it if we are in too deep already
  2451
+			if ($is_suscript{$tag} && $max_su_depth) {
2351 2452
 				my $cur_depth = 0;
2352  
-				for (qw( UL OL DIV BLOCKQUOTE DL )) { $cur_depth += $tags{$_} }
2353  
-				return undef if $cur_depth > $max_nest_depth;
  2453
+				$cur_depth += $tags{$_} for keys %is_suscript;
  2454
+				if ($cur_depth >= $max_su_depth) {
  2455
+					_substitute(\$html, $whole, '');
  2456
+					next;
  2457
+				}
  2458
+			}
  2459
+
  2460
+			# we are directly inside a list (UL), but this tag must be
  2461
+			# a list element (LI); if it is not, open one in front of it
  2462
+			# this comes now because it could include a closing tag
  2463
+			if (@stack && $lists{$stack[-1]} && !(grep { $tag eq $_ } @{$lists{$stack[-1]}}) ) {
  2464
+				my $replace = $lists{$stack[-1]}[0];
  2465
+				_substitute(\$html, $whole, "<$replace>$whole");
  2466
+				$tags{$replace}++;
  2467
+				push @stack, $replace;
  2468
+			}
  2469
+
  2470
+			if ($needs_list{$tag}) {
  2471
+				# tag needs a list, like an LI needs a UL or OL, but we
  2472
+				# are not inside one: replace it with a P (or, inside the wrong kind of list, that list's item tag).  not pretty,
  2473
+				# but you should be more careful about what you put in there!
  2474
+				if (!@list || $list[-1] !~ /^(?:$needs_list{$tag})$/) {
  2475
+					my $replace = @list ? $lists{$list[-1]}[0] : 'p';
  2476
+					_substitute(\$html, $whole, "<$replace>");
  2477
+					pos($html) -= length("<$replace>");
  2478
+					next;  # try again
  2479
+
  2480
+				# we are inside a list (UL), and opening a new list item (LI),
  2481
+				# but a previous one is already open
  2482
+				} else {
  2483
+					for my $check (reverse @stack) {
  2484
+						last if $check =~ /^(?:$needs_list{$tag})/;
  2485
+						if ($needs_list{$check}) {
  2486
+							my $newtag = '';
  2487
+							while (my $pop = pop @stack) {
  2488
+								$tags{$pop}--;
  2489
+								$newtag .= "</$pop>";
  2490
+								last if $needs_list{$pop};
  2491
+							}
  2492
+							_substitute(\$html, $whole, $newtag . $whole);
  2493
+							last;
  2494
+						}
  2495
+					}
  2496
+				}
2354 2497
 			}
  2498
+
  2499
+			# if we are opening a block tag, make sure no open no_block
  2500
+			# tags are on the stack currently.  if they are, close them
  2501
+			# first!
  2502
+			if ($is_block{$tag} || $tag eq 'a' || $tag eq 'br') {
  2503
+				# special case for a and br: we do not want a or br tags
  2504
+				# to be included in a tags, even though they are not blocks;
  2505
+				# another var for this special case?
  2506
+				my @no_block = ($tag eq 'a' || $tag eq 'br') ? 'a' : keys %no_block; 
  2507
+				my $newtag  = '';  # close no_block tags
  2508
+				my $newtag2 = '';  # re-open closed style tags inside block
  2509
+
  2510
+				while (grep { $tags{$_} } @no_block) {
  2511
+					my $pop = pop @stack;
  2512
+					$tags{$pop}--;
  2513
+					$newtag .= "</$pop>";
  2514
+					if ($is_style{$pop}) {
  2515
+						$newtag2 = "<$pop>" . $newtag2;
  2516
+					}
  2517
+				}
  2518
+
  2519
+				if ($newtag) {
  2520
+					_substitute(\$html, $whole, $newtag . $whole . $newtag2);
  2521
+					# loop back to catch newly added tags properly
  2522
+					# XXX we could optimize here so we don't need to loop back
  2523
+					pos($html) -= length($whole . $newtag2);
  2524
+					next;
  2525
+				}
  2526
+			}
  2527
+
  2528
+			# the tag must be an empty tag, e.g. <br />; if it has $both, do
  2529
+			# nothing, else add the " /".  since we are closing the tag
  2530
+			# here, we don't need to add it to the stack
  2531
+			if ($really_empty{$tag} || ($empty{$tag} && $both)) {
  2532
+				# this is the only difference we have between
  2533
+				# XHTML and HTML, in this part of the code
  2534
+				if ($constants->{xhtml} && !$both) {
  2535
+					(my $newtag = $whole) =~ s/^<(.+?)>$/<$1 \/>/;
  2536
+					_substitute(\$html, $whole, $newtag);
  2537
+				} elsif (!$constants->{xhtml} && $both) {
  2538
+					(my $newtag = $whole) =~ s/^<(.+?)>$/<$1>/;
  2539
+					_substitute(\$html, $whole, $newtag);
  2540
+				}
  2541
+				next;
  2542
+			}
  2543
+
  2544
+			# opening a new tag to be added to the stack
  2545
+			$tags{$tag}++;
  2546
+			push @stack, $tag;
  2547
+
  2548
+			# we keep track of lists in an add'l stack, for
  2549
+			# the immediately above purpose, so push it on here
  2550
+			push @list, $tag if $lists{$tag};
2355 2551
 		}
2356 2552
 
2357 2553
 	}
2358 2554
 
2359  
-	$html =~ s/\s+$//;
  2555
+	$html =~ s/\s+$//s;
2360 2556
 
2361 2557
 	# add on any unclosed tags still on stack
2362  
-	$html .= join '', map { "</$_>" } grep { !exists $lone{$_} } reverse @stack;
  2558
+	$html .= join '', map { "</$_>" } grep { !exists $really_empty{$_} } reverse @stack;
  2559
+
  2560
+	# cheap and easy hack to make sure everything in a blockquote is also
  2561
+	# inside another block element; extra divs don't hurt anything
  2562
+	$html =~ s|<blockquote>|<blockquote><div>|gi;
  2563
+	$html =~ s|</blockquote>|</div></blockquote>|gi;
  2564
+
  2565
+	_validateLists(\$html);
  2566
+	_removeEmpty(\$html);
2363 2567
 
2364 2568
 	return $html;
2365 2569
 }
2366 2570
 
  2571
+sub _removeEmpty {
  2572
+	my($html) = @_;
  2573
+	my $p = getCurrentStatic('xhtml') ? '<p />' : '<p>';
  2574
+	$$html =~ s|<p>\s*</p>|$p|g;
  2575
+	$$html =~ s|<(\w+)>\s*</\1>||g;
  2576
+}
  2577
+
  2578
+
  2579
+# validate the structure of lists ... essentially, make sure
  2580
+# they are properly nested, that everything in a list is inside
  2581
+# a proper li/dt/dd, etc.
  2582
+
  2583
+sub _validateLists {
  2584
+	my($html) = @_;
  2585
+
  2586
+	# each nested list is cleaned up and then stored in the hash,
  2587
+	# to be expanded later
  2588
+	my %full;
  2589
+	# counter for %full
  2590
+	my $j = 0;
  2591
+	
  2592
+	# the main loop finds paired list tags, and what is between them,
  2593
+	# like <ul> ... </ul>
  2594
+	while ($$html =~ m:(<($list_re)>(.*?)</\2>):sig) {
  2595
+		my($whole, $list, $content) = ($1, $2, $3);
  2596
+		# if we don't have an innermost list, but there's another
  2597
+		# list nested inside this one, move pos to just past this list's opening tag and try again
  2598
+		if ($content =~ /<(?:$list_re)>/) {
  2599
+			pos($$html) -= length($whole) - length("<$list>");
  2600
+			next;
  2601
+		}
  2602
+
  2603
+		# the default element to use inside the list, for content
  2604
+		# that is not inside any proper element
  2605
+		my $inside = $lists{$list}[0];
  2606
+		my $re     = $lists_re{$list};
  2607
+
  2608
+		# since we are looking at innermost lists, we do not
  2609
+		# need to worry about stacks or nesting, just keep
  2610
+		# track of the current element that we are in
  2611
+		my $in    = '';
  2612
+
  2613
+		# the secondary loop finds either a tag, or text between tags
  2614
+		while ($content =~ m!\s*([^<]+|<(.+?)>)!sig) {
  2615
+			my($whole, $tag) = ($1, $2);
  2616
+			next if $whole !~ /\S/;
  2617
+			# we only care here if this is one that can be inside a list
  2618
+			if ($tag) {
  2619
+				# if open tag ...
  2620
+				if ($tag =~ /^(?:$re)$/) {
  2621
+					# add new close tag if we are currently inside a tag
  2622
+					_substitute(\$content, $whole, "</$in>$whole") if $in;
  2623
+					# set new open tag
  2624
+					$in = $tag;
  2625
+					next;
  2626
+
  2627
+				# if close tag ...
  2628
+				} elsif ($tag =~ /^\/(?:$re)$/) {
  2629
+					# remove if we are not already inside a tag
  2630
+					_substitute(\$content, $whole, '') unless $in;
  2631
+					# this should never happen, as we've already
  2632
+					# balanced the tags
  2633
+					warn "huh?  $tag ne /$in?" if $tag ne "/$in";
  2634
+					# set to no open tag
  2635
+					$in = '';
  2636
+					next;
  2637
+				}
  2638
+			}
  2639
+
  2640
+			# we are NOT an appropriate tag, or inside one, so
  2641
+			# create one to be inside of
  2642
+			if (!$in) {
  2643
+				$in = $inside;
  2644
+				_substitute(\$content, $whole, "<$inside>$whole");
  2645
+			}
  2646
+		}
  2647
+
  2648
+		# now done with loop, so add rest of $in if there is any
  2649
+		$content .= "</$in>" if $in;
  2650
+
  2651
+		# we have nesting to deal with, so replace this part
  2652
+		# with a temporary token and cache the result in the hash
  2653
+		$full{$j} = "<$list>$content</$list>";
  2654
+		_substitute($html, $whole, "<FULL-$j>");
  2655
+		$j++;
  2656
+		pos($$html) = 0;  # start over
  2657
+	}
  2658
+
  2659
+	# expand it all back out
  2660
+	while ($j--) {
  2661
+		last if $j < 0;
  2662
+		$$html =~ s/<FULL-$j>/$full{$j}/;
  2663
+	}
  2664
+
  2665
+	return 1;
  2666
+}
  2667
+
  2668
+# put a string into the current position in that string, and update
  2669
+# pos() accordingly
  2670
+sub _substitute {
  2671
+	my($full, $old, $new, $zeropos) = @_;
  2672
+	# zeropos is for when we add a close tag or somesuch, but don't touch
  2673
+	# the stack, and just let the code handle it by keeping pos right in
  2674
+	# front of the new tag
  2675
+
  2676
+	my $len  = length $old;
  2677
+
  2678
+	my $p = pos($$full) - $len;
  2679
+	substr($$full, $p, ($zeropos ? 0 : $len)) = $new;
  2680
+	pos($$full) = $p + ($zeropos ? 0 : length($new));
  2681
+}
  2682
+}
  2683
+
2367 2684
 #========================================================================
2368 2685
 
2369 2686
 =head2 parseDomainTags(HTML, RECOMMENDED, NOTAGS)
@@ -2467,12 +2784,12 @@ The parsed HTML.
2467 2784
 sub parseSlashizedLinks {
2468 2785
 	my($html, $options) = @_;
2469 2786
 	$html =~ s{
2470  
-		<A[ ]HREF="__SLASHLINK__"
  2787
+		<a[ ]href="__SLASHLINK__"
2471 2788
 		([^>]+)
2472 2789
 		>
2473 2790
 	}{
2474 2791
 		_slashlink_to_link($1, $options)
2475  
-	}gxe;
  2792
+	}igxe;
2476 2793
 	return $html;
2477 2794
 }
2478 2795
 
@@ -2531,7 +2848,7 @@ sub _slashlink_to_link {
2531 2848
 			$url .= qq{#$frag} if $frag;
2532 2849
 		}
2533 2850
 	}
2534  
-	return q{<A HREF="} . strip_urlattr($url) . q{">};
  2851
+	return q{<a href="} . strip_urlattr($url) . q{">};
2535 2852
 }
2536 2853
 
2537 2854
 #========================================================================
@@ -2572,14 +2889,14 @@ sub addDomainTags {
2572 2889
 	my $in_a = 0;
2573 2890
 	$html =~ s
2574 2891
 	{
2575  
-		( < (/?) A \b[^>]* > )
  2892
+		( < (/?) a \b[^>]* > )
2576 2893
 	}{
2577 2894
 		my $old_in_a = $in_a;
2578 2895
 		my $new_in_a = !$2;
2579 2896
 		$in_a = $new_in_a;
2580  
-		(($old_in_a && $new_in_a) ? '</A>' : '') . $1
  2897
+		(($old_in_a && $new_in_a) ? '</a>' : '') . $1
2581 2898
 	}gixe;
2582  
-	$html .= '</A>' if $in_a;
  2899
+	$html .= '</a>' if $in_a;
2583 2900
 
2584 2901
 	# Now, since we know that every <A> has a </A>, this pattern will
2585 2902
 	# match and let the subroutine above do its magic properly.
@@ -2590,12 +2907,12 @@ sub addDomainTags {
2590 2907
 
2591 2908
 	$html =~ s
2592 2909
 	{
2593  
-		(<A\s+HREF="		# $1 is the whole <A HREF...>
  2910
+		(<a\s+href="		# $1 is the whole <A HREF...>
2594 2911
 			([^">]*)	# $2 is the URL (quotes guaranteed to
2595 2912
 					# be there thanks to approveTag)
2596 2913
 		">)
2597 2914
 		(.*?)			# $3 is whatever's between <A> and </A>
2598  
-		</A\b[^>]*>
  2915
+		</a\b[^>]*>
2599 2916
 	}{
2600 2917
 		$3	? _url_to_domain_tag($1, $2, $3)
2601 2918
 			: ''
@@ -2608,7 +2925,7 @@ sub addDomainTags {
2608 2925
 	# and doesn't overlap, so now we can just remove the extra ones,
2609 2926
 	# which are easy to tell because they DON'T have domain tags.
2610 2927
 
2611  
-	$html =~ s{</A>}{}g;
  2928
+	$html =~ s{</a>}{}gi;
2612 2929
 
2613 2930
 	return $html;
2614 2931
 }
@@ -2852,7 +3169,7 @@ sub _link_to_slashlink {
2852 3169
 	# If we have something good in %attr, we can go ahead and
2853 3170
 	# use our custom tag.  Concatenate it together.
2854 3171
 	if ($attr{sn}) {
2855  
-		$retval = q{<A HREF="__SLASHLINK__" }
  3172
+		$retval = q{<a href="__SLASHLINK__" }
2856 3173
 			. join(" ",
2857 3174
 				map { qq{$_="} . strip_attribute($attr{$_}) . qq{"} }
2858 3175
 				sort keys %attr)
2  Slash/XML/RSS/RSS.pm
@@ -477,7 +477,7 @@ sub rss_item_description {
477 477
 				$desc =~ s/[\w'-]+$//;  # don't trim in middle of word
478 478
 				if ($self->{rdfitemdesc_html}) {
479 479
 					$desc =~ s/<[^>]*$//;
480  
-					$desc = balanceTags($desc);
  480
+					$desc = balanceTags($desc, { deep_nesting => 1 });
481 481
 				}
482 482
 				$desc =~ s/\s+$//;
483 483
 				$desc .= '...';
4  plugins/Submit/submit.pl
@@ -563,7 +563,7 @@ sub displayForm {
563 563
 		$fixedstory =~ s/^<(?:P|BR)(?:>|\s[^>]*>)//i;
564 564
 		$fixedstory =~ s/<(?:P|BR)(?:>|\s[^>]*>)$//i;
565 565
 	}
566  
-	$fixedstory = balanceTags($fixedstory);
  566
+	$fixedstory = balanceTags($fixedstory, { deep_nesting => 1 });
567 567
 
568 568
 	slashDisplay('displayForm', {
569 569
 		fixedstory	=> $fixedstory,
@@ -616,7 +616,7 @@ sub saveSub {
616 616
 	} else {
617 617
 		$form->{story} = strip_html(url2html($form->{story}));
618 618
 	}
619  
-	$form->{story} = balanceTags($form->{story});
  619
+	$form->{story} = balanceTags($form->{story}, { deep_nesting => 1 });
620 620
 
621 621
 	my $uid ||= $form->{name}
622 622
 		? getCurrentUser('uid')
33  sbin/portald
@@ -39,6 +39,7 @@ setCurrentSkin(determineCurrentSkin());
39 39
 my $gSkin = getCurrentSkin();
40 40
 my $totalChangedStories = 1;
41 41
 
  42
+my $br = $constants->{xhtml} ? '<br />' : '<br>';
42 43
 my $backupdb = getObject('Slash::DB', { db_type => 'reader' });
43 44
 
44 45
 ################################################################################
@@ -69,30 +70,30 @@ sub geturl {
69 70
 ################################################################################
70 71
 
71 72
 sub getTop10Comments {
72  
-	my $A =	$backupdb->getTop10Comments();
  73
+	my $A =	$backupdb->getTop10Comments;
73 74
 
74  
-	my $reasons = $slashdb->getReasons();
  75
+	my $reasons = $slashdb->getReasons;
75 76
 
76 77
 	my $block;
77 78
 	foreach (@$A) {
78 79
 		my($sid, $title, $cid, $subj, $d, $nickname, $points, $reason) = @$_;
79 80
 		$block .= <<EOT;
80 81
 
81  
-&middot; <B><A HREF="$gSkin->{rootdir}/comments.pl?sid=$sid&amp;cid=$cid">$subj</A>
  82
+&middot; <b><a href="$gSkin->{rootdir}/comments.pl?sid=$sid&amp;cid=$cid">$subj</a>
82 83
 	($points points, $reasons->{$reason}{name})
83  
-	by $nickname</B>
  84
+	by $nickname</b>
84 85
 	on $d
85  
-	<FONT SIZE="1">attached to
86  
-	<A HREF="$gSkin->{rootdir}/article.pl?sid=$sid">$title</A></FONT><BR>
  86
+	<small>attached to
  87
+	<a href="$gSkin->{rootdir}/article.pl?sid=$sid">$title</a></small>$br
87 88
 EOT
88 89
 	}
89  
-	setblock("top10comments", $block);
  90
+	setblock('top10comments', $block);
90 91
 
91 92
 }
92 93
 
93 94
 #################################################################
94 95
 sub getSlashdotPoll {
95  
-	setblock("poll", pollbooth('_currentqid', 1));
  96
+	setblock('poll', pollbooth('_currentqid', 1));
96 97
 }
97 98
 
98 99
 
@@ -111,22 +112,22 @@ The fortune command.
111 112
 #################################################################
112 113
 sub getUptime {
113 114
 	my $x = `/usr/bin/uptime`;
114  
-	$x = "<B>time:</B> $x";
115  
-	$x =~ s/up/\n<BR><B>uptime:<\/B>/g;
116  
-	$x =~ s/load average:/\n<BR><B>load average:<\/B>/;
  115
+	$x = "<b>time:</b> $x";
  116
+	$x =~ s/up/\n$br<b>uptime:<\/b>/g;
  117
+	$x =~ s/load average:/\n<br><b>load average:<\/b>/;
117 118
 	my $ps = `/bin/ps aux | /usr/bin/wc -l`;
118 119
 	$ps--;
119  
-	$x .= "<BR><B>processes:</B> $ps <BR>";
  120
+	$x .= "$br<b>processes:</b> $ps$br";
120 121
 
121 122
 	my $stats = $x;
122 123
 
123 124
 #	my $tc = $constants->{totalComments};
124 125
 	my $th = $constants->{totalhits};
125 126
 
126  
-#	$stats .= "<B>yesterday:</B> $yesterday<BR>
127  
-#		<B>today:</B> $today<BR>
128  
-#		<B>ever:</B> $th<BR>";
129  
-	$stats .= "<B>totalhits:</B> $th<BR>";
  127
+#	$stats .= "<b>yesterday:</b> $yesterday<br>
  128
+#		<b>today:</b> $today<br>
  129
+#		<b>ever:</b> $th<br>";
  130
+	$stats .= "<b>totalhits:</b> $th$br";
130 131
 
131 132
 	setblock('uptime', $stats);
132 133
 }
12  sql/mysql/defaults.sql
@@ -623,14 +623,13 @@ INSERT INTO vars (name, value, description) VALUES ('adminmail_post','admin@exam
623 623
 INSERT INTO vars (name, value, description) VALUES ('allow_anonymous','1','allow anonymous posters');
624 624
 INSERT INTO vars (name, value, description) VALUES ('allow_moderation','1','allows use of the moderation system');
625 625
 INSERT INTO vars (name, value, description) VALUES ('allow_nonadmin_ssl','0','0=users with seclev <= 1 cannot access the site over Secure HTTP; 1=they all can; 2=only if they are subscribers');
626  
-INSERT INTO vars (name, value, description) VALUES ('anchortags_bridge_breaks', '0', 'Are <A> tags allowed to stretch across breaking tags (defined in approvedtags_break)?');
627 626
 INSERT INTO vars (name, value, description) VALUES ('anonymous_coward_uid', '1', 'UID to use for anonymous coward');
628 627
 INSERT INTO vars (name, value, description) VALUES ('anon_name_alt','An anonymous coward','Name of anonymous user to be displayed in stories');
629 628
 INSERT INTO vars (name, value, description) VALUES ('apache_cache', '3600', 'Default times for the getCurrentCache().');
630 629
 INSERT INTO vars (name, value, description) VALUES ('approved_url_schemes','ftp|http|gopher|mailto|news|nntp|telnet|wais|https','Schemes that can be used in comment links without being stripped of bogus chars');
631  
-INSERT INTO vars (name, value, description) VALUES ('approvedtags','B|I|P|A|LI|OL|UL|EM|BR|TT|STRONG|BLOCKQUOTE|DIV|ECODE','Tags that you can use');
632  
-INSERT INTO vars (name, value, description) VALUES ('approvedtags_attr', 'a:href_RU img:src_RU,alt,width,height,longdesc_U', 'definition of approvedtags attributes in the following format a:href_RU img:src_RU,alt,width,height,longdesc_U see Slash::Utility::Data.pm for more details');
633  
-INSERT INTO vars (name, value, description) VALUES ('approvedtags_break','P|LI|OL|UL|BR|BLOCKQUOTE|DIV','Tags that break words (see breakHtml())');
  630
+INSERT INTO vars (name, value, description) VALUES ('approvedtags','b|i|p|br|a|ol|ul|li|dl|dt|dd|em|strong|tt|blockquote|div|ecode','Tags that you can use');
  631
+INSERT INTO vars (name, value, description) VALUES ('approvedtags_attr', 'a:href_RU img:src_RU,alt_N,width,height,longdesc_U', 'definition of approvedtags attributes in the following format a:href_RU img:src_RU,alt,width,height,longdesc_U see Slash::Utility::Data.pm for more details');
  632
+INSERT INTO vars (name, value, description) VALUES ('approvedtags_break','p|br|ol|ul|li|dl|dt|dd|blockquote|div|img|hr|h1|h2|h3|h4|h5|h6','Tags that break words (see breakHtml())');
634 633
 INSERT INTO vars (name, value, description) VALUES ('archive_delay','60','days to wait for story archiving');
635 634
 INSERT INTO vars (name, value, description) VALUES ('archive_delay_mod','60','Days before moderator logs are expired');
636 635
 INSERT INTO vars (name, value, description) VALUES ('articles_only','0','show only Articles in submission count in admin menu');
@@ -742,7 +741,7 @@ INSERT INTO vars (name, value, description) VALUES ('discussions_speed_limit','3
742 741
 INSERT INTO vars (name, value, description) VALUES ('do_expiry','1','Flag which controls whether we expire users.');
743 742
 INSERT INTO vars (name, value, description) VALUES ('down_moderations','-6','number of how many comments you can post that get down moderated');
744 743
 INSERT INTO vars (name, value, description) VALUES ('draconian_charrefs','0','Enable strictest-possible rules for disallowing HTML entities/character references?');
745  
-INSERT INTO vars (name, value, description) VALUES ('draconian_charset','0','Convert high-bit characters to character references, which are then filtered by approveCharrefs or encode_html_amp (works only with Latin-1 for now)');
  744
+INSERT INTO vars (name, value, description) VALUES ('draconian_charset','1','Convert high-bit characters to character references, which are then filtered by approveCharrefs or encode_html_amp (works only with Latin-1 for now)');
746 745
 INSERT INTO vars (name, value, description) VALUES ('draconian_charset_convert','0','Convert some of high-bit chars to ASCII representations instead (see draconian_charset)');
747 746
 INSERT INTO vars (name, value, description) VALUES ('email_domains_invalid', 'example.com', 'space separated list of domains that are not valid for email addresses');
748 747
 INSERT INTO vars (name, value, description) VALUES ('enable_index_topic','','set this to the value in string param for index topic \(something like "topic_4"\)');
@@ -801,7 +800,6 @@ INSERT INTO vars (name, value, description) VALUES ('log_db_user','','The virtua
801 800
 INSERT INTO vars (name, value, description) VALUES ('logdir','/usr/local/slash/www.example.com/logs','Where should the logs be found?');
802 801
 INSERT INTO vars (name, value, description) VALUES ('login_speed_limit', '20', 'How fast a user can create users, etc.'); 
803 802
 INSERT INTO vars (name, value, description) VALUES ('login_temp_minutes', '10', 'Minutes before a temporary login expires');
804  
-INSERT INTO vars (name, value, description) VALUES ('lonetags','P|LI|BR|IMG','Tags that don\'t need to be closed');
805 803
 INSERT INTO vars (name, value, description) VALUES ('m1_eligible_hitcount','3','Number of hits on comments.pl before user can be considered eligible for moderation');
806 804
 INSERT INTO vars (name, value, description) VALUES ('m1_eligible_percentage','0.8','Percentage of users eligible to moderate');
807 805
 INSERT INTO vars (name, value, description) VALUES ('m1_pointgrant_end', '0.8888', 'Ending percentage into the pool of eligible moderators (used by moderatord)');
@@ -897,6 +895,7 @@ INSERT INTO vars (name, value, description) VALUES ('moderatord_debug_info', '1'
897 895
 INSERT INTO vars (name, value, description) VALUES ('moderatord_lag_threshold','100000','The number of updates replication must be within before moderatord will run using the replicated handle. If this threshold isn\'t met, moderatord will not run.');
898 896
 INSERT INTO vars (name, value, description) VALUES ('modviewseclev','100','Minimum seclev to see moderation totals on a comment');
899 897
 INSERT INTO vars (name, value, description) VALUES ('nesting_maxdepth','3','Maximum depth to which <BLOCKQUOTE>-type tags can be nested');
  898
+INSERT INTO vars (name, value, description) VALUES ('nest_su_maxdepth','3','Maximum depth to which <SUP> and <SUB> tags can be nested');
900 899
 INSERT INTO vars (name, value, description) VALUES ('newsletter_body','0','Print bodytext, not merely introtext, in newsletter.');
901 900
 INSERT INTO vars (name, value, description) VALUES ('nick_chars', ' abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789$_.+!*\'(),-', 'Characters allowed in user nicknames');
902 901
 INSERT INTO vars (name, value, description) VALUES ('nick_maxlen', '20', 'Max length of nickname, should correspond with schema for users.nickname');
@@ -996,3 +995,4 @@ INSERT INTO vars (name, value, description) VALUES ('users_count','1','(Approxim
996 995
 INSERT INTO vars (name, value, description) VALUES ('users_show_info_seclev','0','Minimum seclev to view a user\s info');
997 996
 INSERT INTO vars (name, value, description) VALUES ('users_speed_limit','20','How fast a user can change their prefs');
998 997
 INSERT INTO vars (name, value, description) VALUES ('writestatus','dirty','Simple Boolean to determine if homepage needs rewriting');
  998
+INSERT INTO vars (name, value, description) VALUES ('xhtml','0','Boolean for whether we are using XHTML');
36  sql/mysql/upgrades
@@ -2874,14 +2874,10 @@ INSERT INTO vars (name, value, description) VALUES ('daypass_offer_onlywhentmf',
2874 2874
 
2875 2875
 # End of T_2_5_0_53, Start of T_2_5_0_54 - 2005/03/29
2876 2876
 
2877  
-# SLASHCODE/USEPERL LAST UPDATED HERE
2878  
-
2879 2877
 INSERT INTO vars (name, value, description) VALUES ('daypass_tz', 'PST', 'What timezone are daypasses considered to be in (this determines where "midnight" starts and ends the day)');
2880 2878
 
2881 2879
 # End of T_2_5_0_54, Start of T_2_5_0_55 - 2005/03/31
2882 2880
 
2883  
-# SLASHDOT LAST UPDATED HERE
2884  
-
2885 2881
 # Changes for plugins/Dilemma (ignore if not using that plugin)
2886 2882
 INSERT INTO vars (name, value, description) VALUES ('dilemma_draw_graph_ticks', '100', 'Draw graph every this many ticks (roughly -- before this tickcount, draw every time; after 20x this tickcount, draw 1/3 as often');
2887 2883
 ALTER TABLE dilemma_agents ADD COLUMN trid INT UNSIGNED NOT NULL AFTER daid, DROP INDEX alive, ADD INDEX trid_alive (trid, alive);
@@ -2900,6 +2896,10 @@ ALTER TABLE dilemma_stats DROP COLUMN name;
2900 2896
 ALTER TABLE dilemma_meetlog ADD COLUMN trid INT UNSIGNED NOT NULL AFTER meetid, DROP INDEX tick, ADD INDEX trid_tick (trid, tick);
2901 2897
 UPDATE dilemma_meetlog SET trid=1;
2902 2898
 
  2899
+# SLASHCODE/USEPERL LAST UPDATED HERE
  2900
+
  2901
+# SLASHDOT LAST UPDATED HERE
  2902
+
2903 2903
 # End of T_2_5_0_55, Start of T_2_5_0_56 - 2005/04/05
2904 2904
 
2905 2905
 # Changes for plugins/Daypass (ignore if not using that plugin)
@@ -2912,3 +2912,31 @@ INSERT INTO vars (name, value, description) VALUES ('daypass_offer_method1_regex
2912 2912
 
2913 2913
 # End of T_2_5_0_56, Start of T_2_5_0_57 - 2005/04/08
2914 2914
 
  2915
+#############
  2916
+# be careful with the statements below; make sure they are right for your site
  2917
+
  2918
+# this is not required, but highly advised; it shouldn't have an adverse
  2919
+# effect on most sites, but the effect on sites that use alternative charsets is unknown
  2920
+UPDATE vars SET value = '1' WHERE name = 'draconian_charset';
  2921
+
  2922
+# look at this carefully, make sure it has all the tags you want, and none you don't
  2923
+# note that you cannot introduce new tags this way anymore, those must be added in
  2924
+# the code directly, then *enabled* here
  2925
+REPLACE INTO vars (name, value, description) VALUES ('approvedtags','b|i|p|br|a|ol|ul|li|dl|dt|dd|em|strong|tt|blockquote|div|ecode','Tags that you can use');
  2926
+
  2927
+# we add the N option and use it for the img alt attribute
  2928
+REPLACE INTO vars (name, value, description) VALUES ('approvedtags_attr', 'a:href_RU img:src_RU,alt_N,width,height,longdesc_U', 'definition of approvedtags attributes in the following format a:href_RU img:src_RU,alt,width,height,longdesc_U see Slash::Utility::Data.pm for more details');
  2929
+
  2930
+# this should not need any modification
  2931
+INSERT INTO vars (name, value, description) VALUES ('approvedtags_break','p|br|ol|ul|li|dl|dt|dd|blockquote|div|img|hr|h1|h2|h3|h4|h5|h6','Tags that break words (see breakHtml())');
  2932
+
  2933
+# this still won't be in effect unless sub or sup is in approvedtags, of course
  2934
+INSERT INTO vars (name, value, description) VALUES ('nest_su_maxdepth','3','Maximum depth to which <SUP> and <SUB> tags can be nested');
  2935
+
  2936
+# HTML is recommended, but if you are using XHTML, by all means, set this to 1
  2937
+INSERT INTO vars (name, value, description) VALUES ('xhtml','0','Boolean for whether we are using XHTML');
  2938
+
  2939
+#
  2940
+#############
  2941
+
  2942
+
7  themes/slashcode/htdocs/comments.pl
@@ -634,10 +634,9 @@ sub validateComment {
634 634
 		}
635 635
 	}
636 636
 
637  
-	unless (defined($$comm = balanceTags($$comm, $constants->{nesting_maxdepth}))) {
638  
-		# If we didn't return from right here, one or more later
639  
-		# error messages would overwrite this one.
640  
-		$$error_message = getError('nesting too deep');
  637
+	unless (defined($$comm = balanceTags($$comm, { deep_nesting => 1 }))) {
  638
+		# only time this should return an error is if the HTML is busted
  639
+		$$error_message = getError('broken html');
641 640
 		return ;
642 641
 	}
643 642
 
2  themes/slashcode/htdocs/users.pl
@@ -2279,7 +2279,7 @@ sub saveUser {
2279 2279
 	for my $key (keys %extr) {
2280 2280
 		my $dat = $extr{$key};
2281 2281
 		$dat = strip_html($dat);
2282  
-		$dat = balanceTags($dat, 1); # only 1 nesting tag (UL, OL, BLOCKQUOTE) allowed
  2282
+		$dat = balanceTags($dat, { deep_nesting => 2 }); # only 2 nesting tags (UL, OL, BLOCKQUOTE) allowed
2283 2283
 		$dat = addDomainTags($dat) if $dat;
2284 2284
 
2285 2285
 		# If the sig becomes too long to fit (domain tagging causes
2  themes/slashcode/templates/default;portald;default
@@ -11,7 +11,7 @@ en_US
11 11
 __name__
12 12
 default
13 13
 __template__
14  
-&middot; <A HREF="[% item.link | strip_attribute %]">[% item.title | strip_notags %]</A><BR>
  14
+&middot; <a href="[% item.link | strip_attribute %]">[% item.title | strip_notags %]</a><br[% ' /' IF constants.xhtml %]>
15 15
 __seclev__
16 16
 10000
17 17
 __version__
6  themes/slashcode/templates/errors;comments;default
@@ -167,10 +167,8 @@ and (optionally, but preferably) your IP number
167 167
 "<tt>[% unencoded_ip %]</tt>" and your username "<tt>[% user.nickname | strip_literal %]</tt>"[%
168 168
 END %].
169 169
 
170  
-[% # NESTING TOO DEEP
171  
-CASE "nesting too deep" %]
172  
-You can only post nested lists and blockquotes [% constants.nesting_maxdepth %]
173  
-levels deep. Please fix your UL, OL, DL, and BLOCKQUOTE tags.
  170
+[% CASE "broken html" %]
  171
+Your comment could not be processed.  Please try again.
174 172
 
175 173
 [% # LOW CHARS-PER-LINE
176 174
 CASE "low chars-per-line" %]
2  utils/createTestComments
@@ -139,7 +139,7 @@ $werder = new Silly::Werder;
139 139
 		elsif ($mode_rand < 0.50)	{ $comment = strip_html     ($comment) }
140 140
 		elsif ($mode_rand < 0.75)	{ $comment = strip_extrans  ($comment) }
141 141
 		else				{ $comment = strip_code     ($comment) }
142  
-		$comment = balanceTags($comment, $constants->{nesting_maxdepth});
  142
+		$comment = balanceTags($comment, { deep_nesting => 1 });
143 143
 		$comment = addDomainTags($comment);
144 144
 
145 145
 		my $score = 1;

0 notes on commit 83ddfab

Please sign in to comment.
Something went wrong with that request. Please try again.