Browse files

Two major changes together (sorry) -- getSimilarStory and slashizedLi…

…nks.

"Similar story" code adds a task to plugins/Admin which generates a
list of "uncommon words" and URLs (currently stored in a var, which I
don't much like but it's there for now) from recent stories.  Each story
preview/edit uses that list and the list of words in the edited story to
find a list of similar stories, which can help admins find useful links
and duplicates.  And "slashizeLinks," part of Slash::Utility::Data and
referenced by admin.pl, Slash.pm and article.pl, converts all article
and comments links internal to a Slash site (including .shtml links),
in stories, to a standard format which is parsed at display time.  This
ensures that such links are in a standard format, and that static pages
link to static pages and dynamic pages to dynamic.  Also, a few minor
little bugs and spelling mistakes are fixed.
  • Loading branch information...
1 parent 3672774 commit 7736a750ad3632ca7807b8cdba5cfe96d95e7d23 @jamiemccarthy jamiemccarthy committed May 26, 2002
View
122 Slash/DB/MySQL/MySQL.pm
@@ -4772,6 +4772,128 @@ sub getStory {
}
########################################################
+sub getSimilarStories {
+ my($self, $story, $max_wanted) = @_;
+ $max_wanted ||= 100;
+ my $constants = getCurrentStatic();
+ my($title, $introtext, $bodytext) =
+ ($story->{title} || "",
+ $story->{introtext} || "",
+ $story->{bodytext} || "");
+ my $not_original_sid = $story->{sid} || "";
+ $not_original_sid = " AND stories.sid != "
+ . $self->sqlQuote($not_original_sid)
+ if $not_original_sid;
+
+ my $text = "$title $introtext $bodytext";
+ # Find a list of all the words in the current story.
+ my $text_words = findWords($text);
+ # Load up the list of words in recent stories (the only ones we
+ # need to concern ourselves with looking for).
+ my @recent_uncommon_words = split " ",
+ ($self->getVar("uncommonstorywords", "value") || "");
+ # If we don't (yet) know the list of uncommon words, return now.
+ return [ ] unless @recent_uncommon_words;
+ # Find the intersection of this story and recent stories.
+ my @text_uncommon_words =
+ sort {
+ $text_words->{$b}{weight} <=> $text_words->{$a}{weight}
+ ||
+ $a cmp $b
+ }
+ grep { $text_words->{$_}{count} }
+ grep { length($_) > 3 }
+ @recent_uncommon_words;
+#use Data::Dumper;
+#print STDERR "text_words: " . Dumper($text_words);
+#print STDERR "uncommon intersection: '@text_uncommon_words'\n";
+ # If there is no intersection, return now.
+ return [ ] unless @text_uncommon_words;
+ # Find previous stories which have used these words.
+ my $where = "";
+ my @where_clauses = ( );
+ for my $word (@text_uncommon_words) {
+ my $word_q = $self->sqlQuote('%' . $word . '%');
+ push @where_clauses, "stories.title LIKE $word_q";
+ push @where_clauses, "story_text.introtext LIKE $word_q";
+ push @where_clauses, "story_text.bodytext LIKE $word_q";
+ }
+ $where = join(" OR ", @where_clauses);
+ my $n_days = $constants->{similarstorydays} || 30;
+ my $stories = $self->sqlSelectAllHashref(
+ "sid",
+ "stories.sid AS sid, title, introtext, bodytext,
+ time, displaystatus",
+ "stories, story_text",
+ "stories.sid = story_text.sid $not_original_sid
+ AND stories.time >= DATE_SUB(NOW(), INTERVAL $n_days DAY)
+ AND ($where)"
+ );
+#print STDERR "similar stories: " . Dumper($stories);
+
+ for my $sid (keys %$stories) {
+ # Add up the weights of each story in turn, for how closely
+ # they match with the current story. Include a multiplier
+ # based on the length of the match.
+ my $s = $stories->{$sid};
+ $s->{weight} = 0;
+ for my $word (@text_uncommon_words) {
+ my $word_weight = 0;
+ my $wr = qr{(?i:\b\Q$word\E)};
+ my $m = log(length $word);
+ $word_weight += 2.0*$m * (() = $s->{title} =~ m{$wr}g);
+ $word_weight += 1.0*$m * (() = $s->{introtext} =~ m{$wr}g);
+ $word_weight += 0.5*$m * (() = $s->{bodytext} =~ m{$wr}g);
+ $s->{word_hr}{$word} = $word_weight if $word_weight > 0;
+ $s->{weight} += $word_weight;
+ }
+ # Round off weight to 0 decimal places (to an integer).
+ $s->{weight} = sprintf("%.0f", $s->{weight});
+ # Store (the top-scoring-ten of) the words that connected
+ # the original story to this story.
+ $s->{words} = [
+ sort { $s->{word_hr}{$b} <=> $s->{word_hr}{$a} }
+ keys %{$s->{word_hr}}
+ ];
+ $#{$s->{words}} = 9 if $#{$s->{words}} > 9;
+ }
+ # If any stories match and are above the threshold, return them.
+ # Pull out the top $max_wanted scorers. Then sort them by time.
+ my $minweight = $constants->{similarstoryminweight} || 4;
+ my @sids = sort {
+ $stories->{$b}{weight} <=> $stories->{$a}{weight}
+ ||
+ $stories->{$b}{'time'} cmp $stories->{$a}{'time'}
+ ||
+ $a cmp $b
+ } grep { $stories->{$_}{weight} >= $minweight } keys %$stories;
+#print STDERR "all sids @sids stories " . Dumper($stories);
+ return [ ] if !@sids;
+ $#sids = $max_wanted-1 if $#sids > $max_wanted-1;
+ # Now that we have only the ones we want, push them onto the
+ # return list sorted by time.
+ my $ret_ar = [ ];
+ for my $sid (sort {
+ $stories->{$b}{'time'} cmp $stories->{$a}{'time'}
+ ||
+ $stories->{$b}{weight} <=> $stories->{$a}{weight}
+ ||
+ $a cmp $b
+ } @sids) {
+ push @$ret_ar, {
+ sid => $sid,
+ weight => sprintf("%.0f", $stories->{$sid}{weight}),
+ title => $stories->{$sid}{title},
+ 'time' => $stories->{$sid}{'time'},
+ words => $stories->{$sid}{words},
+ displaystatus => $stories->{$sid}{displaystatus},
+ };
+ }
+#print STDERR "ret_ar " . Dumper($ret_ar);
+ return $ret_ar;
+}
+
+########################################################
sub getAuthor {
my($self) = @_;
_genericCacheRefresh($self, 'authors_cache', getCurrentStatic('block_expire'));
View
52 Slash/DB/Static/MySQL/MySQL.pm
@@ -1029,6 +1029,58 @@ sub createAuthorCache {
}
########################################################
+# For plugins/Admin/refresh_uncommon.pl
+sub refreshUncommonStoryWords {
+ my($self) = @_;
+ my $constants = getCurrentStatic();
+ my $ignore_threshold = $constants->{uncommonstoryword_thresh} || 2;
+ my $n_days = $constants->{similarstorydays} || 30;
+ $ignore_threshold = int($n_days/$ignore_threshold+0.5);
+
+ # First, get a collection of all words posted in stories for the last
+ # however-many days.
+ my $arr = $self->sqlSelectAll(
+ "title, introtext, bodytext",
+ "story_text, stories",
+ "stories.sid = story_text.sid
+ AND stories.time >= DATE_SUB(NOW(), INTERVAL $n_days DAY)"
+ );
+ my $word_hr = { };
+ for my $ar (@$arr) {
+ findWords($ar->[0], 8 , $word_hr) if $ar->[0]; # title
+ findWords($ar->[1], 1 , $word_hr) if $ar->[1]; # introtext
+ findWords($ar->[2], 0.5, $word_hr) if $ar->[2]; # bodytext
+ }
+#use Data::Dumper; print STDERR Dumper($word_hr);
+
+ # The only words that count as uncommon are the ones that appear in
+ # stories less frequently than once every uncommonstoryword_thresh
+ # days. Everything else is, well, too common to bother with.
+ my @uncommon_words = ( );
+ my $maxlen = $constants->{uncommonstorywords_maxlen} || 65000;
+ my $minlen = $constants->{uncommonstoryword_minlen} || 3;
+ my $length = $maxlen+1;
+ @uncommon_words =
+ sort {
+ $word_hr->{$b}{weight} <=> $word_hr->{$a}{weight}
+ ||
+ length($b) <=> length($a)
+ ||
+ $a cmp $b
+ }
+ grep { $word_hr->{$_}{count} <= $ignore_threshold }
+ grep { length($_) > $minlen }
+ keys %$word_hr;
+#print STDERR "@uncommon_words\n";
+ my $uncommon_words = substr(join(" ", @uncommon_words), 0, $maxlen);
+ if (length($uncommon_words) == $maxlen) {
+ $uncommon_words =~ s/\s+\S+\Z//;
+ }
+
+ $self->setVar("uncommonstorywords", $uncommon_words);
+}
+
+########################################################
# For tasks/flush_formkeys.pl
sub deleteOldFormkeys {
my($self, $timeframe) = @_;
View
7 Slash/Slash.pm
@@ -198,6 +198,8 @@ sub selectComments {
# If we are refreshing, PRINT the totals so freshenup/archive
# tasks can update without having to redo all of the work.
+ push @{$comments->{0}{totals}}, 0
+ while scalar(@{$comments->{0}{totals}}) < $max-$min;
my $hp = join ',', @{$comments->{0}{totals}};
print STDERR "count $count, hitparade $hp\n";
}
@@ -912,8 +914,13 @@ sub displayStory {
# well, also, dispStory needs a story reference, not an SID,
# though that could be changed -- pudge
+ # And now we're also calling parseSlashizedLinks. - 2002/05/24 Jamie
+
$story->{storytime} = timeCalc($story->{'time'});
+ $story->{introtext} = parseSlashizedLinks($story->{introtext});
+ $story->{bodytext} = parseSlashizedLinks($story->{bodytext});
+
# get extra data from section table for this story
# (if exists)
# this only needs to run for slashdot
View
316 Slash/Utility/Data/Data.pm
@@ -44,13 +44,16 @@ use vars qw($VERSION @EXPORT);
($VERSION) = ' $Revision$ ' =~ /\$Revision:\s+([^\s]+)/;
@EXPORT = qw(
addDomainTags
+ slashizeLinks
approveCharref
parseDomainTags
+ parseSlashizedLinks
balanceTags
changePassword
chopEntity
countWords
encryptPassword
+ findWords
fixHref
fixint
fixparam
@@ -1531,7 +1534,7 @@ if necessary.
=item HTML
-The HTML with tagged with domains.
+The HTML tagged with domains.
=item RECOMMENDED
@@ -1558,7 +1561,7 @@ sub parseDomainTags {
my $user = getCurrentUser();
- # default is 2 # XXX Jamie I think should be 1
+ # default is 2
my $udt = exists($user->{domaintags}) ? $user->{domaintags} : 2;
$udt =~ /^(\d+)$/; # make sure it's numeric, sigh
@@ -1584,6 +1587,93 @@ sub parseDomainTags {
#========================================================================
+=head2 parseSlashizedLinks(HTML)
+
+To be called before sending the HTML to the user for display. Takes
+HTML with slashized links (see slashizedLinks()) and converts them to
+the appropriate HTML.
+
+=over 4
+
+=item Parameters
+
+=over 4
+
+=item HTML
+
+The HTML with slashized links.
+
+=back
+
+=item Return value
+
+The parsed HTML.
+
+=back
+
+=cut
+
+sub parseSlashizedLinks {
+ my($html) = @_;
+ $html =~ s{
+ <A[ ]HREF="__SLASHLINK__"
+ ([^>]+)
+ >
+ }{
+ _slashlink_to_link($1)
+ }gxe;
+ return $html;
+}
+
+# This function mirrors the behavior of _link_to_slashlink.
+
+sub _slashlink_to_link {
+ my($sl) = @_;
+ my $ssi = getCurrentForm('ssi') || 0;
+ my $slashdb = getCurrentDB();
+ my $constants = getCurrentStatic();
+ my $root = $constants->{rootdir};
+ my %attr = $sl =~ / (\w+)="([^"]+)"/g;
+ # We should probably de-strip-attribute the values of %attr
+ # here, but it really doesn't matter.
+
+ # Load up special values and delete them from the attribute list.
+ my $sn = delete $attr{sn} || "";
+ my $sect = delete $attr{sect} || "";
+ my $sect_root = ($sect ? $slashdb->getSection($sect, "url") : "")
+ || $root;
+ my $frag = delete $attr{frag} || "";
+ # Generate the return value.
+ my $retval = q{<A HREF="};
+ if ($sn eq 'comments') {
+ $retval .= qq{$sect_root/comments.pl?};
+ $retval .= join("&",
+ map { qq{$_=$attr{$_}} }
+ sort keys %attr);
+ $retval .= qq{#$frag} if $frag;
+ } elsif ($sn eq 'article') {
+ # Different behavior here, depending on whether we are
+ # outputting for a dynamic page, or a static one.
+ # This is the main reason for doing slashlinks at all!
+ if ($ssi) {
+ $retval .= qq{$sect_root/};
+ $retval .= qq{$sect/$attr{sid}.shtml};
+ $retval .= qq{?tid=$attr{tid}} if $attr{tid};
+ $retval .= qq{#$frag} if $frag;
+ } else {
+ $retval .= qq{$sect_root/article.pl?};
+ $retval .= join("&",
+ map { qq{$_=$attr{$_}} }
+ sort keys %attr);
+ $retval .= qq{#$frag} if $frag;
+ }
+ }
+ $retval .= q{">};
+ return $retval;
+}
+
+#========================================================================
+
=head2 addDomainTags(HTML)
To be called only after C<balanceTags>, or results are not guaranteed.
@@ -1712,6 +1802,146 @@ sub _url_to_domain_tag {
#========================================================================
+=head2 slashizeLinks(HTML)
+
+Munges HTML E<lt>aE<gt> tags that point to specific types of links on
+this Slash site (articles.pl, comments.pl, and articles .shtml pages)
+into a special type of E<lt>aE<gt> tag. Note that this is not proper
+HTML, and that it will be converted back to proper HTML when the
+story is displayed.
+
+=over 4
+
+=item Parameters
+
+=over 4
+
+=item HTML
+
+The HTML to slashize links in.
+
+=back
+
+=item Return value
+
+The converted HTML.
+
+=back
+
+=cut
+
+sub slashizeLinks {
+ my($html) = @_;
+ $html =~ s{
+ (<a[^>]+href\s*=\s*"?)
+ ([^"<>]+)
+ ([^>]*>)
+ }{
+ _link_to_slashlink($1, $2, $3)
+ }gxie;
+ return $html;
+}
+
+# URLs that match a pattern are converted into our special format.
+# Those that don't are passed through. This function mirrors the
+# behavior of _slashlink_to_link.
+
+sub _link_to_slashlink {
+ my($pre, $url, $post) = @_;
+ my $slashdb = getCurrentDB();
+ my $constants = getCurrentStatic();
+ my $retval = "$pre$url$post";
+#print STDERR "_link_to_slashlink begin '$url'\n";
+
+ # URLs may show up in any section, which means when absolutized
+ # their host may be either the main one or a sectional one. We
+ # have to allow for any of those possibilities.
+ my $abs = $constants->{absolutedir};
+ my $sections = $slashdb->getSections();
+ my @sect_urls = grep { $_ }
+ map { $sections->{$_}{url} }
+ sort keys %$sections;
+ my $any_host = "(?:"
+ . join("|", $abs, @sect_urls)
+ . ")";
+
+ my $canon_url = URI->new_abs($url, $abs)->canonical;
+ my $frag = $canon_url->fragment() || "";
+
+ # All possible URLs' arguments, soon to be attributes in the
+ # new tag (thus "urla"). Values are the name of the script ("sn")
+ # and expressions that can pull those arguments out of a text URL.
+ # (We could use URI::query_form to pull out the .pl arguments, but
+ # that wouldn't help with the .shtml regex so we might as well do
+ # it this way.) If we ever want to extend slash-linking to cover
+ # other tags, here's the place to start.
+ my %urla = (
+ qr{^$any_host/article\.pl\?} =>
+ { _sn => 'article',
+ sid => qr{\bsid=([\w/]+)} },
+ qr{^$any_host/\w+/\d+/\d+/\d+/\d+\.shtml\b} =>
+ { _sn => 'article',
+ sid => qr{^$any_host/\w+/(\d+/\d+/\d+/\d+)\.shtml\b} },
+ qr{^$any_host/comments\.pl\?} =>
+ { _sn => 'comments',
+ sid => qr{\bsid=(\d+)},
+ cid => qr{\bcid=(\d+)} },
+ );
+
+ # %attr is the data structure storing the attributes of the <a>
+ # tag that we will use.
+ my %attr = ( );
+ URLA: for my $regex (sort keys %urla) {
+ # This loop only applies to the regex that matches this
+ # URL (if any).
+ next unless $canon_url =~ $regex;
+
+ # The non-underscore keys are regexes that we need to
+ # pull from the URL.
+ for my $arg (sort grep !/^_/, keys %{$urla{$regex}}) {
+ ($attr{$arg}) = $canon_url =~ $urla{$regex}{$arg};
+ delete $attr{$arg} if !$attr{$arg};
+ }
+ # The _sn key is special, it gets copied into sn.
+ $attr{sn} = $urla{$regex}{_sn};
+ # Section and topic attributes get thrown in too.
+ if ($attr{sn} eq 'comments') {
+ # sid is actually a discussion id!
+ $attr{sect} = $slashdb->getDiscussion(
+ $attr{sid}, 'section');
+ $attr{tid} = $slashdb->getDiscussion(
+ $attr{sid}, 'topic');
+ } else {
+ # sid is a story id
+ $attr{sect} = $slashdb->getStory(
+ $attr{sid}, 'section', 1);
+ $attr{tid} = $slashdb->getStory(
+ $attr{sid}, 'tid', 1);
+ }
+ $attr{frag} = $frag if $frag;
+ # We're done once we match any regex to the link's URL.
+ last URLA;
+ }
+
+ # If we have something good in %attr, we can go ahead and
+ # use our custom tag. Concatenate it together.
+ if ($attr{sn}) {
+ $retval = q{<A HREF="__SLASHLINK__" }
+ . join(" ",
+ map { qq{$_="} . strip_attribute($attr{$_}) . qq{"} }
+ sort keys %attr)
+ . q{>};
+ }
+
+#print STDERR "_link_to_slashlink end '$url'\n";
+ # Return either the new $retval we just made, or just send the
+ # original text back.
+ return $retval;
+}
+
+
+#========================================================================
+
=head2 xmlencode_plain(TEXT)
Same as xmlencode(TEXT), but does not encode for use in HTML. This is
@@ -1994,6 +2224,88 @@ sub countWords {
return scalar @words / 2;
}
+########################################################
+# A very careful extraction of all the words from HTML text.
+# URLs count as words. (A different algorithm than countWords
+# because countWords just has to be fast; this has to be
+# precise. Also, this counts occurrences of each word -- which
+# is different than counting the overall number of words.)
+sub findWords {
+ my($text, $weight_factor, $wordcount) = @_;
+ my $constants = getCurrentStatic();
+
+ # The default amount to add is 1.
+ $weight_factor ||= 1;
+
+ # Return a hashref; keys are the words, values are the
+ # number of times they appear. If a hashref was passed
+ # in, add to it, otherwise make our own.
+ $wordcount ||= { };
+
+ # Pull out linked URLs from $text and treat them specially.
+ # We only recognize the two most common types of link.
+ # Actually, we could use HTML::LinkExtor here, which might
+ # be more robust...
+ my @urls_ahref = $text =~ m{
+ <a[^>]+href\s*=\s*"?
+ ([^"<>]+)
+ }gxi;
+ my @urls_imgsrc = $text =~ m{
+ <img[^>]+src\s*=\s*"?
+ ([^"<>]+)
+ }gxi;
+ foreach my $url (@urls_ahref, @urls_imgsrc) {
+ $url = URI->new_abs($url, $constants->{absolutedir})
+ ->canonical
+ ->as_string;
+ # Tiny URLs don't count.
+ next unless length($url) > 8;
+ # All URLs get a high weight so they are almost
+ # guaranteed to get into the list.
+ $wordcount->{$url}{weight} += $weight_factor * 10;
+ $wordcount->{$url}{count}++;
+ }
+
+ # Now remove the text's HTML tags and find and count the
+ # words remaining in the text. For our purposes, words
+ # can include character references (entities) and the '
+ # and - characters as well as \w. This regex is a bit
+ # messy. I've tried to reduce backtracking as much as
+ # possible but it's still a concern.
+ $text = strip_notags($text);
+ my $entity = qr{(?:&(?:(?:#x[0-9a-f]+|\d+)|[a-z0-9]+);)};
+ my @words = $text =~ m{
+ (
+ # Start with a non-apostrophe, non-dash char.
+ (?: $entity | \w )
+ # Followed by, optionally, any valid char.
+ [\w'-]?
+ # Followed by zero or more sequence of entities,
+ # character references, or normal chars. The
+ # ' and - must alternate with the other types,
+ # so '' and -- break words.
+ (?:
+ (?: $entity | \w ) ['-]?
+ )*
+ # And end with a non-apostrophe, non-dash char.
+ (?: $entity | \w )
+ )
+ }gxi;
+ for my $word (@words) {
+ # Ignore all words less than 4 chars.
+ next unless length($word) > 3;
+ $wordcount->{lc $word}{weight} += $weight_factor;
+ }
+ my %uniquewords = map { ( lc($_), 1 ) } @words;
+ for my $word (keys %uniquewords) {
+ $wordcount->{$word}{count}++;
+ }
+
+ # Return the hashref (the same one passed in, if one was
+ # passed in).
+ return $wordcount;
+}
+
#========================================================================
=head2 inList(list, value)
View
1 plugins/Admin/PLUGIN
@@ -3,6 +3,7 @@ name=Admin
mysql_dump=dump
pg_dump=dump
htdoc=admin.pl
+task=refresh_uncommon.pl
template=templates/adminLoginForm;admin;default
template=templates/blockEdit;admin;default
template=templates/colorEdit;admin;default
View
52 plugins/Admin/admin.pl
@@ -1037,14 +1037,14 @@ sub editStory {
$storyref->{dept} =~ s/^-//;
$storyref->{dept} =~ s/-$//;
- $storyref->{introtext} = $slashdb->autoUrl(
- $form->{section},
- $storyref->{introtext}
- );
- $storyref->{bodytext} = $slashdb->autoUrl(
- $form->{section},
- $storyref->{bodytext}
- );
+ for my $field (qw( introtext bodytext )) {
+ $storyref->{$field} = $slashdb->autoUrl(
+ $form->{section}, $storyref->{$field});
+ $storyref->{$field} = slashizeLinks(
+ $storyref->{$field});
+ $storyref->{$field} = parseSlashizedLinks(
+ $storyref->{$field});
+ }
$topic = $slashdb->getTopic($storyref->{tid});
$form->{uid} ||= $user->{uid};
@@ -1147,6 +1147,27 @@ sub editStory {
$future = [ reverse(@$future) ];
my $past = $slashdb->getStoryByTimeAdmin('<', $storyref, "3");
+ my $num_sim = $constants->{similarstorynumshow} || 5;
+ my $similar_stories = $slashdb->getSimilarStories($storyref, $num_sim);
+ # Truncate that data to a reasonable size for display.
+ if ($similar_stories && @$similar_stories) {
+ for my $sim (@$similar_stories) {
+ # Display a max of five words reported per story.
+ $#{$sim->{words}} = 4 if $#{$sim->{words}} > 4;
+ for my $word (@{$sim->{words}}) {
+ # Max of 12 chars per word.
+ $word = substr($word, 0, 12);
+ }
+ if (length($sim->{title}) > 35) {
+ # Max of 35 char title.
+ $sim->{title} = substr($sim->{title}, 0, 30);
+ $sim->{title} =~ s/\s+\S+$//;
+ $sim->{title} .= "...";
+ }
+ }
+ }
+#use Data::Dumper; print STDERR "similar_stories: " . Dumper($similar_stories);
+
my $authortext = slashDisplay('futurestorybox', {
past => $past,
present => $storyref,
@@ -1180,6 +1201,7 @@ sub editStory {
extras => $extracolumns,
multi_topics => $multi_topics,
story_topics => $story_topics,
+ similar_stories => $similar_stories,
});
}
@@ -1423,12 +1445,14 @@ sub updateStory {
$slashdb->setStoryTopics($form->{sid}, $tid_ref);
}
+ $form->{introtext} = slashizeLinks($form->{introtext});
+ $form->{bodytext} = slashizeLinks($form->{bodytext});
my $data = {
- uid => $form->{uid},
- sid => $form->{sid},
- title => $form->{title},
- section => $form->{section},
+ uid => $form->{uid},
+ sid => $form->{sid},
+ title => $form->{title},
+ section => $form->{section},
tid => $form->{tid},
dept => $form->{dept},
'time' => $time,
@@ -1448,6 +1472,7 @@ sub updateStory {
}
}
+#use Data::Dumper; print STDERR "updateStory setStory data " . Dumper($data);
$slashdb->setStory($form->{sid}, $data);
my $dis_data = {
sid => $data->{sid},
@@ -1498,6 +1523,8 @@ sub saveStory {
$form->{relatedtext} = getRelated(
"$form->{title} $form->{bodytext} $form->{introtext}"
) . otherLinks($edituser->{nickname}, $form->{tid}, $edituser->{uid});
+ $form->{introtext} = slashizeLinks($form->{introtext});
+ $form->{bodytext} = slashizeLinks($form->{bodytext});
my $time = ($form->{fastforward})
? $slashdb->getTime()
@@ -1531,6 +1558,7 @@ sub saveStory {
$data->{$key} = $form->{$key} if $form->{$key};
}
}
+#use Data::Dumper; print STDERR "saveStory createStory extras '@$extras' data " . Dumper($data);
my $sid = $slashdb->createStory($data);
# we can use multiple values in forms now, we don't
View
7 plugins/Admin/dump
@@ -16,3 +16,10 @@ INSERT INTO menus (menu, label, value, seclev, menuorder) VALUES ('admin','Site'
INSERT INTO menus (menu, label, value, seclev, menuorder) VALUES ('admin','Slashd','[% constants.rootdir %]/admin.pl?op=slashd',500,17);
INSERT INTO menus (menu, label, value, seclev, menuorder) VALUES ('admin','Help','[% constants.rootdir %]/slashguide.shtml',1,17);
INSERT INTO vars (name, value, description) VALUES ('ispell', 'ispell', 'Location of ispell binary or empty string to turn off');
+INSERT INTO vars (name, value, description) VALUES ('similarstorydays', '30', 'Number of days to look back for uncommon words when determining similar stories');
+INSERT INTO vars (name, value, description) VALUES ('similarstoryminweight', '4', 'Minimum weight necessary for a story to be displayed as similar');
+INSERT INTO vars (name, value, description) VALUES ('similarstorynumshow', '5', 'Maximum number of similar stories to show in admin preview');
+INSERT INTO vars (name, value, description) VALUES ('uncommonstorywords', '', 'A generated list of uncommon words found in recent stories');
+INSERT INTO vars (name, value, description) VALUES ('uncommonstorywords_maxlen', '65000', 'Maximum length of the uncommon words list');
+INSERT INTO vars (name, value, description) VALUES ('uncommonstoryword_minlen', '3', 'Minimum length of the words in the uncommon words list');
+INSERT INTO vars (name, value, description) VALUES ('uncommonstoryword_thresh', '2', 'Words occurring more often than once every this-many days are considered common');
View
17 plugins/Admin/refresh_uncommon.pl
@@ -0,0 +1,17 @@
+#!/usr/bin/perl -w
+
+use strict;
+
+use vars qw( %task $me );
+
+$task{$me}{timespec} = '50 0,6,12,18 * * *';
+$task{$me}{timespec_panic_1} = ''; # if panic, we can wait
+$task{$me}{on_startup} = 1;
+$task{$me}{code} = sub {
+ my($virtual_user, $constants, $slashdb, $user) = @_;
+
+ $slashdb->refreshUncommonStoryWords();
+};
+
+1;
+
View
32 plugins/Admin/templates/editStory;admin;default
@@ -2,8 +2,8 @@ __section__
default
__description__
-Added to storyref, introtext_wordcount and bodytext_wordcount. These fields are inserted by admin.pl
-before this template is displayed.
+Added to storyref, introtext_wordcount and bodytext_wordcount. These
+fields are inserted by admin.pl before this template is displayed.
__title__
__page__
@@ -85,7 +85,29 @@ __template__
</TD>
</TR>
</TABLE>
- [% END %]
+[% END %]
+
+[% IF similar_stories.size > 0 %]
+ <TABLE BORDER="0" CELLPADDING="2" CELLSPACING="0">
+ <TR><TD COLSPAN="5" ALIGN="CENTER"><B>Similar Stories:</B></TD></TR>
+ [% FOREACH story = similar_stories %]
+ <TR>
+ <TD VALIGN="TOP">[% IF story.displaystatus == 0;
+ "&nbsp;";
+ ELSIF story.displaystatus == 1;
+ "<I>sect</I>";
+ ELSE;
+ "<I>ND</I>";
+ END; %]</TD>
+ <TD ALIGN="RIGHT" VALIGN="TOP">[% story.weight %]</TD>
+ <TD VALIGN="TOP"><A HREF="[% constants.rootdir %]/article.pl?sid=[% story.sid %]">
+ [% story.title %]</A></TD>
+ <TD VALIGN="TOP">[% Slash.timeCalc(story.time) %]</TD>
+ <TD VALIGN="TOP">[% story.words.join(" ") %]</TD>
+ </TR>
+ [% END %]
+ </TABLE>
+[% END %]
<TABLE BORDER="0" CELLPADDING="2" CELLSPACING="0">
<TR>
@@ -187,7 +209,7 @@ __template__
</TABLE>
<BR>Intro Copy[% IF storyref.introtext_wordcount %] ([%
-storyref.introtext_wordcount %] word[s])[% END %]<BR>
+storyref.introtext_wordcount %] word[% IF storyref.introtext_wordcount != 1; "s"; END %])[% END %]<BR>
<TABLE BORDER="0" CELLPADDING="2" CELLSPACING="0">
<TR>
<TD><TEXTAREA WRAP="VIRTUAL" NAME="introtext" COLS="[% user.textarea_cols || constants.textarea_cols %]" ROWS="10">[% storyref.introtext | strip_literal %]</TEXTAREA></TD>
@@ -197,7 +219,7 @@ storyref.introtext_wordcount %] word[s])[% END %]<BR>
[% PROCESS editbuttons %]
<BR>Extended Copy[% IF storyref.bodytext_wordcount %] ([%
-storyref.bodytext_wordcount %] word[s])[% END %]<BR>
+storyref.bodytext_wordcount %] word[% IF storyref.bodytext_wordcount != 1; "s"; END %])[% END %]<BR>
<TABLE BORDER="0" CELLPADDING="2" CELLSPACING="0">
<TR>
<TD>
View
2 plugins/SOAP/soap.pl
@@ -24,7 +24,7 @@ sub main {
if ($user->{state}{post}) {
$r->method('POST');
}
- # Do some secutiry checking here
+ # Do some security checking here
$user->{state}{packagename} = __PACKAGE__;
return SOAP::Transport::HTTP::Apache->dispatch_to
($action)->handle;
View
13 sql/mysql/upgrades
@@ -328,3 +328,16 @@ INSERT INTO vars (name, value, description) VALUES ('recent_topic_img_count','5'
INSERT INTO vars (name, value, description) VALUES ('recent_topic_txt_count','5','Number of recent topics to store in the block "recenttopics"');
# SLASHCODE LAST UPDATED HERE
+
+# For plugins/Admin
+INSERT INTO vars (name, value, description) VALUES ('similarstorydays', '30', 'Number of days to look back for uncommon words');
+INSERT INTO vars (name, value, description) VALUES ('similarstoryminweight', '4', 'Minimum weight necessary for a story to be displayed as similar');
+INSERT INTO vars (name, value, description) VALUES ('similarstorynumshow', '5', 'Maximum number of similar stories to show in admin preview');
+INSERT INTO vars (name, value, description) VALUES ('uncommonstorywords', '', 'Uncommon words found in recent stories');
+INSERT INTO vars (name, value, description) VALUES ('uncommonstorywords_maxlen', '65000', 'Maximum length of the uncommon words list');
+INSERT INTO vars (name, value, description) VALUES ('uncommonstoryword_minlen', '3', 'Minimum length of the words in the uncommon words list');
+INSERT INTO vars (name, value, description) VALUES ('uncommonstoryword_thresh', '2', 'Words occurring more often than once every this-many days are considered common');
+# and this is not a DB command but needs to be done for
+# each installed site that uses plugins/Admin:
+ln -s /usr/local/slash/plugins/Admin/refresh_uncommon.pl /usr/local/slash/site/sitename/tasks/
+
View
6 themes/slashcode/htdocs/article.pl
@@ -19,7 +19,8 @@ sub main {
my $story;
#Yeah, I am being lazy and paranoid -Brian
- if (!($user->{author} or $user->{is_admin}) and !$slashdb->checkStoryViewable($form->{sid})) {
+ if (!($user->{author} || $user->{is_admin})
+ && !$slashdb->checkStoryViewable($form->{sid})) {
$story = '';
} else {
$story = $slashdb->getStory($form->{sid});
@@ -30,6 +31,9 @@ sub main {
my $title = $SECT->{isolate} ?
"$SECT->{title} | $story->{title}" :
"$constants->{sitename} | $story->{title}";
+ $story->{introtext} = parseSlashizedLinks($story->{introtext});
+ $story->{bodytext} = parseSlashizedLinks($story->{bodytext});
+
my $authortext;
if ($user->{is_admin} ) {
my $future = $slashdb->getStoryByTimeAdmin('>', $story, "3");

0 comments on commit 7736a75

Please sign in to comment.