Permalink
Browse files

! Added --liberal, which changes the way prune works to identify more duplication.

Refactored by extracting update_masses.
Refactored by extracting prune_conservatively.
Added prune_liberally and run it optionally in prune.
+ Made report's sort more stable, so I can do better comparison runs.
Added Array#delete_eql for liberal prune.

[git-p4: depot-paths = "//src/flay/dev/": change = 8323]
  • Loading branch information...
1 parent eb4591f commit bf52bbbd8c3c6076f03d2f97888e3afe9fdab33d @zenspider zenspider committed Mar 22, 2013
Showing with 119 additions and 2 deletions.
  1. +66 −2 lib/flay.rb
  2. +53 −0 test/test_flay.rb
View
@@ -24,6 +24,7 @@ def self.default_options
:summary => false,
:verbose => false,
:timeout => 10,
+ :liberal => false,
}
end
@@ -47,6 +48,10 @@ def self.parse_options args = ARGV
abort "--fuzzy is no longer supported. Sorry. It sucked."
end
+ opts.on('-l', '--liberal', "Use a more liberal detection method.") do
+ options[:liberal] = true
+ end
+
opts.on('-m', '--mass MASS', Integer,
"Sets mass threshold (default = #{options[:mass]})") do |m|
options[:mass] = m.to_i
@@ -174,6 +179,15 @@ def analyze
self.hashes.each do |hash,nodes|
identical[hash] = nodes[1..-1].all? { |n| n == nodes.first }
+ end
+
+ update_masses
+ end
+
+ def update_masses
+ self.total = 0
+ masses.clear
+ self.hashes.each do |hash, nodes|
masses[hash] = nodes.first.mass * nodes.size
masses[hash] *= (nodes.size) if identical[hash]
self.total += masses[hash]
@@ -201,8 +215,15 @@ def prune
# prune trees that aren't duped at all, or are too small
self.hashes.delete_if { |_,nodes| nodes.size == 1 }
- # extract all subtree hashes from all nodes
+ return prune_liberally if option[:liberal]
+
+ prune_conservatively
+ end
+
+ def prune_conservatively
all_hashes = {}
+
+ # extract all subtree hashes from all nodes
self.hashes.values.each do |nodes|
nodes.first.all_structural_subhashes.each do |h|
all_hashes[h] = true
@@ -213,6 +234,37 @@ def prune
self.hashes.delete_if { |h,_| all_hashes[h] }
end
+ def prune_liberally
+ update_masses
+
+ all_hashes = Hash.new { |h,k| h[k] = [] }
+
+ # record each subtree by subhash, but skip if subtree mass > parent mass
+ self.hashes.values.each do |nodes|
+ nodes.each do |node|
+ tophash = node.structural_hash
+ topscore = self.masses[tophash]
+
+ node.deep_each do |subnode|
+ subhash = subnode.structural_hash
+ subscore = self.masses[subhash]
+
+ next if subscore and subscore > topscore
+
+ all_hashes[subhash] << subnode
+ end
+ end
+ end
+
+ # nuke only individual items by object identity
+ self.hashes.each do |h,v|
+ v.delete_eql all_hashes[h]
+ end
+
+ # nuke buckets we happened to fully empty
+ self.hashes.delete_if { |k,v| v.size <= 1 }
+ end
+
def n_way_diff *data
data.each_with_index do |s, i|
c = (?A.ord + i).chr
@@ -274,7 +326,13 @@ def report prune = nil
end
count = 0
- masses.sort_by { |h,m| [-m, hashes[h].first.file] }.each do |hash, mass|
+ sorted = masses.sort_by { |h,m|
+ [-m,
+ hashes[h].first.file,
+ hashes[h].first.line,
+ hashes[h].first.first.to_s]
+ }
+ sorted.each do |hash, mass|
nodes = hashes[hash]
next unless nodes.first.first == prune if prune
puts
@@ -327,3 +385,9 @@ def all_structural_subhashes
hashes
end
end
+
+class Array
+ def delete_eql other
+ self.delete_if { |o1| other.any? { |o2| o1.equal? o2 } }
+ end
+end
View
@@ -24,6 +24,20 @@ def test_structural_hash
assert_equal hash, @s.deep_clone.structural_hash
end
+ def test_delete_eql
+ s1 = s(:a, s(:b, s(:c)))
+ s2 = s(:a, s(:b, s(:c)))
+ s3 = s(:a, s(:b, s(:c)))
+
+ a1 = [s1, s2, s3]
+ a2 = [s1, s3]
+
+ a1.delete_eql a2
+
+ assert_equal [s2], a1
+ assert_same s2, a1.first
+ end
+
def test_all_structural_subhashes
s = s(:iter,
s(:call, s(:arglist, s(:lit))),
@@ -109,6 +123,45 @@ def test_prune
assert_equal exp, flay.hashes.values.sort_by(&:inspect)
end
+ def test_prune_liberal
+ contained = s(:a, s(:b,s(:c)), s(:d,s(:e)))
+ container = s(:d, contained)
+
+ flay = Flay.new :mass => 0, :liberal => true
+ flay.process_sexp s(:outer,contained)
+ 2.times { flay.process_sexp s(:outer,container) }
+
+ exp = eval <<-EOM # just to prevent emacs from reindenting it
+ [
+ [ s(:a, s(:b, s(:c)), s(:d, s(:e))),
+ s(:a, s(:b, s(:c)), s(:d, s(:e))),
+ s(:a, s(:b, s(:c)), s(:d, s(:e)))],
+ [ s(:b, s(:c)),
+ s(:b, s(:c)),
+ s(:b, s(:c))],
+ [s(:d, s(:a, s(:b, s(:c)), s(:d, s(:e)))),
+ s(:d, s(:a, s(:b, s(:c)), s(:d, s(:e))))],
+ [ s(:d, s(:e)),
+ s(:d, s(:e)),
+ s(:d, s(:e))],
+ ]
+ EOM
+
+ assert_equal exp, flay.hashes.values.sort_by(&:inspect)
+
+ flay.prune
+
+ exp = [
+ [s(:a, s(:b, s(:c)), s(:d, s(:e))),
+ s(:a, s(:b, s(:c)), s(:d, s(:e))),
+ s(:a, s(:b, s(:c)), s(:d, s(:e)))],
+ [s(:d, s(:a, s(:b, s(:c)), s(:d, s(:e)))),
+ s(:d, s(:a, s(:b, s(:c)), s(:d, s(:e))))]
+ ]
+
+ assert_equal exp, flay.hashes.values.sort_by(&:inspect)
+ end
+
def test_process_sexp
flay = Flay.new

0 comments on commit bf52bbb

Please sign in to comment.