Skip to content
Browse files

Added flay

[git-p4: depot-paths = "//src/flay/dev/": change = 4208]
  • Loading branch information...
0 parents commit 019fcebc93a87ef892bdc386488d59aecd617469 @zenspider zenspider committed Sep 17, 2008
Showing with 306 additions and 0 deletions.
  1. +6 −0 History.txt
  2. +7 −0 Manifest.txt
  3. +48 −0 README.txt
  4. +15 −0 Rakefile
  5. +7 −0 bin/flay
  6. +185 −0 lib/flay.rb
  7. +38 −0 notes.txt
6 History.txt
@@ -0,0 +1,6 @@
+=== 1.0.0 / 2008-09-17
+
+* 1 major enhancement
+
+ * Birthday!
+
7 Manifest.txt
@@ -0,0 +1,7 @@
+History.txt
+Manifest.txt
+README.txt
+Rakefile
+bin/flay
+lib/flay.rb
+test/test_flay.rb
48 README.txt
@@ -0,0 +1,48 @@
+= flay
+
+* FIX (url)
+
+== DESCRIPTION:
+
+FIX (describe your package)
+
+== FEATURES/PROBLEMS:
+
+* FIX (list of features or problems)
+
+== SYNOPSIS:
+
+ FIX (code sample of usage)
+
+== REQUIREMENTS:
+
+* FIX (list of requirements)
+
+== INSTALL:
+
+* FIX (sudo gem install, anything else)
+
+== LICENSE:
+
+(The MIT License)
+
+Copyright (c) 2008 FIX
+
+Permission is hereby granted, free of charge, to any person obtaining
+a copy of this software and associated documentation files (the
+'Software'), to deal in the Software without restriction, including
+without limitation the rights to use, copy, modify, merge, publish,
+distribute, sublicense, and/or sell copies of the Software, and to
+permit persons to whom the Software is furnished to do so, subject to
+the following conditions:
+
+The above copyright notice and this permission notice shall be
+included in all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED 'AS IS', WITHOUT WARRANTY OF ANY KIND,
+EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
+CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
15 Rakefile
@@ -0,0 +1,15 @@
+# -*- ruby -*-
+
+require 'rubygems'
+require 'hoe'
+require './lib/flay.rb'
+
# Gem declaration: the project is named 'flay' and takes its version from
# Flay::VERSION (loaded from ./lib/flay.rb above).
Hoe.new('flay', Flay::VERSION) { |spec|
  spec.rubyforge_name = 'seattlerb'
  spec.developer 'Ryan Davis', 'ryand-ruby@zenspider.com'

  # extra dependencies, appended in the same order as before
  [
    ['sexp_processor', '>= 3.0.0'],
    ['ruby_parser', '>= 1.1.0'],
  ].each { |dep| spec.extra_deps << dep }
}
+
+# vim: syntax=Ruby
7 bin/flay
@@ -0,0 +1,7 @@
+#!/usr/bin/env ruby
+
+require 'flay'
+
# Nothing to analyze without at least one file: running the report on an
# empty result would only print averages of empty lists. Show usage on
# stderr and exit non-zero instead.
abort "usage: #{File.basename($0)} FILE..." if ARGV.empty?

# Collect the sub-trees of every file named on the command line, then print
# the duplicates that were found.
flay = Flay.new
flay.process(*ARGV)
flay.report
185 lib/flay.rb
@@ -0,0 +1,185 @@
+#!/usr/bin/env ruby -w
+
+$: << "../../sexp_processor/dev/lib"
+$: << "../../ruby_parser/dev/lib"
+
+require 'rubygems'
+require 'sexp_processor'
+require 'ruby_parser'
+require 'pp' # TODO: remove
+
##
# Flay buckets the sub-trees of parsed Ruby files by Sexp#hash and reports
# the buckets that hold more than one tree, i.e. trees that hash alike.

class Flay
  VERSION = '1.0.0'

  ##
  # Hash mapping a node hash to the Array of nodes that produced it.
  # Looking up a missing key creates (and stores) an empty Array.

  attr_reader :hashes

  def initialize
    @hashes = Hash.new { |h, k| h[k] = [] }
  end

  ##
  # Parses every file in +files+ with RubyParser and records each sub-tree
  # that contains at least one nested Sexp and is complex_enough? under its
  # hash. All progress output goes to stderr so that stdout carries only the
  # report.

  def process(*files)
    files.each do |file|
      warn "Processing #{file}..."
      pt = RubyParser.new.process(File.read(file), file)
      warn "... done parsing"

      # NOTE(review): presumably the parser yields nil for a file with no
      # code -- confirm against ruby_parser. Nothing to walk in that case.
      next unless pt

      last_line = 0

      pt.deep_each do |node|
        next unless node.any? { |sub| Sexp === sub }
        next unless node.complex_enough?

        # Progress tick each time a kept node sits on a new multiple-of-five
        # line. Nodes without a line number are simply not ticked.
        l = node.line
        if l && l % 5 == 0 && last_line != l
          last_line = l
          warn "... line #{l}"
        end

        hashes[node.hash] << node
      end
    end
  end

  ##
  # Drops every bucket that holds a single tree, then every bucket whose
  # hash belongs to a sub-tree of a remaining node, so only the biggest
  # matching trees are left. Returns the pruned hashes.

  def prune
    # prune trees that aren't duped at all.
    hashes.delete_if { |_, nodes| nodes.size == 1 }

    # extract all subtree hashes from all remaining nodes; a Hash is used
    # as a set so the membership test below is a single lookup.
    nested = {}
    hashes.each_value do |nodes|
      nodes.each do |node|
        node.all_subhashes.each { |h| nested[h] = true }
      end
    end

    # nuke subtrees so we show the biggest matching tree possible
    hashes.delete_if { |h, _| nested.key?(h) }
  end

  ##
  # Prunes, then prints every remaining bucket: the first element of its
  # first node, followed by file, line, depth (d), number of nodes (n) and
  # their product (dn) for each node. When +prune+ is given, only buckets
  # whose first node starts with that value are printed. Summary statistics
  # follow, unless nothing was printed.

  def report prune = nil
    self.prune # explicit receiver: the +prune+ argument shadows the method

    ds  = []
    ns  = []
    dns = []

    hashes.each do |_, nodes|
      type = nodes.first.first
      next if prune && type != prune

      puts "Matches found in: #{type}"
      nodes.each do |node|
        d, n = node.depth, node.number_of_nodes
        dn = d * n
        ds << d
        ns << n
        dns << dn
        puts " #{node.file}:#{node.line} (d=#{d}, n=#{n}, dn = #{dn})"
      end
    end

    puts "number of nodes = #{ds.size}"

    # The averages of empty lists are 0 / 0.0 (NaN); skip the statistics
    # when there was nothing to measure.
    return if ds.empty?

    puts "depth = #{ds.average} +/- #{ds.standard_deviation}"
    puts "nodes = #{ns.average} +/- #{ns.standard_deviation}"
    puts "prodt = #{dns.average} +/- #{dns.standard_deviation}"
  end
end
+
##
# Additions to Sexp used for duplicate detection: a structural hash,
# traversal helpers and size metrics.

class Sexp
  ##
  # Structural hash: combines the hash of the first element with the hashes
  # of the nested Sexps. Other elements are ignored, so two trees that
  # differ only in such elements hash alike.

  def hash
    h = [first.hash]
    each_sexp { |sub| h << sub.hash }
    h.hash
  end

  ##
  # Yields each direct element that is itself a Sexp.

  def each_sexp
    each do |sexp|
      next unless Sexp === sexp

      yield sexp
    end
  end

  ##
  # Unique hashes of every Sexp nested anywhere below this one. deep_each
  # never yields the receiver, so nothing has to be dropped from the front
  # of the list; a node without nested Sexps gives [].

  def all_subhashes
    hashes = []
    deep_each { |node| hashes << node.hash }
    hashes.uniq
  end

  ##
  # Calls +block+ with every nested Sexp: each child first, then that
  # child's own descendants. The receiver itself is not yielded.

  def deep_each(&block)
    each_sexp do |sexp|
      block[sexp]
      sexp.deep_each(&block)
    end
  end

  ##
  # Number of Sexp levels nested below this node; 0 when there are none.

  def depth
    map { |sexp| sexp.depth + 1 if Sexp === sexp }.compact.max || 0
  end

  ##
  # Count of all nested Sexps (the receiver is not counted).

  def number_of_nodes
    nodes = 0
    deep_each { |_| nodes += 1 }
    nodes
  end

  ##
  # True when depth * number_of_nodes exceeds 75.

  def complex_enough?
    d, n = depth, number_of_nodes

    d * n > 75 # my avg product + stddev. woot
  end

  # Keep a handle on the existing pretty_print before replacing it.
  # NOTE(review): judging by the name, this is presumably here to avoid the
  # -w method-redefinition warning -- confirm.
  alias :shut_up! :pretty_print

  ##
  # Pretty-prints the elements followed by a "file:line" string.

  def pretty_print(q) # shows the hash
    q.group(1, 'S(', ')') do
      q.seplist(self + ["#{file}:#{line}"]) { |v| q.pp v }
    end
  end
end
+
module Enumerable # TEMPORARY
  ##
  # Sum of all the elements of the Enumerable
  #
  # Defined only when Enumerable has no #sum yet, so that a built-in
  # implementation (with its optional initial value and block) is never
  # replaced by this argument-less version.

  unless method_defined?(:sum)
    def sum
      inject(0) { |acc, i| acc + i }
    end
  end

  ##
  # Average of all the elements of the Enumerable, as a Float
  #
  # Elements are counted with #count, so #length is not required.
  # An empty Enumerable gives NaN (0 / 0.0).

  def average
    sum / count.to_f
  end

  ##
  # Variance of all the elements of the Enumerable
  #
  # NOTE: the squared deviations are divided by the number of elements (n),
  # not by n - 1, i.e. this is the population form despite the name.

  def sample_variance
    avg = average
    squares = inject(0) { |acc, i| acc + (i - avg) ** 2 }
    1 / count.to_f * squares
  end

  ##
  # Standard deviation of all the elements of the Enumerable: the square
  # root of #sample_variance

  def standard_deviation
    Math.sqrt(sample_variance)
  end

end
+
38 notes.txt
@@ -0,0 +1,38 @@
+# # 1. Build the list structures describing sequences
+# # 2. For k = MinimumSequenceLengthThreshold
+# # to MaximumSequenceLength
+# # 3. Place all subsequences of length k
+# # into buckets according to subsequence hash
+# # 4. For each subsequence i and j in same bucket
+# # If CompareSequences (i,j,k) >
+# # SimilarityThreshold
+# # Then { RemoveSequenceSubclonesOf(clones,i,j,k)
+# # AddSequenceClonePair(Clones,i,j,k)
+# # }
+
+# # TODO: process this as the same
+# # save(get(x * 3) - (x + 6) - 4);
+# # print(get(x * 3) - (x * 5) + 4);
+
+# # Figure 1 - Basic Subtree Clone Detection Algorithm x=0;
+
+# # 1. Clones=[]
+# # 2. For each subtree i:
+# # If mass(i)>=MassThreshold
+# # Then hash i to bucket
+# # 3. For each subtree i and j in the same bucket
+# # If CompareTree(i,j) > SimilarityThreshold
+# # Then { For each subtree s of i
+# # If IsMember(Clones,s)
+# # Then RemoveClonePair(Clones,s)
+# # For each subtree s of j
+# # If IsMember(Clones,s)
+# # Then RemoveClonePair(Clones,s)
+# # AddClonePair(Clones,i,j)
+# # }
+
+# # Similarity = 2 x S / (2 x S + L + R)
+# # where:
+# # S = number of shared nodes
+# # L = number of different nodes in sub-tree 1
+# # R = number of different nodes in sub-tree 2

0 comments on commit 019fceb

Please sign in to comment.
Something went wrong with that request. Please try again.