Skip to content

HTTPS clone URL

Subversion checkout URL

You can clone with HTTPS or Subversion.

Download ZIP
Browse files

Hash function ok. Dump function for debug

  • Loading branch information...
commit 540cb22fca55f8e61f35819b88ab24fd353cb934 1 parent b2f2f38
@ook authored
Showing with 25 additions and 1 deletion.
  1. +25 −1 lib/remove_duplicate.rb
View
26 lib/remove_duplicate.rb
@@ -2,8 +2,9 @@
## Takes as input a list of paths to analyse.
## It will scan every file under them and
## try to detect similar files.
-## family -> file_hash -> pathnames
+## family -> file_hash -> [pathnames]
require 'pathname'
+require 'digest/md5'
class RemoveDuplicate
@@ -14,6 +15,7 @@ def initialize(path_as_strings)
:verbose => false
}
stage_files(path_as_strings)
+ @scanned = {}
end
def stage_files(path_as_strings)
@@ -29,9 +31,31 @@ def stage_files(path_as_strings)
@staged_files
end
+ # pathname: a readable pathname
+ def hash(pathname)
+ ext = pathname.extname
+ ext = ('' == ext || nil == ext) ? :none : ext.to_sym
+ digest = Digest::MD5.hexdigest(File.read(pathname.to_s))
+ @scanned[ext] ||= {}
+ @scanned[ext][digest] ||= []
+ @scanned[ext][digest] << pathname
+ end
+
def run
puts "#{@staged_files.length} staged files"
puts @staged_files.map(&:to_s).join
+ @staged_files.each { |pathname| hash(pathname) }
+ puts dump
+ end
+
+ def dump
+ output = ''
+ @scanned.keys.each do |ext|
+ @scanned[ext].keys.each do |digest|
+ output << "#{ext.to_s}->#{digest}->#{@scanned[ext][digest].join(',')}\n"
+ end
+ end
+ output
end
end
Please sign in to comment.
Something went wrong with that request. Please try again.