Permalink
Switch branches/tags
Nothing to show
Find file
Fetching contributors…
Cannot retrieve contributors at this time
executable file 213 lines (146 sloc) 3.38 KB
#!/usr/bin/env perl
=head1 NAME
dupes - Report on files with duplicate contents, via SHA1 hash.
=cut
=head1 SYNOPSIS
dupes [options] directory
General Options:
--help Show the help information for this script.
--verbose Show useful debugging information.
=cut
=head1 ABOUT
dupes is a simple script to report upon files that are identical,
recursively.
The process involves calculating the SHA1 hash of the file contents
and reporting on anything collisions we see.
Note that a collision might be caused by a symbolic link, or hardlink,
so blindly deleting duplicates without investigation is almost certainly
a mistake.
=cut
=head1 AUTHOR
Steve
--
http://www.steve.org.uk/
=cut
=head1 LICENSE
Copyright (c) 2013 by Steve Kemp. All rights reserved.
This script is free software;you can redistribute it and/or modify it under
the same terms as Perl itself.
The LICENSE file contains the full text of the license.
=cut
use strict;
use warnings;
use File::Find;
use Getopt::Long;
use Pod::Usage;
#
# Parse the arguments
#
my %config = parsedOptions();
#
# The path to examine.
#
my $path = $ARGV[0] || '.';
#
# Get the hashing object, dynamically.
#
my $ctx = getHashObject();
my %digest;
#
# Find files and store the hash of their contents.
#
find( {
'wanted' => sub {
if ( -f $_ )
{
lstat;
if ( ( -r _ ) && ( !-l _ ) )
{
$ctx->reset;
$ctx->addfile($_);
my $md5 = $ctx->hexdigest;
if ( exists $digest{ $md5 } )
{
push @{ $digest{ $md5 }->{ 'dupes' } }, $_;
}
else
{
$digest{ $md5 } = { 'file' => $_,
'dupes' => [] };
}
}
}
else
{
$config{ 'verbose' } && print "Entering $_\n";
}
},
'no_chdir' => 1
},
$path
);
#
# Report upon collisions.
#
foreach my $hash ( keys %digest )
{
my $dupes = $digest{ $hash }->{ 'dupes' };
my $src = $digest{ $hash }->{ 'file' };
if (@$dupes)
{
print $src . "\n";
foreach my $dupe (@$dupes)
{
print "\t$dupe\n";
}
}
}
#
# All done.
#
exit(0);
=begin doc
Load one of M<Digest::SHA> and M<Digest::SHA1>, depending on what is available.
=end doc
=cut
sub getHashObject
{
my $hash = undef;
foreach my $module (qw! Digest::SHA Digest::SHA1 !)
{
# If we succeeded in calculating the hash we're done.
next if ( defined($hash) );
# Attempt to load the module
my $eval = "use $module;";
## no critic (Eval)
eval($eval);
## use critic
if ( !$@ )
{
$hash = $module->new;
}
}
if ($hash)
{
return ($hash);
}
else
{
print "Failed to load either DIgest::SHA or Digest::SHA1\n";
exit(1);
}
}
=begin doc
Parse the options and return suitable values.
=end doc
=cut
sub parsedOptions
{
my %vars;
exit
if (
!GetOptions( "help" => \$vars{ 'help' },
"verbose" => \$vars{ 'verbose' } ) );
pod2usage(1) if ( $vars{ 'help' } );
return (%vars);
}