#!/usr/bin/env python
import os
import sys
import hashlib
# Report files with identical content beneath a directory tree.
#
# Usage: find-duplicates <directory>
#
# Every regular file found by os.walk() is hashed with MD5; whenever a hash
# has been seen before, the pair "<file> <==> <previous file>" is printed.


def file_md5(path, chunk_size=65536):
    """Return the hexadecimal MD5 digest of the file at *path*.

    The file is opened in binary mode so the digest reflects the exact bytes
    on disk, and it is read in pieces of *chunk_size* bytes so that large
    files are never held in memory all at once.

    Raises IOError/OSError if the file cannot be opened or read.
    """
    digest = hashlib.md5()
    with open(path, 'rb') as handle:
        # iter(callable, sentinel) keeps calling read() until it returns b''.
        for chunk in iter(lambda: handle.read(chunk_size), b''):
            digest.update(chunk)
    return digest.hexdigest()


def find_duplicates(root):
    """Yield ``(path, earlier_path)`` for each duplicate file under *root*.

    *path* is a file whose MD5 digest equals that of *earlier_path*, the most
    recently visited file with the same digest.  Files are visited in
    os.walk() order.  Files that cannot be read are reported on stderr and
    skipped instead of aborting the whole scan.
    """
    seen = {}
    for subdir, _dirs, files in os.walk(root):
        for name in files:
            path = os.path.join(subdir, name)
            try:
                md5 = file_md5(path)
            except (IOError, OSError) as err:
                # Broken symlinks, permission problems, files removed mid-scan.
                sys.stderr.write(
                    'find-duplicates: skipping %s: %s\n' % (path, err))
                continue
            if md5 in seen:
                yield path, seen[md5]
            # Always remember the latest file, so a third copy is paired
            # with the second one rather than the first.
            seen[md5] = path


def main(argv):
    """Run the command-line tool with *argv* (program name + one directory).

    Exits with the usage message on stderr and a non-zero status when the
    argument count is wrong; otherwise prints one line per duplicate pair.
    """
    if len(argv) != 2:
        sys.exit('Usage: find-duplicates <directory>')
    for path, earlier in find_duplicates(argv[1]):
        print("%s <==> %s" % (path, earlier))


if __name__ == '__main__':
    main(sys.argv)
