Skip to content

Commit

Permalink
Make it work
Browse files Browse the repository at this point in the history
  • Loading branch information
rubenv committed May 22, 2015
1 parent 946880d commit fd73c7a
Show file tree
Hide file tree
Showing 5 changed files with 255 additions and 13 deletions.
31 changes: 31 additions & 0 deletions catalog_test.go
@@ -0,0 +1,31 @@
package dupefinder

import (
"strings"
"testing"
)

// TestCatalogParse feeds a small in-memory catalog to ParseCatalogReader and
// checks that the comment line is skipped and that both data lines come back
// as hash -> filename entries.
func TestCatalogParse(t *testing.T) {
	const catalog = `
# A comment and an empty line:
00e3261a6e0d79c329445acd540fb2b07187a0dcf6017065c8814010283ac67f test
5891b5b522d5df086d0ff0b110fbd9d21bb4fc7163af34d08286a2e846f6be03 a/b.txt
`

	entries, err := ParseCatalogReader(strings.NewReader(catalog))
	if err != nil {
		t.Error(err)
	}
	if got := len(entries); got != 2 {
		t.Errorf("Unexpected number of entries: %d", got)
	}

	// Expected hash -> filename pairs, checked in a fixed order.
	expected := []struct {
		hash     string
		filename string
	}{
		{"00e3261a6e0d79c329445acd540fb2b07187a0dcf6017065c8814010283ac67f", "test"},
		{"5891b5b522d5df086d0ff0b110fbd9d21bb4fc7163af34d08286a2e846f6be03", "a/b.txt"},
	}
	for _, want := range expected {
		if entries[want.hash] != want.filename {
			t.Error("Bad entry")
		}
	}
}
198 changes: 188 additions & 10 deletions dupefinder.go
@@ -1,26 +1,71 @@
package dupefinder

import (
"bufio"
"crypto/sha256"
"fmt"
"io"
"io/ioutil"
"os"
"path"
"strings"
)

// FileHash pairs a file with the digest computed from its contents. Values
// are produced by hashFiles and consumed by Generate and Detect.
type FileHash struct {
	// Hash is the hex-encoded SHA-256 digest of the file contents.
	Hash string
	// Filename is the path of the hashed file as produced by the folder walk.
	Filename string
}

// header is the preamble Generate writes at the top of a new catalog file.
// Every line starts with '#', so ParseCatalogReader skips it as a comment.
const header = `# This is a dupefinder catalog
#
# See https://github.com/rubenv/dupefinder for more info
`

// Generate builds a catalog file for the given folders.
//
// It validates the folders, creates (or truncates) the file named by catalog,
// writes the comment header, and then appends one "<hash> <filename>" line for
// every regular file found while walking the folders.
//
// It returns the first error encountered while validating, creating or
// writing the catalog, or an error reported by the walking/hashing goroutines.
func Generate(catalog string, folders ...string) error {
	err := validateFolders(folders...)
	if err != nil {
		return err
	}

	out, err := os.Create(catalog)
	if err != nil {
		return err
	}
	defer out.Close()

	_, err = out.WriteString(header)
	if err != nil {
		return err
	}

	// errs has room for one error per producer goroutine. Each producer sends
	// at most one error and then returns; with an unbuffered channel that send
	// would block forever (nothing receives from errs until entries is
	// closed), the producer's deferred close would never run, and the loop
	// below would never terminate.
	errs := make(chan error, 2)
	filenames := make(chan string, 100)
	entries := make(chan FileHash, 100)

	go walkAllFolders(errs, filenames, folders...)
	go hashFiles(errs, filenames, entries)

	// hashFiles closes entries once it is done, which ends this loop.
	for entry := range entries {
		if _, err := fmt.Fprintf(out, "%s %s\n", entry.Hash, entry.Filename); err != nil {
			return err
		}
	}

	// Non-blocking check: any error that stopped a producer was queued before
	// entries was closed, so it is already waiting in the buffer.
	select {
	case err := <-errs:
		return err
	default:
	}

	return nil
}

Expand All @@ -30,6 +75,55 @@ func Detect(catalog string, echo, rm bool, folders ...string) error {
return err
}

catalogEntries, err := ParseCatalog(catalog)
if err != nil {
return err
}

errs := make(chan error)
filenames := make(chan string, 100)
entries := make(chan FileHash, 100)

go walkAllFolders(errs, filenames, folders...)
go hashFiles(errs, filenames, entries)

deleted := int64(0)
for {
entry, ok := <-entries
if !ok {
break
}

if orig, ok := catalogEntries[entry.Hash]; ok {
fi, err := os.Stat(entry.Filename)
if err != nil {
return err
}

deleted += fi.Size()

if echo {
fmt.Printf("Would delete %s (matches %s)\n", entry.Filename, orig)
} else {
fmt.Printf("Deleting %s (matches %s)\n", entry.Filename, orig)
err := os.Remove(entry.Filename)
if err != nil {
return err
}
}
}
}

fmt.Printf("Size saved: %d bytes\n", deleted)

select {
case err := <-errs:
if err != nil {
return err
}
default:
}

return nil
}

Expand Down Expand Up @@ -62,28 +156,112 @@ func isFolder(filename string) (bool, error) {
return fi.IsDir(), nil
}

func walkAllFolders(errs chan error, folders ...string) {
defer close(errs)
func walkAllFolders(errs chan error, filenames chan string, folders ...string) {
defer close(filenames)

for _, f := range folders {
err := walkFolder(f)
err := walkFolder(f, filenames)
if err != nil {
errs <- err
return
}
}
}

// walkFolder recursively lists the directory named by filename and sends the
// path of every regular file beneath it to out.
//
// Subdirectories are descended into depth-first; entries that are neither
// directories nor regular files are skipped. The walk stops at the first
// directory that cannot be read and returns that error. The send on out
// blocks when the channel is full, so a consumer must be draining it.
func walkFolder(filename string, out chan string) error {
	children, err := ioutil.ReadDir(filename)
	if err != nil {
		return err
	}

	for _, child := range children {
		fullname := path.Join(filename, child.Name())
		switch {
		case child.IsDir():
			if err := walkFolder(fullname, out); err != nil {
				return err
			}
		case child.Mode().IsRegular():
			out <- fullname
		}
	}

	return nil
}

// hashFiles receives paths from filenames, hashes each file with hashFile and
// sends the resulting FileHash to entries.
//
// entries is closed when the function returns: either because filenames was
// closed and drained, or because hashing a file failed. In the failure case
// the error is sent to errs first and no further paths are read.
func hashFiles(errs chan error, filenames chan string, entries chan FileHash) {
	defer close(entries)

	for name := range filenames {
		sum, err := hashFile(name)
		if err != nil {
			errs <- err
			return
		}

		entries <- FileHash{Filename: name, Hash: sum}
	}
}

// hashFile streams the named file through SHA-256 and returns the digest as a
// lowercase hex string. If the file cannot be opened or read, it returns an
// empty string and the error.
func hashFile(filename string) (string, error) {
	f, openErr := os.Open(filename)
	if openErr != nil {
		return "", openErr
	}
	defer f.Close()

	digest := sha256.New()
	_, copyErr := io.Copy(digest, f)
	if copyErr != nil {
		return "", copyErr
	}

	sum := digest.Sum(nil)
	return fmt.Sprintf("%x", sum), nil
}

// ParseCatalog opens the catalog file at filename and parses it with
// ParseCatalogReader. It returns the parsed hash -> filename map, or a nil
// map and the error if the file cannot be opened or parsed.
func ParseCatalog(filename string) (map[string]string, error) {
	f, err := os.Open(filename)
	if err != nil {
		return nil, err
	}
	defer f.Close()

	entries, err := ParseCatalogReader(f)
	return entries, err
}

// ParseCatalogReader reads a catalog from reader and returns a map from hash
// to filename.
//
// Each line is trimmed of surrounding whitespace. Empty lines and lines
// starting with '#' are ignored. Every other line must consist of a hash,
// a single space, and the filename (which may itself contain spaces);
// anything else yields a "Malformed line" error. A final line without a
// trailing newline is still parsed. If a hash occurs more than once, the last
// occurrence wins.
func ParseCatalogReader(reader io.Reader) (map[string]string, error) {
	catalog := make(map[string]string)
	lines := bufio.NewReader(reader)

	for {
		raw, readErr := lines.ReadString('\n')
		if readErr != nil && readErr != io.EOF {
			return nil, readErr
		}

		// On EOF raw may still hold a last, unterminated line, so it is
		// processed before the loop is left.
		if entry := strings.TrimSpace(raw); entry != "" && entry[0] != '#' {
			fields := strings.SplitN(entry, " ", 2)
			if len(fields) != 2 {
				return nil, fmt.Errorf("Malformed line: %#v", entry)
			}
			catalog[fields[0]] = fields[1]
		}

		if readErr == io.EOF {
			return catalog, nil
		}
	}
}
1 change: 1 addition & 0 deletions fixtures/a/c/bla.txt
@@ -0,0 +1 @@
bla
22 changes: 19 additions & 3 deletions generate_test.go
@@ -1,6 +1,9 @@
package dupefinder

import "testing"
import (
"os"
"testing"
)

func TestGenerateFolders(t *testing.T) {
err := Generate("catalog.txt", "invalid")
Expand All @@ -10,10 +13,23 @@ func TestGenerateFolders(t *testing.T) {
}

func TestGenerateFinds(t *testing.T) {
err := Generate("catalog.txt", "fixtures/a")
catalog := tempFilename(t)
defer os.Remove(catalog)

err := Generate(catalog, "fixtures/a")
if err != nil {
t.Error(err)
}

// TODO: Read catalog, see if files are there and check checksums
entries, err := ParseCatalog(catalog)
if err != nil {
t.Error(err)
}
if len(entries) != 2 {
t.Errorf("Unexpected number of entries: %d", len(entries))
}

if entries["00e3261a6e0d79c329445acd540fb2b07187a0dcf6017065c8814010283ac67f"] != "fixtures/a/c/bla.txt" {
t.Error("Bad entry")
}
}
16 changes: 16 additions & 0 deletions init_test.go
@@ -0,0 +1,16 @@
package dupefinder

import (
"io/ioutil"
"testing"
)

// tempFilename creates an empty temporary file (prefix "dupefinder") in the
// default temp directory, closes it and returns its path. The test is failed
// immediately if the file cannot be created or closed. The file is left on
// disk; the caller is responsible for removing it.
func tempFilename(t *testing.T) string {
	f, err := ioutil.TempFile("", "dupefinder")
	if err != nil {
		t.Fatal(err)
	}

	name := f.Name()
	// Report a failed close instead of silently handing back a path whose
	// handle may still be open.
	if err := f.Close(); err != nil {
		t.Fatal(err)
	}

	return name
}

0 comments on commit fd73c7a

Please sign in to comment.