/
main.go
103 lines (86 loc) · 1.79 KB
/
main.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
package main
import (
"fmt"
"image"
_ "image/gif"
_ "image/jpeg"
_ "image/png"
"io/ioutil"
"os"
"os/exec"
"path/filepath"
"strings"
"sync"
_ "golang.org/x/image/bmp"
_ "golang.org/x/image/tiff"
)
// dirwalk returns the paths of all non-directory entries found under dir,
// descending into sub-directories recursively. Each returned path is dir
// joined with the entry names leading to the file. Entries are visited in
// the order ioutil.ReadDir yields them, and a sub-directory's files appear at
// the position of that sub-directory.
//
// It panics if any directory along the way cannot be read.
func dirwalk(dir string) []string {
	entries, readErr := ioutil.ReadDir(dir)
	if readErr != nil {
		panic(readErr)
	}

	var found []string
	for i := range entries {
		full := filepath.Join(dir, entries[i].Name())
		switch {
		case entries[i].IsDir():
			// Splice the sub-tree's files in where the directory sits.
			found = append(found, dirwalk(full)...)
		default:
			found = append(found, full)
		}
	}
	return found
}
// ocr runs tesseract on the file at path when the detected image format
// matches the candidate format name img; otherwise it does nothing.
//
// format is the format name detected for the file, img is one supported
// format name to test it against (a substring match, so an empty img matches
// every format), and lang is passed to tesseract's -l option. path is given
// to tesseract as both the input image and the output base, so the
// recognised text is written next to the image.
//
// A tesseract failure (binary missing, unreadable image, unknown language,
// ...) is reported on stderr together with tesseract's own output and the
// function returns normally: ocr is called from worker goroutines, where a
// panic would abort the whole batch because of a single bad file.
func ocr(format string, img string, path string, lang string) {
	if !strings.Contains(format, img) {
		return
	}

	fmt.Println(path)
	out, err := exec.Command("tesseract", path, path, "-l", lang).CombinedOutput()
	if err != nil {
		fmt.Fprintf(os.Stderr, "tesseract %s: %v\n", path, err)
		if len(out) > 0 {
			// Relay tesseract's diagnostics; they usually name the real cause.
			os.Stderr.Write(out)
		}
	}
}
// runCommand OCRs every supported image found under dir.
//
// It collects all file paths below dir with dirwalk, detects each file's
// image format with image.DecodeConfig, and passes the file to ocr once per
// supported format name; ocr invokes tesseract with the language code lang
// when the format matches. Files are processed concurrently and the function
// returns only after every file has been handled. Files that cannot be
// opened or are not decodable images are reported on stderr and skipped.
func runCommand(dir string, lang string) {
	// Upper bound on files examined/OCR'd at the same time, so a large tree
	// does not start one goroutine and one tesseract process per file.
	const maxWorkers = 8

	paths := dirwalk(dir)
	fmt.Println("Processing...")

	// Format names compared against the name reported by image.DecodeConfig.
	// NOTE(review): "tiff" is not listed, so tiff files are detected but
	// never OCR'd — confirm whether that is intended.
	imgs := [...]string{"jpeg", "jpg", "bmp", "png", "gif"}

	sem := make(chan struct{}, maxWorkers)
	var wg sync.WaitGroup
	for _, path := range paths {
		sem <- struct{}{} // blocks while maxWorkers files are in flight
		wg.Add(1)
		go func(path string) {
			defer wg.Done()
			defer func() { <-sem }()

			format, err := imageFormat(path)
			if err != nil {
				// Not an image (or unreadable): nothing to OCR.
				fmt.Fprintln(os.Stderr, err)
				return
			}
			for _, img := range imgs {
				ocr(format, img, path, lang)
			}
		}(path)
	}
	wg.Wait()
}

// imageFormat returns the format name image.DecodeConfig detects for the file
// at path. The file is closed before returning, so no descriptor is held open
// while the caller goes on to process the file.
func imageFormat(path string) (string, error) {
	f, err := os.Open(path)
	if err != nil {
		return "", err
	}
	defer f.Close()

	_, format, err := image.DecodeConfig(f)
	if err != nil {
		return "", fmt.Errorf("%s: %w", path, err)
	}
	return format, nil
}
// main parses the command line and OCRs the images under a directory.
//
// Usage: <program> <Dir> <Lang Code>. Dir and Lang Code are passed to
// runCommand; Lang Code is the tesseract language specification. A single
// -h or --help argument prints the usage text and exits successfully. Any
// other argument count is reported on stderr, followed by the usage text,
// and the program exits with status 1.
//
// Supported image types: jpeg, bmp, png, gif.
func main() {
	const usage = `USAGE
$ go run main.go <Dir> <Lang Code>`

	args := os.Args[1:]

	// Asking for help is not an error, so return normally (exit status 0).
	if len(args) == 1 && (args[0] == "-h" || args[0] == "--help") {
		fmt.Println(usage)
		return
	}

	if len(args) != 2 {
		fmt.Fprintln(os.Stderr, "The number of arguments specified is incorrect.")
		fmt.Fprintln(os.Stderr, usage)
		os.Exit(1)
	}

	dir, lang := args[0], args[1] // lang: tesseract language specification option.
	runCommand(dir, lang)
	fmt.Println("\nDone!")
}