-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathmain.go
88 lines (80 loc) · 3.04 KB
/
main.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
package main
import (
"bytes"
"context"
"encoding/json"
"fmt"
"io/ioutil"
"log"
"net/http"
documentai "cloud.google.com/go/documentai/apiv1"
"cloud.google.com/go/documentai/apiv1/documentaipb"
"github.com/alexflint/go-arg"
"google.golang.org/api/option"
)
// cliArgs describes every setting the program accepts. go-arg fills the
// fields from command-line flags or from the environment variable named in
// each field's `arg` tag.
type cliArgs struct {
	// DocumentID identifies the Paperless document whose content is patched.
	DocumentID string `arg:"required,env:DOCUMENT_ID"`
	// PaperlessToken is sent as "Authorization: Token <value>" on the PATCH request.
	PaperlessToken string `arg:"required,env:PAPERLESS_TOKEN"`
	// PaperlessEndpoint is the base URL that "/api/documents/<id>/" is appended to.
	PaperlessEndpoint string `arg:"required,env:PAPERLESS_ENDPOINT"`
	// InputFile is the path of the file whose bytes are submitted for OCR.
	InputFile string `arg:"required,env:DOCUMENT_SOURCE_PATH"`

	// The four DocumentAI* values are combined into the processor version
	// resource name; DocumentAILocation additionally selects the regional
	// "<location>-documentai.googleapis.com" endpoint.
	DocumentAIProjectID        string `arg:"required,env:DOCUMENTAI_PROJECT_ID"`
	DocumentAILocation         string `arg:"required,env:DOCUMENTAI_LOCATION"`
	DocumentAIProcessorID      string `arg:"required,env:DOCUMENTAI_PROCESSOR_ID"`
	DocumentAIProcessorVersion string `arg:"required,env:DOCUMENTAI_PROCESSOR_VERSION"`

	// GoogleCredentialsFile is the credentials file handed to the Document AI client.
	GoogleCredentialsFile string `arg:"required,env:GOOGLE_APPLICATION_CREDENTIALS"`
	// TestString, when non-empty, is used as the document text and OCR is skipped.
	TestString string `arg:"env:TEST_STRING"`
}

// args holds the parsed settings for the current run.
var args cliArgs
// main parses the settings, reads the input file, obtains the document text
// (via Document AI, or from TestString when that is set) and PATCHes the text
// into the Paperless document identified by DocumentID.
//
// Every failure ends the process through log.Fatal. The helpers return errors
// instead of exiting themselves so that their deferred cleanup runs first.
func main() {
	arg.MustParse(&args)

	// The file is read even when TestString bypasses OCR, so an unreadable
	// input path is always reported.
	inputBytes, err := ioutil.ReadFile(args.InputFile)
	if err != nil {
		log.Fatal(err)
	}

	var ocrText string
	if args.TestString == "" {
		ocrText, err = recognizeText(context.Background(), inputBytes)
		if err != nil {
			log.Fatal(err)
		}
	} else {
		log.Printf("Skipping OCR, pasting %s", args.TestString)
		ocrText = args.TestString
	}

	log.Printf("Fixing OCR text...")
	if err := patchDocumentContent(ocrText); err != nil {
		log.Fatal(err)
	}
	log.Println("Sucessfully patched!")
}

// recognizeText submits content to the configured Document AI processor
// version as an "application/pdf" raw document and returns the text of the
// processed document.
//
// The returned errors keep the exact wording this program has always logged
// ("Cannot create client", "Cannot process document"), which is why they do
// not follow the usual lowercase error-string convention.
func recognizeText(ctx context.Context, content []byte) (string, error) {
	client, err := documentai.NewDocumentProcessorClient(ctx,
		option.WithCredentialsFile(args.GoogleCredentialsFile),
		option.WithEndpoint(args.DocumentAILocation+"-documentai.googleapis.com:443"))
	if err != nil {
		return "", fmt.Errorf("Cannot create client: %w", err)
	}
	// Runs on every return path because this function never exits the process itself.
	defer client.Close()

	processorName := fmt.Sprintf("projects/%s/locations/%s/processors/%s/processorVersions/%s",
		args.DocumentAIProjectID, args.DocumentAILocation, args.DocumentAIProcessorID, args.DocumentAIProcessorVersion)
	log.Printf("Processing file: %s, ID: %s", args.InputFile, args.DocumentID)

	req := &documentaipb.ProcessRequest{
		Name: processorName,
		Source: &documentaipb.ProcessRequest_RawDocument{
			RawDocument: &documentaipb.RawDocument{
				Content:  content,
				MimeType: "application/pdf",
			},
		},
	}
	resp, err := client.ProcessDocument(ctx, req)
	if err != nil {
		return "", fmt.Errorf("Cannot process document: %w", err)
	}
	return resp.Document.GetText(), nil
}

// patchDocumentContent sends content as the JSON field "content" in a PATCH
// request to <PaperlessEndpoint>/api/documents/<DocumentID>/, authenticated
// with the Paperless token.
//
// It returns an error when the request cannot be built or sent, or when the
// server answers with a status outside the 2xx range. On success the status
// and response body are logged.
func patchDocumentContent(content string) error {
	body, err := json.Marshal(map[string]string{"content": content})
	if err != nil {
		return fmt.Errorf("Cannot encode response: %w", err)
	}

	endpoint := fmt.Sprintf("%s/api/documents/%s/", args.PaperlessEndpoint, args.DocumentID)
	req, err := http.NewRequest(http.MethodPatch, endpoint, bytes.NewReader(body))
	if err != nil {
		// A malformed endpoint would otherwise leave req nil and panic below.
		return fmt.Errorf("Cannot build patch request: %w", err)
	}
	req.Header.Add("Content-Type", "application/json")
	req.Header.Add("Accept", "application/json; version=2")
	req.Header.Add("Authorization", fmt.Sprintf("Token %s", args.PaperlessToken))

	httpClient := &http.Client{}
	resp, err := httpClient.Do(req)
	if err != nil {
		return fmt.Errorf("Cannot patch document: %w", err)
	}
	defer resp.Body.Close()

	respBody, readErr := ioutil.ReadAll(resp.Body)
	if readErr != nil {
		// The body is only used for diagnostics, so a read failure is
		// reported but does not by itself fail the run.
		log.Printf("Cannot read patch response body: %v", readErr)
	}

	// A completed round trip is not a successful update: reject non-2xx replies.
	if resp.StatusCode < http.StatusOK || resp.StatusCode >= http.StatusMultipleChoices {
		return fmt.Errorf("Cannot patch document: %s, %s", resp.Status, string(respBody))
	}
	log.Printf("Patched document: %s, %s", resp.Status, string(respBody))
	return nil
}