Skip to content
Permalink
Browse files Browse the repository at this point in the history
Fix remote code execution vulnerability in the PDF OCR converter (#110)
ENG-3252
  • Loading branch information
helenamariano committed Jul 7, 2022
1 parent 44d6739 commit b19021a
Show file tree
Hide file tree
Showing 2 changed files with 53 additions and 1 deletion.
24 changes: 23 additions & 1 deletion pdf_ocr.go
@@ -1,3 +1,4 @@
//go:build ocr
// +build ocr

package docconv
Expand All @@ -10,6 +11,7 @@ import (
"os"
"os/exec"
"path/filepath"
"regexp"
"strings"
"sync"
)
Expand Down Expand Up @@ -111,7 +113,8 @@ func ConvertPDFImages(path string) (BodyResult, error) {
// PDFHasImage verifies whether the PDF at `path` has images.
func PDFHasImage(path string) bool {
cmd := "pdffonts -l 5 %s | tail -n +3 | cut -d' ' -f1 | sort | uniq"
out, err := exec.Command("bash", "-c", fmt.Sprintf(cmd, path)).Output()
out, err := exec.Command("bash", "-c", fmt.Sprintf(cmd, shellEscape(path))).CombinedOutput()

if err != nil {
log.Println(err)
return false
Expand Down Expand Up @@ -159,3 +162,22 @@ func ConvertPDF(r io.Reader) (string, map[string]string, error) {
return fullBody, metaResult.meta, nil

}

// shellEscapePattern matches any single character that is NOT in the set left
// unquoted by shellEscape: word characters (ASCII letters, digits, underscore)
// and the punctuation @ % + = : , . / -
//
// It is compiled once in the package-level initializer, so no init function is
// needed.
var shellEscapePattern = regexp.MustCompile(`[^\w@%+=:,./-]`)

// shellEscape returns a shell-escaped version of the string s. The returned value
// is a string that can safely be used as one token in a shell command line.
//
// The empty string becomes ''. A string made up only of characters outside
// shellEscapePattern is returned unchanged. Anything else is wrapped in single
// quotes, with every embedded single quote rewritten as '"'"' (close the
// quoted run, emit a double-quoted single quote, reopen the quoted run).
func shellEscape(s string) string {
	if s == "" {
		return "''"
	}
	if !shellEscapePattern.MatchString(s) {
		// Nothing the shell would interpret; quoting is unnecessary.
		return s
	}

	return "'" + strings.ReplaceAll(s, "'", `'"'"'`) + "'"
}
30 changes: 30 additions & 0 deletions pdf_ocr_test.go
@@ -0,0 +1,30 @@
//go:build ocr
// +build ocr

package docconv

import (
"os"
"testing"
)

// TestPDFHasImage_CannotExecuteCode checks that a file path containing shell
// command substitution is not executed when handed to PDFHasImage.
//
// The path embeds `$(id >> foo)`: if the shell evaluated it, a file named foo
// would appear in the working directory.
func TestPDFHasImage_CannotExecuteCode(t *testing.T) {
	const marker = "foo"

	// A leftover marker (for example from an earlier failing run) would make
	// the final check fail even though nothing was injected, so refuse to start.
	if _, err := os.Stat(marker); err == nil {
		t.Fatalf("%q already exists in the working directory; remove it and re-run", marker)
	}
	// Best-effort cleanup: the marker only exists if the injection succeeded,
	// so a "not found" error from Remove is the expected case and is ignored.
	t.Cleanup(func() { _ = os.Remove(marker) })

	// Try to inject code by passing a bad file path.
	badFilePath := "$(id >> " + marker + ").pdf"
	if got, want := PDFHasImage(badFilePath), false; got != want {
		t.Errorf("PDFHasImage(%q) = %v, want %v", badFilePath, got, want)
	}

	if fileExists(marker) {
		t.Errorf("injected command ran: %q was created in the working directory", marker)
	}
}

// fileExists reports whether filename names an existing entry that is not a
// directory. Any failure from os.Stat, not only "does not exist", yields false.
func fileExists(filename string) bool {
	info, err := os.Stat(filename)
	if err != nil {
		// info must not be used when Stat fails; checking only
		// os.IsNotExist would dereference it on errors such as ENOTDIR,
		// EINVAL or EACCES.
		return false
	}
	return !info.IsDir()
}

0 comments on commit b19021a

Please sign in to comment.