From e529265a0cf335853464963daa03764c66a1cf1a Mon Sep 17 00:00:00 2001 From: Noteworthy Date: Sat, 3 Sep 2022 21:27:58 +1000 Subject: [PATCH] feat: guess file extension and include PE signature (#379) * feat: create a new entry inside PE to hold the signature status * chore: bump exiftool version * feat: guess file extension based on content * chore: don't fail fast in github action release/avupdate --- .github/workflows/av-engine-update.yaml | 1 + .github/workflows/release.yaml | 1 + Makefile | 1 + build/docker/Dockerfile.meta | 2 +- build/mk/exiftool.mk | 6 +- build/mk/trid.mk | 1 + build/mk/yarn.mk | 5 + go.mod | 2 + go.sum | 4 + services/aggregator/aggregator.go | 2 +- services/meta/meta.go | 25 ++- services/meta/sigs.go | 195 ++++++++++++++++++++++-- services/pe/pe.go | 26 +++- 13 files changed, 236 insertions(+), 35 deletions(-) create mode 100644 build/mk/yarn.mk diff --git a/.github/workflows/av-engine-update.yaml b/.github/workflows/av-engine-update.yaml index 69ea5b40..9971b674 100644 --- a/.github/workflows/av-engine-update.yaml +++ b/.github/workflows/av-engine-update.yaml @@ -9,6 +9,7 @@ jobs: release: runs-on: ubuntu-20.04 strategy: + fail-fast: false matrix: service: - goavira diff --git a/.github/workflows/release.yaml b/.github/workflows/release.yaml index 968ba3a9..e13354ec 100644 --- a/.github/workflows/release.yaml +++ b/.github/workflows/release.yaml @@ -7,6 +7,7 @@ jobs: release: runs-on: ubuntu-20.04 strategy: + fail-fast: false matrix: service: - pe diff --git a/Makefile b/Makefile index be4e5487..549c18f6 100644 --- a/Makefile +++ b/Makefile @@ -58,3 +58,4 @@ include build/mk/saferwall.mk include build/mk/elastic.mk include build/mk/vagrant.mk include build/mk/github.mk +include build/mk/yarn.mk diff --git a/build/docker/Dockerfile.meta b/build/docker/Dockerfile.meta index 51f620b3..e7b69111 100644 --- a/build/docker/Dockerfile.meta +++ b/build/docker/Dockerfile.meta @@ -25,7 +25,7 @@ RUN echo "Installing TRiD..." 
\ && mv triddefs.trd /usr/bin/ ####### Installing Exiftool ####### -ENV EXIF_VER 12.42 +ENV EXIF_VER 12.44 RUN echo "Installing Exiftool..." \ && wget https://exiftool.org/Image-ExifTool-$EXIF_VER.tar.gz \ && gzip -dc Image-ExifTool-$EXIF_VER.tar.gz | tar -xf - \ diff --git a/build/mk/exiftool.mk b/build/mk/exiftool.mk index c3540c87..a279aea0 100644 --- a/build/mk/exiftool.mk +++ b/build/mk/exiftool.mk @@ -1,11 +1,11 @@ -EXIF_VER = 12.41 +EXIF_VER = 12.44 exiftool-install: # Install ExifTool - sudo apt-get -qq update wget https://exiftool.org/Image-ExifTool-$(EXIF_VER).tar.gz gzip -dc Image-ExifTool-$(EXIF_VER).tar.gz | tar -xf - cd Image-ExifTool-$(EXIF_VER) \ && perl Makefile.PL \ && make test \ && sudo make install + cd Image-ExifTool-$(EXIF_VER) && sudo cp -r exiftool lib /usr/local/bin rm Image-ExifTool-$(EXIF_VER).tar.gz - rm -r Image-ExifTool-$(EXIF_VER) \ No newline at end of file + rm -rf Image-ExifTool-$(EXIF_VER) diff --git a/build/mk/trid.mk b/build/mk/trid.mk index 0816e7f8..a81c97b5 100644 --- a/build/mk/trid.mk +++ b/build/mk/trid.mk @@ -11,3 +11,4 @@ trid-install: ## Install TRiD sudo mv /tmp/trid /usr/bin/ sudo mv /tmp/triddefs.trd /usr/bin/ chmod +x /usr/bin/trid + ## export LC_ALL=C diff --git a/build/mk/yarn.mk b/build/mk/yarn.mk new file mode 100644 index 00000000..68d1f6e9 --- /dev/null +++ b/build/mk/yarn.mk @@ -0,0 +1,5 @@ +yarn-install: ## Install Yarn + curl -sS https://dl.yarnpkg.com/debian/pubkey.gpg | sudo apt-key add - + echo "deb https://dl.yarnpkg.com/debian/ stable main" | sudo tee /etc/apt/sources.list.d/yarn.list + sudo apt-get update && sudo apt-get install yarn -y + yarn --version diff --git a/go.mod b/go.mod index 92bca9da..df159eb3 100644 --- a/go.mod +++ b/go.mod @@ -10,6 +10,7 @@ require ( github.com/djherbis/times v1.5.0 github.com/gabriel-vasile/mimetype v1.4.1 github.com/glaslos/ssdeep v0.3.2 + github.com/go-enry/go-enry/v2 v2.8.2 github.com/golang/protobuf v1.5.2 github.com/hillu/go-yara/v4 v4.2.3 
github.com/minio/minio-go/v7 v7.0.10 @@ -27,6 +28,7 @@ require ( github.com/couchbase/gocbcore/v10 v10.1.2 // indirect github.com/edsrzf/mmap-go v1.0.0 // indirect github.com/fsnotify/fsnotify v1.5.4 // indirect + github.com/go-enry/go-oniguruma v1.2.1 // indirect github.com/golang/snappy v0.0.4 // indirect github.com/google/uuid v1.3.0 // indirect github.com/hashicorp/hcl v1.0.0 // indirect diff --git a/go.sum b/go.sum index b9a6cd7f..97f9b213 100644 --- a/go.sum +++ b/go.sum @@ -142,6 +142,10 @@ github.com/gabriel-vasile/mimetype v1.4.1/go.mod h1:05Vi0w3Y9c/lNvJOdmIwvrrAhX3r github.com/ghodss/yaml v1.0.0/go.mod h1:4dBDuWmgqj2HViK6kFavaiC9ZROes6MMH2rRYeMEF04= github.com/glaslos/ssdeep v0.3.2 h1:zvrj04/Wwhpv1JyYDC+bGzsVX3Gs0CwaLQ/w3VHs+SM= github.com/glaslos/ssdeep v0.3.2/go.mod h1:04PaN40+DUokVJ6JqCJbSWr1Tm7E4toyUsvM8ujtZv4= +github.com/go-enry/go-enry/v2 v2.8.2 h1:uiGmC+3K8sVd/6DOe2AOJEOihJdqda83nPyJNtMR8RI= +github.com/go-enry/go-enry/v2 v2.8.2/go.mod h1:GVzIiAytiS5uT/QiuakK7TF1u4xDab87Y8V5EJRpsIQ= +github.com/go-enry/go-oniguruma v1.2.1 h1:k8aAMuJfMrqm/56SG2lV9Cfti6tC4x8673aHCcBk+eo= +github.com/go-enry/go-oniguruma v1.2.1/go.mod h1:bWDhYP+S6xZQgiRL7wlTScFYBe023B6ilRZbCAD5Hf4= github.com/go-gl/glfw v0.0.0-20190409004039-e6da0acd62b1/go.mod h1:vR7hzQXu2zJy9AVAgeJqvqgH9Q5CA+iKCZ2gyEVpxRU= github.com/go-gl/glfw/v3.3/glfw v0.0.0-20191125211704-12ad95a8df72/go.mod h1:tQ2UAYgL5IevRw8kRxooKSPJfGvJ9fJQFa0TUsXzTg8= github.com/go-gl/glfw/v3.3/glfw v0.0.0-20200222043503-6f7a984d4dc4/go.mod h1:tQ2UAYgL5IevRw8kRxooKSPJfGvJ9fJQFa0TUsXzTg8= diff --git a/services/aggregator/aggregator.go b/services/aggregator/aggregator.go index a004501a..e7f89e18 100644 --- a/services/aggregator/aggregator.go +++ b/services/aggregator/aggregator.go @@ -37,7 +37,7 @@ type Service struct { db store.DB } -// New create a new PE scanner service. +// New create a new aggregator scanner service. 
func New(cfg Config, logger log.Logger) (Service, error) { svc := Service{} diff --git a/services/meta/meta.go b/services/meta/meta.go index e03fa2ac..6824a0f7 100644 --- a/services/meta/meta.go +++ b/services/meta/meta.go @@ -136,18 +136,26 @@ func (s *Service) HandleMessage(m *gonsq.Message) error { logger.Errorf("packer scan failed with: %v", err) } - // Determine type. - var format string + // Determine file format. + var fileFormat string + var fileExt string for k, v := range typeMap { - if strings.HasPrefix(magicRes, k) { - format = v + if strings.Contains(magicRes, k) { + fileFormat = v break } } - if len(format) == 0 { - format = "unknown" + if len(fileFormat) == 0 { + fileFormat = "unknown" + fileExt = "unknown" } - logger.Debugf("file format is: %s", format) + logger.Debugf("file format is: %s", fileFormat) + + // Determine file extension. + if fileFormat != "unknown" { + fileExt = guessFileExtension(data, magicRes, fileFormat, tridRes) + } + logger.Debugf("file extension is: %s", fileExt) // Extract strings. asciiStrings := str.GetASCIIStrings(&data, maxStrLength) @@ -195,7 +203,8 @@ func (s *Service) HandleMessage(m *gonsq.Message) error { {Module: "strings", Body: toJSON(stringRes)}, {Module: "histogram", Body: toJSON(bs.ByteHistogram(data))}, {Module: "byte_entropy", Body: toJSON(bs.ByteEntropyHistogram(data))}, - {Module: "fileformat", Body: toJSON(format)}, + {Module: "fileformat", Body: toJSON(fileFormat)}, + {Module: "file_extension", Body: toJSON(fileExt)}, } msg := &pb.Message{Sha256: sha256, Payload: payloads} diff --git a/services/meta/sigs.go b/services/meta/sigs.go index 02d8df9a..2992a7a6 100644 --- a/services/meta/sigs.go +++ b/services/meta/sigs.go @@ -4,6 +4,12 @@ package meta +import ( + "strings" + + "github.com/go-enry/go-enry/v2" +) + // Compilers, Installers, Packers names as seen by DiE (Detect It Easy) // This map a signature name substring into a tag. 
var sigMap = map[string]string{ @@ -77,26 +83,183 @@ var sigMap = map[string]string{ } var typeMap = map[string]string{ - "MS-DOS": "msdos", - "PE32": "pe", - "ELF": "elf", - "Mach-O": "mach-o", - "MS Windows shortcut": "lnk", - "XML": "xml", - "HTML": "html", - "PDF": "pdf", - "Macromedia Flash": "swf", - "DOS batch file": "bat", + // binary executables + "PE32": "pe", + "ELF": "elf", + "Mach-O": "mach-o", + "MS Windows shortcut": "lnk", + "MS-DOS": "msdos", + + // documents + "PDF document": "pdf", + "Rich Text Format": "rtf", + "Microsoft Word 2007+": "ooxml", + "Microsoft Excel 2007+": "ooxml", + "Microsoft PowerPoint 2007+": "ooxml", + "Composite Document File V2 Document": "ole2", + + // images and media + "PC bitmap": "bmp", + "JPEG image data": "jpeg", + "PNG image data": "png", + "GIF image data": "gif", + "SVG Scalable Vector": "svg", + "Macromedia Flash": "swf", + + // archives "Zip archive data": "zip", "RAR archive data": "rar", "7-zip archive data": "7-zip", "gzip compressed data": "gzip", + "bzip2 compressed data": "bzip2", + "tar archive": "tar", + "XZ compressed data": "xz", "Java archive data (JAR)": "jar", - "JPEG image data": "jpeg", - "PNG image data": "png", - "GIF image data": "gif", - "SVG Scalable Vector": "svg", - "Rich Text Format": "rtf", - "ISO 9660 CD-ROM": "iso", "Microsoft Cabinet archive": "cab", + + // misc + "ISO 9660 CD-ROM": "iso", + + // text-based: xml, html, js, hta, swf, .... + "ASCII text": "txt", + "Unicode text": "txt", + "ISO-8859 text": "txt", + "Unicode (with BOM) text": "txt", +} + +func guessFileExtension(data []byte, magic string, format string, trid []string) string { + + switch format { + case "lnk": + return "lnk" + + // documents + case "pdf": + return "pdf" + case "rtf": + return "rtf" + case "ooxml": + // for now we assume that it is .X, later, we need to do more parsing + // to figure out the exact extension (docm, dotm, ...) 
+ if strings.Contains(magic, "Word") { + return "docx" + } else if strings.Contains(magic, "Excel") { + return "xlsx" + } else if strings.Contains(magic, "PowerPoint") { + return "pptx" + } + case "ole2": + // same remark as with ooxml. + if strings.Contains(magic, "Word") { + return "doc" + } else if strings.Contains(magic, "Excel") { + return "xls" + } else if strings.Contains(magic, "PowerPoint") { + return "ppt" + } + // If file magic does not work, try trid. + if len(trid) > 0 { + tridOut := trid[0] + if strings.Contains(tridOut, "Publisher") { + return "pub" + } + } + // images and media + case "bmp": + return "bmp" + case "jpeg": + return "jpeg" + case "png": + return "png" + case "gif": + return "gif" + case "svg": + return "svg" + case "swf": + return "swf" + + // archives + case "zip": + return "zip" + case "rar": + return "rar" + case "7-zip": + return "7z" + case "gzip": + return "gz" + case "bzip2": + return "bz2" + case "tar": + return "tar" + case "xz": + return "xz" + case "jar": + return "jar" + case "cab": + return "cab" + + // misc + case "iso": + return "iso" + + // txt based files: powershell, batch, html, javascript, + // vbscript, jscript, wsf, hta + + case "txt": + // Order matters in this logic. + if IsWsf(data) { + return "wsf" + } + + // HTML or HTA + if strings.Contains(magic, "HTML document") { + if IsHtmlApp(data) { + return "hta" + } + + return "html" + } + + lang, _ := enry.GetLanguageByClassifier(data, []string{ + "powershell", "batch", "vbscript", "javascript"}) + switch lang { + case "PowerShell": + return "ps1" + case "JavaScript": + return "js" + case "VBScript": + return "vbs" + case "Batchfile": + return "bat" + } + + } + return "?" 
+} + +func IsHtmlApp(data []byte) bool { + + content := strings.ToLower(string(data)) + content = strings.Join(strings.Fields(content), " ") + if strings.Contains(content, "<hta:application") { + return true + } + + return false } diff --git a/services/pe/pe.go b/services/pe/pe.go index 9356cdd3..ecd2eb9a 100644 --- a/services/pe/pe.go +++ b/services/pe/pe.go @@ -97,19 +97,24 @@ func (s *Service) HandleMessage(m *gonsq.Message) error { return nil } - // Extract some PE related tags. + // Extract PE related tags and file extension. var tags []string + var ext string if file.IsEXE() { tags = append(tags, "exe") + ext = "exe" } else if file.IsDLL() { - tags = append(tags, "sys") + tags = append(tags, "dll") + ext = "dll" } else if file.IsDriver() { - tags = append(tags, "dll") + tags = append(tags, "sys") + ext = "sys" } payloads := []*pb.Message_Payload{ {Module: "pe", Body: curate(file)}, {Module: "tags.pe", Body: toJSON(tags)}, + {Module: "file_extension", Body: toJSON(ext)}, } file.Close() @@ -205,11 +210,6 @@ func curate(file *pe.File) []byte { fields = append(fields, "exception") } - if file.HasSecurity { - m["security"] = file.Certificates - fields = append(fields, "security") - } - if file.HasReloc { m["reloc"] = file.Relocations fields = append(fields, "reloc") @@ -255,6 +255,20 @@ func curate(file *pe.File) []byte { fields = append(fields, "clr") } + if file.HasSecurity { + m["security"] = file.Certificates + if file.IsSigned { + if file.Certificates.Verified { + m["signature"] = "Signed file, valid signature" + } else { + m["signature"] = "Signed file, invalid signature" + } + } + fields = append(fields, "security") + } else { + m["signature"] = "File is not signed" + } + m["meta"] = fields return toJSON(m) }