Skip to content

Commit

Permalink
feat: guess file extension and include PE signature (#379)
Browse files Browse the repository at this point in the history
* feat: create a new entry inside PE to hold the signature status

* chore: bump exiftool version

* feat: guess file extension based on content

* chore: don't fail fast in github action release/avupdate
  • Loading branch information
LordNoteworthy committed Sep 3, 2022
1 parent 458000b commit e529265
Show file tree
Hide file tree
Showing 13 changed files with 236 additions and 35 deletions.
1 change: 1 addition & 0 deletions .github/workflows/av-engine-update.yaml
Expand Up @@ -9,6 +9,7 @@ jobs:
release:
runs-on: ubuntu-20.04
strategy:
fail-fast: false
matrix:
service:
- goavira
Expand Down
1 change: 1 addition & 0 deletions .github/workflows/release.yaml
Expand Up @@ -7,6 +7,7 @@ jobs:
release:
runs-on: ubuntu-20.04
strategy:
fail-fast: false
matrix:
service:
- pe
Expand Down
1 change: 1 addition & 0 deletions Makefile
Expand Up @@ -58,3 +58,4 @@ include build/mk/saferwall.mk
include build/mk/elastic.mk
include build/mk/vagrant.mk
include build/mk/github.mk
include build/mk/yarn.mk
2 changes: 1 addition & 1 deletion build/docker/Dockerfile.meta
Expand Up @@ -25,7 +25,7 @@ RUN echo "Installing TRiD..." \
&& mv triddefs.trd /usr/bin/

####### Installing Exiftool #######
ENV EXIF_VER 12.42
ENV EXIF_VER 12.44
RUN echo "Installing Exiftool..." \
&& wget https://exiftool.org/Image-ExifTool-$EXIF_VER.tar.gz \
&& gzip -dc Image-ExifTool-$EXIF_VER.tar.gz | tar -xf - \
Expand Down
6 changes: 3 additions & 3 deletions build/mk/exiftool.mk
@@ -1,11 +1,11 @@
EXIF_VER = 12.41
EXIF_VER = 12.44
exiftool-install: # Install ExifTool
sudo apt-get -qq update
wget https://exiftool.org/Image-ExifTool-$(EXIF_VER).tar.gz
gzip -dc Image-ExifTool-$(EXIF_VER).tar.gz | tar -xf -
cd Image-ExifTool-$(EXIF_VER) \
&& perl Makefile.PL \
&& make test \
&& sudo make install
cd Image-ExifTool-$(EXIF_VER) && sudo cp -r exiftool lib /usr/local/bin
rm Image-ExifTool-$(EXIF_VER).tar.gz
rm -r Image-ExifTool-$(EXIF_VER)
rm -rf Image-ExifTool-$(EXIF_VER)
1 change: 1 addition & 0 deletions build/mk/trid.mk
Expand Up @@ -11,3 +11,4 @@ trid-install: ## Install TRiD
sudo mv /tmp/trid /usr/bin/
sudo mv /tmp/triddefs.trd /usr/bin/
chmod +x /usr/bin/trid
## export LC_ALL=C
5 changes: 5 additions & 0 deletions build/mk/yarn.mk
@@ -0,0 +1,5 @@
yarn-install: ## Install Yarn
curl -sS https://dl.yarnpkg.com/debian/pubkey.gpg | sudo apt-key add -
echo "deb https://dl.yarnpkg.com/debian/ stable main" | sudo tee /etc/apt/sources.list.d/yarn.list
sudo apt-get update && sudo apt-get install yarn -y
yarn --version
2 changes: 2 additions & 0 deletions go.mod
Expand Up @@ -10,6 +10,7 @@ require (
github.com/djherbis/times v1.5.0
github.com/gabriel-vasile/mimetype v1.4.1
github.com/glaslos/ssdeep v0.3.2
github.com/go-enry/go-enry/v2 v2.8.2
github.com/golang/protobuf v1.5.2
github.com/hillu/go-yara/v4 v4.2.3
github.com/minio/minio-go/v7 v7.0.10
Expand All @@ -27,6 +28,7 @@ require (
github.com/couchbase/gocbcore/v10 v10.1.2 // indirect
github.com/edsrzf/mmap-go v1.0.0 // indirect
github.com/fsnotify/fsnotify v1.5.4 // indirect
github.com/go-enry/go-oniguruma v1.2.1 // indirect
github.com/golang/snappy v0.0.4 // indirect
github.com/google/uuid v1.3.0 // indirect
github.com/hashicorp/hcl v1.0.0 // indirect
Expand Down
4 changes: 4 additions & 0 deletions go.sum
Expand Up @@ -142,6 +142,10 @@ github.com/gabriel-vasile/mimetype v1.4.1/go.mod h1:05Vi0w3Y9c/lNvJOdmIwvrrAhX3r
github.com/ghodss/yaml v1.0.0/go.mod h1:4dBDuWmgqj2HViK6kFavaiC9ZROes6MMH2rRYeMEF04=
github.com/glaslos/ssdeep v0.3.2 h1:zvrj04/Wwhpv1JyYDC+bGzsVX3Gs0CwaLQ/w3VHs+SM=
github.com/glaslos/ssdeep v0.3.2/go.mod h1:04PaN40+DUokVJ6JqCJbSWr1Tm7E4toyUsvM8ujtZv4=
github.com/go-enry/go-enry/v2 v2.8.2 h1:uiGmC+3K8sVd/6DOe2AOJEOihJdqda83nPyJNtMR8RI=
github.com/go-enry/go-enry/v2 v2.8.2/go.mod h1:GVzIiAytiS5uT/QiuakK7TF1u4xDab87Y8V5EJRpsIQ=
github.com/go-enry/go-oniguruma v1.2.1 h1:k8aAMuJfMrqm/56SG2lV9Cfti6tC4x8673aHCcBk+eo=
github.com/go-enry/go-oniguruma v1.2.1/go.mod h1:bWDhYP+S6xZQgiRL7wlTScFYBe023B6ilRZbCAD5Hf4=
github.com/go-gl/glfw v0.0.0-20190409004039-e6da0acd62b1/go.mod h1:vR7hzQXu2zJy9AVAgeJqvqgH9Q5CA+iKCZ2gyEVpxRU=
github.com/go-gl/glfw/v3.3/glfw v0.0.0-20191125211704-12ad95a8df72/go.mod h1:tQ2UAYgL5IevRw8kRxooKSPJfGvJ9fJQFa0TUsXzTg8=
github.com/go-gl/glfw/v3.3/glfw v0.0.0-20200222043503-6f7a984d4dc4/go.mod h1:tQ2UAYgL5IevRw8kRxooKSPJfGvJ9fJQFa0TUsXzTg8=
Expand Down
2 changes: 1 addition & 1 deletion services/aggregator/aggregator.go
Expand Up @@ -37,7 +37,7 @@ type Service struct {
db store.DB
}

// New create a new PE scanner service.
// New creates a new aggregator scanner service.
func New(cfg Config, logger log.Logger) (Service, error) {

svc := Service{}
Expand Down
25 changes: 17 additions & 8 deletions services/meta/meta.go
Expand Up @@ -136,18 +136,26 @@ func (s *Service) HandleMessage(m *gonsq.Message) error {
logger.Errorf("packer scan failed with: %v", err)
}

// Determine type.
var format string
// Determine file format.
var fileFormat string
var fileExt string
for k, v := range typeMap {
if strings.HasPrefix(magicRes, k) {
format = v
if strings.Contains(magicRes, k) {
fileFormat = v
break
}
}
if len(format) == 0 {
format = "unknown"
if len(fileFormat) == 0 {
fileFormat = "unknown"
fileExt = "unknown"
}
logger.Debugf("file format is: %s", format)
logger.Debugf("file format is: %s", fileFormat)

// Determine file extension.
if fileFormat != "unknown" {
fileExt = guessFileExtension(data, magicRes, fileFormat, tridRes)
}
logger.Debugf("file extension is: %s", fileExt)

// Extract strings.
asciiStrings := str.GetASCIIStrings(&data, maxStrLength)
Expand Down Expand Up @@ -195,7 +203,8 @@ func (s *Service) HandleMessage(m *gonsq.Message) error {
{Module: "strings", Body: toJSON(stringRes)},
{Module: "histogram", Body: toJSON(bs.ByteHistogram(data))},
{Module: "byte_entropy", Body: toJSON(bs.ByteEntropyHistogram(data))},
{Module: "fileformat", Body: toJSON(format)},
{Module: "fileformat", Body: toJSON(fileFormat)},
{Module: "file_extension", Body: toJSON(fileExt)},
}

msg := &pb.Message{Sha256: sha256, Payload: payloads}
Expand Down
195 changes: 179 additions & 16 deletions services/meta/sigs.go
Expand Up @@ -4,6 +4,12 @@

package meta

import (
"strings"

"github.com/go-enry/go-enry/v2"
)

// Compilers, Installers, Packers names as seen by DiE (Detect It Easy)
// This maps a signature name substring to a tag.
var sigMap = map[string]string{
Expand Down Expand Up @@ -77,26 +83,183 @@ var sigMap = map[string]string{
}

// typeMap maps a substring of the file magic description to a short file
// format tag. The tag is the value guessFileExtension switches on to pick a
// file extension.
var typeMap = map[string]string{
"MS-DOS": "msdos",
"PE32": "pe",
"ELF": "elf",
"Mach-O": "mach-o",
"MS Windows shortcut": "lnk",
"XML": "xml",
"HTML": "html",
"PDF": "pdf",
"Macromedia Flash": "swf",
"DOS batch file": "bat",
// binary executables
"PE32": "pe",
"ELF": "elf",
"Mach-O": "mach-o",
"MS Windows shortcut": "lnk",
"MS-DOS": "msdos",

// documents
"PDF document": "pdf",
"Rich Text Format": "rtf",
"Microsoft Word 2007+": "ooxml",
"Microsoft Excel 2007+": "ooxml",
"Microsoft PowerPoint 2007+": "ooxml",
"Composite Document File V2 Document": "ole2",

// images and media
"PC bitmap": "bmp",
"JPEG image data": "jpeg",
"PNG image data": "png",
"GIF image data": "gif",
"SVG Scalable Vector": "svg",
"Macromedia Flash": "swf",

// archives
"Zip archive data": "zip",
"RAR archive data": "rar",
"7-zip archive data": "7-zip",
"gzip compressed data": "gzip",
"bzip2 compressed data": "bzip2",
"tar archive": "tar",
"XZ compressed data": "xz",
"Java archive data (JAR)": "jar",
"JPEG image data": "jpeg",
"PNG image data": "png",
"GIF image data": "gif",
"SVG Scalable Vector": "svg",
"Rich Text Format": "rtf",
"ISO 9660 CD-ROM": "iso",
"Microsoft Cabinet archive": "cab",

// misc
"ISO 9660 CD-ROM": "iso",

// text-based: xml, html, js, hta, swf, ....
"ASCII text": "txt",
"Unicode text": "txt",
"ISO-8859 text": "txt",
"Unicode (with BOM) text": "txt",
}

// guessFileExtension picks the most likely file extension (without the
// leading dot) for a file.
//
// data is the raw file content, magic is the file magic description, format
// is the file format tag (one of the typeMap values) and trid holds the TRiD
// results. It returns "?" when no extension can be determined for the given
// format.
func guessFileExtension(data []byte, magic string, format string, trid []string) string {

	switch format {
	// Formats whose tag is already the extension: shortcuts, documents,
	// images and media, archives and disk images.
	case "lnk", "pdf", "rtf",
		"bmp", "jpeg", "png", "gif", "svg", "swf",
		"zip", "rar", "tar", "xz", "jar", "cab",
		"iso":
		return format

	// Archives whose usual extension differs from the format tag.
	case "7-zip":
		return "7z"
	case "gzip":
		return "gz"
	case "bzip2":
		return "bz2"

	case "ooxml":
		// Only the plain .X variants are reported for now; more parsing is
		// needed to tell the exact extension apart (docm, dotm, ...).
		switch {
		case strings.Contains(magic, "Word"):
			return "docx"
		case strings.Contains(magic, "Excel"):
			return "xlsx"
		case strings.Contains(magic, "PowerPoint"):
			return "pptx"
		}

	case "ole2":
		// Same limitation as ooxml: only the plain variants are reported.
		switch {
		case strings.Contains(magic, "Word"):
			return "doc"
		case strings.Contains(magic, "Excel"):
			return "xls"
		case strings.Contains(magic, "PowerPoint"):
			return "ppt"
		}
		// The file magic was not conclusive, fall back on the first TRiD
		// entry.
		if len(trid) > 0 && strings.Contains(trid[0], "Publisher") {
			return "pub"
		}

	// Text based files: powershell, batch, html, javascript, vbscript,
	// jscript, wsf, hta.
	case "txt":
		// The order of the checks below matters.
		if IsWsf(data) {
			return "wsf"
		}

		// HTML or HTA.
		if strings.Contains(magic, "HTML document") {
			if IsHtmlApp(data) {
				return "hta"
			}
			return "html"
		}

		// Let the language classifier choose between the scripting
		// languages of interest.
		candidates := []string{
			"powershell", "batch", "vbscript", "javascript"}
		detected, _ := enry.GetLanguageByClassifier(data, candidates)
		switch detected {
		case "PowerShell":
			return "ps1"
		case "JavaScript":
			return "js"
		case "VBScript":
			return "vbs"
		case "Batchfile":
			return "bat"
		}
	}

	return "?"
}

// IsHtmlApp reports whether data contains one of the markers used to
// recognize an HTML Application (HTA). The content is lower-cased and every
// run of whitespace is collapsed into a single space before the markers are
// searched for, so the match is case-insensitive and tolerant of extra
// whitespace.
func IsHtmlApp(data []byte) bool {

	lowered := strings.ToLower(string(data))
	normalized := strings.Join(strings.Fields(lowered), " ")

	markers := []string{
		"<hta:application",
		"<script language=",
		"activexobject",
	}
	for _, marker := range markers {
		if strings.Contains(normalized, marker) {
			return true
		}
	}

	return false
}

// IsWsf reports whether data contains one of the markers used to recognize a
// Windows Script File (WSF). The content is lower-cased and every run of
// whitespace is collapsed into a single space before the markers are searched
// for, so the match is case-insensitive and tolerant of extra whitespace.
//
// NOTE(review): the "<script language=" marker is also one of the IsHtmlApp
// markers, so on its own it does not tell a WSF apart from an HTML/HTA file;
// confirm this overlap is intended.
func IsWsf(data []byte) bool {

	lowered := strings.ToLower(string(data))
	normalized := strings.Join(strings.Fields(lowered), " ")

	markers := []string{
		"<job id=",
		"<script language=",
		"<package>",
	}
	for _, marker := range markers {
		if strings.Contains(normalized, marker) {
			return true
		}
	}

	return false
}

0 comments on commit e529265

Please sign in to comment.