In [166]:
from pathlib import Path
from pprint import pprint
import sys

import lief
from lief import PE
import numpy as np
import pandas as pd
import pefile

In [2]:
path = "../outputs/5/files/0003f09b129fac317bdf19e32dca9e189045d89c23eeeb9868861ac5f8a3e211.exe"

# Parse and Manipulate Formats

In [3]:
binary = lief.parse(path)

Unable to find the section associated with CERTIFICATE_TABLE


In [4]:
print(binary.dos_header)
print("-" * 32)
print(binary.header)
print("-" * 32)
print(binary.optional_header)

Magic:                        5a4d
Used Bytes In The LastPage:   90
File Size In Pages:           3
Number Of Relocation:         0
Header Size In Paragraphs:    4
Minimum Extra Paragraphs:     0
Maximum Extra Paragraphs:     ffff
Initial Relative SS:          0
Initial SP:                   b8
Checksum:                     0
Initial IP:                   0
Initial Relative CS:          0
Address Of Relocation Table:  40
Overlay Number:               0
OEM id:                       0
OEM info:                     0
Address Of New Exe Header:    c8

--------------------------------
Signature:                    50 45 0 0 
Machine:                      UNKNOWN
Number Of Sections:           5
Pointer To Symbol Table:      0
Number Of Symbols:            0
Size Of Optional Header:      e0
Characteristics:              RELOCS_STRIPPED - EXECUTABLE_IMAGE - LINE_NUMS_STRIPPED - LOCAL_SYMS_STRIPPED - CHARA_32BIT_MACHINE
Time Date Stamp:              57956397

--------------------------------
M

In [5]:
print(binary.get_section(".text"))

.text     615d      1000      6200      400       0         6.47403   CNT_CODE - MEM_EXECUTE - MEM_READ


In [6]:
for func in binary.imported_functions:
  print(func)

SetCurrentDirectoryW - 0x8070
GetFileAttributesW - 0x8074
GetFullPathNameW - 0x8078
Sleep - 0x807c
GetTickCount - 0x8080
CreateFileW - 0x8084
GetFileSize - 0x8088
MoveFileW - 0x808c
SetFileAttributesW - 0x8090
GetModuleFileNameW - 0x8094
CopyFileW - 0x8098
ExitProcess - 0x809c
SetEnvironmentVar... - 0x80a0
GetWindowsDirectoryW - 0x80a4
GetTempPathW - 0x80a8
GetCommandLineW - 0x80ac
GetVersion - 0x80b0
SetErrorMode - 0x80b4
WaitForSingleObject - 0x80b8
GetCurrentProcess - 0x80bc
CompareFileTime - 0x80c0
GlobalUnlock - 0x80c4
GlobalLock - 0x80c8
CreateThread - 0x80cc
GetLastError - 0x80d0
CreateDirectoryW - 0x80d4
CreateProcessW - 0x80d8
RemoveDirectoryW - 0x80dc
lstrcmpiA - 0x80e0
GetTempFileNameW - 0x80e4
WriteFile - 0x80e8
lstrcpyA - 0x80ec
lstrcpyW - 0x80f0
MoveFileExW - 0x80f4
lstrcatW - 0x80f8
GetSystemDirectoryW - 0x80fc
GetProcAddress - 0x8100
GetModuleHandleA - 0x8104
GlobalFree - 0x8108
GlobalAlloc - 0x810c
GetShortPathNameW - 0x8110
SearchPathW - 0x8114
lstrcmpiW - 0x8118
SetF

In [7]:
for imported_library in binary.imports:
  print("Library name: " + imported_library.name)
  for func in imported_library.entries:
    if not func.is_ordinal:
      print(func.name)
    print(func.iat_address)

Library name: KERNEL32.dll
SetCurrentDirectoryW
32880
GetFileAttributesW
32884
GetFullPathNameW
32888
Sleep
32892
GetTickCount
32896
CreateFileW
32900
GetFileSize
32904
MoveFileW
32908
SetFileAttributesW
32912
GetModuleFileNameW
32916
CopyFileW
32920
ExitProcess
32924
SetEnvironmentVariableW
32928
GetWindowsDirectoryW
32932
GetTempPathW
32936
GetCommandLineW
32940
GetVersion
32944
SetErrorMode
32948
WaitForSingleObject
32952
GetCurrentProcess
32956
CompareFileTime
32960
GlobalUnlock
32964
GlobalLock
32968
CreateThread
32972
GetLastError
32976
CreateDirectoryW
32980
CreateProcessW
32984
RemoveDirectoryW
32988
lstrcmpiA
32992
GetTempFileNameW
32996
WriteFile
33000
lstrcpyA
33004
lstrcpyW
33008
MoveFileExW
33012
lstrcatW
33016
GetSystemDirectoryW
33020
GetProcAddress
33024
GetModuleHandleA
33028
GlobalFree
33032
GlobalAlloc
33036
GetShortPathNameW
33040
SearchPathW
33044
lstrcmpiW
33048
SetFileTime
33052
CloseHandle
33056
ExpandEnvironmentStringsW
33060
lstrcmpW
33064
GetDiskFreeSpaceW
33

In [8]:
builder = lief.PE.Builder(binary)
builder.build_imports(True)
builder.patch_imports(True)
builder.build()
builder.write("result.exe")

# Create a PE from Scratch

In [9]:
title   = "LIEF is awesome\0"
message = "Hello World\0"

data =  list(map(ord, title))
data += list(map(ord, message))

code = [
        0x6a, 0x00,                         # push 0x00 uType
        0x68, 0x00, 0x20, 0x40, 0x00,       # push VA(title)
        0x68, 0x10, 0x20, 0x40, 0x00,       # push VA(message)
        0x6a, 0x00,                         # push 0 hWnd
        0xFF, 0x15, 0x54, 0x30, 0x40, 0x00, # call MessageBoxA
        0x6A, 0x00,                         # push 0 uExitCode
        0xFF, 0x15, 0x4C, 0x30, 0x40, 0x00  # call ExitProcess
        ]

In [10]:
section_text                 = PE.Section(".text")
section_text.content         = code
section_text.virtual_address = 0x1000

section_data                 = PE.Section(".data")
section_data.content         = data
section_data.virtual_address = 0x2000

In [11]:
binary32 = PE.Binary("pe_from_scratch", PE.PE_TYPE.PE32)

In [12]:
section_text = binary32.add_section(section_text, PE.SECTION_TYPES.TEXT)
section_data = binary32.add_section(section_data, PE.SECTION_TYPES.DATA)
print(section_text)
print(section_data)

.text     1c        1000      200       400       0         0.362359  CNT_CODE - MEM_EXECUTE - MEM_READ
.data     1c        2000      200       600       0         0.487347  CNT_INITIALIZED_DATA - MEM_READ - MEM_WRITE


In [13]:
binary32.optional_header.addressof_entrypoint = section_text.virtual_address

kernel32 = binary32.add_library("kernel32.dll")
kernel32.add_entry("ExitProcess")

user32 = binary32.add_library("user32.dll")
user32.add_entry("MessageBoxA")

<lief.PE.ImportEntry at 0x7f0c34455a30>

In [14]:
ExitProcess_addr = binary32.predict_function_rva("kernel32.dll", "ExitProcess")
MessageBoxA_addr = binary32.predict_function_rva("user32.dll", "MessageBoxA")
print("Address of 'ExitProcess': 0x{:06x} ".format(ExitProcess_addr))
print("Address of 'MessageBoxA': 0x{:06x} ".format(MessageBoxA_addr))

Address of 'ExitProcess': 0x00304c 
Address of 'MessageBoxA': 0x003054 


In [15]:
builder = PE.Builder(binary32)
builder.build_imports(True)
builder.build()
builder.write("pe_from_scratch.exe")

# PE Hooking

In [16]:
title = "LIEF is awesome\0"
data  =  list(map(ord, title))
code = [
        0x48, 0x83, 0xc4, 0x48,                                     # add rsp, 0x48         ; Stack unwind
        0x48, 0x31, 0xc9,                                           # xor rcx, rcx          ; hWnd
        0x48, 0x89, 0xd2,                                           # mov rdx, rdx          ; Message
        0x49, 0xb8, 0x00, 0x90, 0x00, 0x40, 0x01, 0x00, 0x00, 0x00, # mov r8,  0x0140009000 ; Title
        0x4d, 0x31, 0xc9,                                           # xor r9, r9            ; MB_OK
        0x48, 0xb8, 0xe4, 0xa3, 0x00, 0x40, 0x01, 0x00, 0x00, 0x00, # mov rax, 0x014000A3E4 ; MessageBoxA address
        0xff, 0x10,                                                 # call [rax]            ; MessageBoxA(hWnd, Message, Title, MB_OK)
        0x48, 0x31, 0xc9,                                           # xor rcx, rcx          ; exit value
        0x48, 0xb8, 0xd4, 0xa3, 0x00, 0x40, 0x01, 0x00, 0x00, 0x00, # mov rax, 0x014000A3d4 ; ExitProcess address
        0xff, 0x10,                                                 # call [rax]            ; ExitProcess(0)
        0xc3,                                                       # ret                   ; Never reached
        ]

In [17]:
# Create a '.text' section which will contain the hooking code
section_text                 = lief.PE.Section(".htext")
section_text.content         = code
section_text.virtual_address = 0x8000
section_text.characteristics = (lief.PE.SECTION_CHARACTERISTICS.CNT_CODE | lief.PE.SECTION_CHARACTERISTICS.MEM_READ | lief.PE.SECTION_CHARACTERISTICS.MEM_EXECUTE)
print(section_text)

.htext    0         8000      0         0         0         3.76004   CNT_CODE - MEM_EXECUTE - MEM_READ


In [18]:
# Create '.data' section for the string(s)
section_data                 = lief.PE.Section(".hdata")
section_data.content         = data
section_data.virtual_address = 0x9000
section_data.characteristics = (lief.PE.SECTION_CHARACTERISTICS.CNT_INITIALIZED_DATA | lief.PE.SECTION_CHARACTERISTICS.MEM_READ)
print(section_data)

.hdata    0         9000      0         0         0         3.625     CNT_INITIALIZED_DATA - MEM_READ


In [19]:
binary = lief.parse("PE64_x86-64_binary_HelloWorld.exe")
# Disable ASLR
binary.optional_header.dll_characteristics &= ~lief.PE.DLL_CHARACTERISTICS.DYNAMIC_BASE # bitwise not
# Disable NX protection
binary.optional_header.dll_characteristics &= ~lief.PE.DLL_CHARACTERISTICS.NX_COMPAT # bitwise not

In [20]:
# Add the sections
section_text = binary.add_section(section_text)
section_data = binary.add_section(section_data)

# Add the 'ExitProcess' function to kernel32
kernel32 = binary.get_import("KERNEL32.dll")
kernel32.add_entry("ExitProcess")

# Add the 'user32.dll' library
user32 = binary.add_library("user32.dll")

# Add the 'MessageBoxA' function
user32.add_entry("MessageBoxA")

<lief.PE.ImportEntry at 0x7f0c3444bc70>

In [21]:
ExitProcess_addr = binary.predict_function_rva("KERNEL32.dll", "ExitProcess")
MessageBoxA_addr = binary.predict_function_rva("user32.dll", "MessageBoxA")
print("Address of 'MessageBoxA': 0x{:06x} ".format(MessageBoxA_addr))
print("Address of 'ExitProcess': 0x{:06x} ".format(ExitProcess_addr))

Address of 'MessageBoxA': 0x00a3e4 
Address of 'ExitProcess': 0x00a3d4 


In [22]:
# Hook the '__acrt_iob_func' function with our code
binary.hook_function("__acrt_iob_func", binary.optional_header.imagebase + section_text.virtual_address)
# Invoke the builder
builder = lief.PE.Builder(binary)
# Configure it to rebuild and patch the imports
builder.build_imports(True).patch_imports(True)
# Build !
builder.build()
# Save the result
builder.write("lief_pe64_hooking.exe")

# PE Resources

In [23]:
filezilla = lief.parse("filezilla.exe")
if not filezilla.has_resources:
    print("'{}' has no resources. Abort!".format(filezilla.name), file=sys.stderr)
    sys.exit(1)
root = filezilla.resources

In [24]:
childs = list(root.childs)
print(childs)
ids = [e.id for e in childs]
print(ids)
names = [type(e) for e in childs]
print(names)
filtered = list(filter(lambda e : e.id == lief.PE.RESOURCE_TYPES.MANIFEST, root.childs))
print(filtered)
lief.PE.RESOURCE_TYPES.MANIFEST

[<lief.PE.ResourceDirectory object at 0x7f0c34494930>, <lief.PE.ResourceDirectory object at 0x7f0c34494d70>, <lief.PE.ResourceDirectory object at 0x7f0c344949f0>, <lief.PE.ResourceDirectory object at 0x7f0c34494cf0>, <lief.PE.ResourceDirectory object at 0x7f0c344948f0>, <lief.PE.ResourceDirectory object at 0x7f0c344d12f0>, <lief.PE.ResourceDirectory object at 0x7f0c344d1a30>, <lief.PE.ResourceDirectory object at 0x7f0c344d11b0>]
[1, 2, 3, 4, 12, 14, 16, 24]
[<class 'lief.PE.ResourceDirectory'>, <class 'lief.PE.ResourceDirectory'>, <class 'lief.PE.ResourceDirectory'>, <class 'lief.PE.ResourceDirectory'>, <class 'lief.PE.ResourceDirectory'>, <class 'lief.PE.ResourceDirectory'>, <class 'lief.PE.ResourceDirectory'>, <class 'lief.PE.ResourceDirectory'>]
[]


<RESOURCE_TYPES.MANIFEST: 24>

In [25]:
# First level => Type
manifest_node = next(filter(lambda e : e.id == lief.PE.RESOURCE_TYPES.MANIFEST, root.childs))
print(manifest_node)

StopIteration: 

In [None]:
# Second level => ID
id_node = manifest_node.childs[0]
print(id_node)

In [None]:
# Third level => Lang (Data node)
lang_node = id_node.childs[0]
print(lang_node)

In [None]:
manifest = bytes(lang_node.content).decode("utf8")
print(manifest)

In [None]:
manifest = manifest.replace("asInvoker", "requireAdministrator")
print(manifest)

In [None]:
lang_node.content = list(manifest.encode("utf8"))

In [None]:
# Rebuild
builder = lief.PE.Builder(filezilla)
builder.build_resources(True)
builder.build()
builder.write("filezilla_v2.exe")

# Identify Offsets

In [None]:
import lief
path = "../outputs/5/files/0003f09b129fac317bdf19e32dca9e189045d89c23eeeb9868861ac5f8a3e211.exe"
binary = lief.parse(path)

In [None]:
section = binary.get_section(".text")

In [None]:
type(section)

In [None]:
section.offset, section.size
section.offset + section.size

In [None]:
sorted([s.offset for s in binary.sections])

In [None]:
try:
    binary.get_section("jhbfds")
except Exception as e:
    print(type(e))
    raise e

# Parse returning None

In [None]:
path = "/home/lk3591/Documents/datasets/Windows/processed/train/7e4c888acfca1c4068a76f1b61874554d7c3e4dc335f6d298ff8abbe.exe"
binary = lief.parse(path)

In [None]:
print(binary)

# Modifying Malware to Make it Less Suspicious

In [10]:
import capstone as cs
import lief
path = "../outputs/5/files/0003f09b129fac317bdf19e32dca9e189045d89c23eeeb9868861ac5f8a3e211.exe"
binary = lief.parse(path)

Unable to find the section associated with CERTIFICATE_TABLE


In [11]:
text = binary.get_section(".text")
content = [hex(i) for i in text.content]

In [12]:
cs

## Weird

In [19]:
from pathlib import Path
import lief
lief.__version__

'0.12.2-2169578d'

In [26]:
path = "/home/lk3591/Documents/datasets/Sorel/processed/train/0008c1b3154f651e089c62c9b289911f808dd85992e3392db2f5d443e821b4a6"
path_analogue = Path(path).name + "analogue"

In [21]:
binary = lief.parse(path)
binary.virtual_size

Can't read the padding content of section '.text'
Data of section section '.rdata' is too large (0xfffdb828)
Data of section section '.data' is too large (0xfffd4828)


368640

In [22]:
sections = list(binary.sections)
c = lief.PE.SECTION_CHARACTERISTICS(32)
for s in sections:
    print(f"{s.name}: {s.offset} - {s.offset + s.size} {s.has_characteristic(c)}")

.text: 4096 - 221184 True
.rdata: 221184 - 249856 False
.data: 249856 - 278528 False


In [14]:
binary.get_section(".text").content = [0x0 for _ in range(221184 - 4096)]

In [15]:
builder = lief.PE.Builder(binary)
builder.build_imports(True)
builder.build()
builder.write(path + "analogue")

In [16]:
b = open(path, "rb").read()
len(b)

71720

In [27]:
binary_analogue = lief.parse(path_analogue)
sections = list(binary_analogue.sections)

In [28]:
sections = list(binary_analogue.sections)
c = lief.PE.SECTION_CHARACTERISTICS(32)
for s in sections:
    print(f"{s.name}: {s.offset} - {s.offset + s.size} {s.has_characteristic(c)}")

.text: 4096 - 221184 True
.rdata: 221184 - 249856 False
.data: 249856 - 278528 False


In [29]:
b2 = open(path_analogue, "rb").read()
len(b2)

278528

In [34]:
binary_analogue.get_section(".text").size

217088

In [38]:
c = binary_analogue.get_section(".text").content

In [39]:
len(list(c))

214174

## Malware's offsets larger than file length

In [70]:
import lief

In [71]:
path = "/home/lk3591/Documents/datasets/Sorel/processed/train/001856267679fb6001cd00f230043326e1b9fc210874dd4c839761d51f4e9a73"

In [72]:
# LIEF's backend engine prints some complaints, but it does this for a lot of files
binary = lief.parse(path)

Can't read the padding content of section '.text'
Can't read the padding content of section '.data'
Data of section section '.rsrc' is too large (0xffff48db)


In [73]:
# Some information about the various sections within the executable
for s in binary.sections:
    print(f"Name: {s.name}\tOffset (start):{s.offset}\tOffset (end):{s.offset + s.size}")

Name: .text	Offset (start):4096	Offset (end):69632
Name: .data	Offset (start):0	Offset (end):0
Name: .rsrc	Offset (start):69632	Offset (end):98304


In [74]:
# We can determine the number of bytes in the file by reading them
raw = open(path, "rb").read()
len(raw)

22747

In [75]:
# The file appears not to be gzipped
import gzip
try:
    with gzip.open(path, "rb") as f:
        print(f.read())
except gzip.BadGzipFile:
    print("Not a gzipped file.")

Not a gzipped file.


In [78]:
text_section = binary.get_section(".text")

In [81]:
text_content = list(text_section.content)

In [92]:
dos_header = binary.dos_header
header = binary.header

In [101]:
header.pointerto_symbol_table

0

# Identifying .text sections in Malware/Benign-ware

In [18]:
import pefile
from torch import Tensor
from tqdm import tqdm

from classifier import (
    confidence_scores,
    get_model,
    MALCONV_PATH,
    SOREL_TRAIN_PATH,
    WINDOWS_TRAIN_PATH,
    WINDOWS_TEST_PATH,
)
from executable_helper import read_binary, text_section_bounds

In [2]:
def print_pe_summary(pe: pefile.PE):
    for s in pe.sections:
        section = s.Name.decode("utf-8", errors="ignore")
        addr = s.Misc_PhysicalAddress
        size = s.SizeOfRawData
        v_addr = s.VirtualAddress
        v_size = s.Misc_VirtualSize
        print(f"{section=}\n\t{addr=}\n\t{size=}")#\n\t{v_addr=}\n\t{v_size=}")

def file_size(path):
    return len(open(path, 'rb').read())

def get_text_section(pe: pefile.PE):
    for s in pe.sections:
        if ".text" in s.Name.decode("utf-8", errors="ignore"):
            return s
    return None

model = get_model(MALCONV_PATH)

In [3]:
# Strange malware with .text section larger than file length
path = SOREL_TRAIN_PATH / "001856267679fb6001cd00f230043326e1b9fc210874dd4c839761d51f4e9a73"
pe = pefile.PE(path)
ts = get_text_section(pe)
print(f"FileSize={file_size(path)}")
print(f"{ts.PointerToRawData=}")
print(f"{ts.SizeOfRawData=}")

FileSize=22747
ts.PointerToRawData=4096
ts.SizeOfRawData=65536


In [4]:
print(f"FileSize:{hex(file_size(path))}")
print(pe.dump_info())

FileSize:0x58db

Byte 0x00 makes up 51.8398% of the file's contents. This may indicate truncation / malformation.

Error parsing section 0. SizeOfRawData is larger than file.

Error parsing section 2. SizeOfRawData is larger than file.

Error parsing section 2. PointerToRawData points beyond the end of the file.

Corrupt header "IMAGE_IMPORT_DESCRIPTOR" at file offset 65508. Exception: 'Data length less than expected header length.'

Corrupt header "IMAGE_RESOURCE_DIRECTORY" at file offset 69632. Exception: 'Data length less than expected header length.'

Invalid resources directory. Can't parse directory data at RVA: 0x14000

----------DOS_HEADER----------

[IMAGE_DOS_HEADER]
0x0        0x0   e_magic:                       0x5A4D    
0x2        0x2   e_cblp:                        0x90      
0x4        0x4   e_cp:                          0x3       
0x6        0x6   e_crlc:                        0x0       
0x8        0x8   e_cparhdr:                     0x4       
0xA        0xA   e_

In [5]:
X = Tensor(read_binary(path)).unsqueeze(0)
conf = confidence_scores(model, X).item()
conf

0.998144268989563

In [6]:
# Normal malware
path = SOREL_TRAIN_PATH / "00264f6c56cf21a8bd3202fccd7758221f363ca027c474d84901bb9f5e5f67dd"
pe = pefile.PE(path)
print(f"FileSize={file_size(path)}")
print(f"{ts.PointerToRawData=}")
print(f"{ts.SizeOfRawData=}")

FileSize=1102372
ts.PointerToRawData=4096
ts.SizeOfRawData=65536


In [7]:
print(f"FileSize:{hex(file_size(path))}")
print(pe.dump_info())

FileSize:0x10d224

Error parsing section 1. SizeOfRawData is larger than file.

Error parsing section 2. SizeOfRawData is larger than file.

Error parsing section 2. PointerToRawData points beyond the end of the file.

Error parsing section 3. SizeOfRawData is larger than file.

Error parsing section 3. PointerToRawData points beyond the end of the file.

Error parsing section 4. SizeOfRawData is larger than file.

Error parsing section 4. PointerToRawData points beyond the end of the file.

Error parsing section 5. SizeOfRawData is larger than file.

Error parsing section 5. PointerToRawData points beyond the end of the file.

Error parsing section 6. SizeOfRawData is larger than file.

Error parsing section 6. PointerToRawData points beyond the end of the file.

Error parsing section 7. SizeOfRawData is larger than file.

Error parsing section 7. PointerToRawData points beyond the end of the file.

Too many errors parsing the import directory. Invalid import data at RVA: 0x10dac4

Co

In [8]:
X = Tensor(read_binary(path)).unsqueeze(0)
conf = confidence_scores(model, X).item()
conf

0.9983325600624084

In [9]:
# Benign-ware with a large .text section
path = WINDOWS_TRAIN_PATH / "09024e62ccab97df3b535e1d65025c54d2d8a684b9e6dcebba79786d.exe"
pe = pefile.PE(path)
print(f"FileSize={file_size(path)}")
print(f"{ts.PointerToRawData=}")
print(f"{ts.SizeOfRawData=}")

FileSize=4266712
ts.PointerToRawData=4096
ts.SizeOfRawData=65536


In [10]:
print(f"FileSize:{hex(file_size(path))}")
print(pe.dump_info())

FileSize:0x411ad8
----------DOS_HEADER----------

[IMAGE_DOS_HEADER]
0x0        0x0   e_magic:                       0x5A4D    
0x2        0x2   e_cblp:                        0x90      
0x4        0x4   e_cp:                          0x3       
0x6        0x6   e_crlc:                        0x0       
0x8        0x8   e_cparhdr:                     0x4       
0xA        0xA   e_minalloc:                    0x0       
0xC        0xC   e_maxalloc:                    0xFFFF    
0xE        0xE   e_ss:                          0x0       
0x10       0x10  e_sp:                          0xB8      
0x12       0x12  e_csum:                        0x0       
0x14       0x14  e_ip:                          0x0       
0x16       0x16  e_cs:                          0x0       
0x18       0x18  e_lfarlc:                      0x40      
0x1A       0x1A  e_ovno:                        0x0       
0x1C       0x1C  e_res:                         
0x24       0x24  e_oemid:                       0x0     

In [11]:
X = Tensor(read_binary(path)).unsqueeze(0)
conf = confidence_scores(model, X).item()
conf

0.0006344712455756962

In [12]:
# Benign-ware
path = WINDOWS_TRAIN_PATH / "f20a100e661a3179976ccf06ce4a773cbe8d19cd8f50f14e41c0a9e6.exe"
pe = pefile.PE(path)
print(f"FileSize={file_size(path)}")
print(f"{ts.PointerToRawData=}")
print(f"{ts.SizeOfRawData=}")

FileSize=116224
ts.PointerToRawData=4096
ts.SizeOfRawData=65536


In [13]:
print(f"FileSize:{hex(file_size(path))}")
print(pe.dump_info())

FileSize:0x1c600

Byte 0xcc makes up 26.9075% of the file's contents. This may indicate truncation / malformation.

----------DOS_HEADER----------

[IMAGE_DOS_HEADER]
0x0        0x0   e_magic:                       0x5A4D    
0x2        0x2   e_cblp:                        0x90      
0x4        0x4   e_cp:                          0x3       
0x6        0x6   e_crlc:                        0x0       
0x8        0x8   e_cparhdr:                     0x4       
0xA        0xA   e_minalloc:                    0x0       
0xC        0xC   e_maxalloc:                    0xFFFF    
0xE        0xE   e_ss:                          0x0       
0x10       0x10  e_sp:                          0xB8      
0x12       0x12  e_csum:                        0x0       
0x14       0x14  e_ip:                          0x0       
0x16       0x16  e_cs:                          0x0       
0x18       0x18  e_lfarlc:                      0x40      
0x1A       0x1A  e_ovno:                        0x0       
0x1C   

In [14]:
X = Tensor(read_binary(path)).unsqueeze(0)
conf = confidence_scores(model, X).item()
conf

3.3748142413969617e-06

In [21]:
data = []
for path in tqdm(list(WINDOWS_TEST_PATH.iterdir())):
    X = Tensor(read_binary(path)).unsqueeze(0)
    conf = confidence_scores(model, X).item()
    _, l, u = next(text_section_bounds(path, "pefile", "ignore"))
    data.append((path, l, u, conf))

  3%|▎         | 39/1464 [00:11<06:49,  3.48it/s]

Found 1 / 1 good files.





StopIteration: 

In [23]:
data[0:5]

[(PosixPath('/home/lk3591/Documents/datasets/Windows/processed/test/f6bb0b1f6a55cceb02598ba37f744d12b1b44064e1fd44c357c3303d.exe'),
  1024,
  39936,
  9.286905272576362e-11),
 (PosixPath('/home/lk3591/Documents/datasets/Windows/processed/test/d12a0fab485a1f045db0837fe337e9fd5b72c8aa37b399e145b90c39.exe'),
  1024,
  304640,
  3.468932118266821e-05),
 (PosixPath('/home/lk3591/Documents/datasets/Windows/processed/test/701f928760a612a1e929551ca12363394922f30c7f8181f4df5b0ec0.exe'),
  1024,
  2668032,
  1.5929343177845112e-08),
 (PosixPath('/home/lk3591/Documents/datasets/Windows/processed/test/509a8487983a3525b6045f701bb90d3346247584c2759bed1a78fbd5.exe'),
  4096,
  2568192,
  0.0002767131954897195),
 (PosixPath('/home/lk3591/Documents/datasets/Windows/processed/test/6e2b9d62a3d2c20439273751894a5f44dffe5eb8d00e39e45f1c56aa.exe'),
  1024,
  2668032,
  1.5929343177845112e-08)]