## PE File Preprocessing Pipeline

---
---

## Step-1: PE File Parsing Using LIEF
---


In [None]:
import lief

# Parse the PE file using LIEF; the resulting object is reused by later steps.
binary = lief.PE.parse("malware_sample.exe")

# A falsy result means LIEF could not produce a usable binary object.
if not binary:
    print("Failed to parse PE file")
    # Raise SystemExit directly instead of calling exit(): exit() is a
    # convenience injected by the `site` module for interactive sessions and
    # is not guaranteed to exist (or to behave the same) in every interpreter.
    raise SystemExit(1)

# Display basic PE information: section count and entry-point address (hex).
print(f"Number of sections: {len(binary.sections)}")
print(f"Entry point: 0x{binary.optional_header.addressof_entrypoint:x}")


## Step-2: Target Section Identification
---


In [None]:
# The five target sections named in the paper, in the order they are reported.
target_sections = [".text", ".data", ".rdata", ".rsrc", ".reloc"]

# Index the parsed sections by name with null bytes stripped from both ends.
# setdefault keeps the first occurrence, so duplicate names resolve exactly
# like a first-match scan over binary.sections.
first_by_name = {}
for sec in binary.sections:
    first_by_name.setdefault(sec.name.strip('\x00'), sec)

# Maps each target section name to its content, or None when it is absent.
section_data = {}

for wanted in target_sections:
    match = first_by_name.get(wanted)

    # Missing sections are recorded as None (scored -1 as per the paper).
    if match is None:
        section_data[wanted] = None
        print(f"Section {wanted} not found - will receive -1 score")
        continue

    # Store the section content exactly as LIEF exposes it.
    payload = match.content
    section_data[wanted] = payload
    print(f"Found {wanted}: {len(payload)} bytes")
