In [4]:
import yara
import pefile
import json
import pandas as pd

def scan_with_yara(file_path, yara_rules):
    """Compile a YARA rules file and run it against a single file.

    Args:
        file_path: Path of the file to scan.
        yara_rules: Path of the YARA rules file to compile.

    Returns:
        Whatever ``rules.match`` returns for ``file_path`` (the YARA matches).
    """
    compiled_rules = yara.compile(filepath=yara_rules)
    return compiled_rules.match(file_path)

def find_pe_section(pe, rva):
    """Return the PE section whose address range contains ``rva``.

    Args:
        pe: Parsed PE object exposing a ``sections`` iterable. Each section
            must provide ``VirtualAddress``, ``SizeOfRawData`` and
            ``Misc_VirtualSize``.
        rva: Relative virtual address to look up, or ``None`` when the caller
            could not translate a file offset into an RVA.

    Returns:
        The first section that contains ``rva``, or ``None`` when ``rva`` is
        ``None`` or no section covers it.
    """
    # An unmapped offset (pefile can report it as None) belongs to no section;
    # comparing None against integers below would raise TypeError.
    if rva is None:
        return None

    for section in pe.sections:
        # In RVA space a section spans its virtual size, which may be larger
        # than the raw bytes stored on disk (zero-filled tail). Use the larger
        # of the two so RVAs past the raw data but still inside the mapped
        # section are found, while everything matched previously still matches.
        extent = max(section.Misc_VirtualSize, section.SizeOfRawData)
        start = section.VirtualAddress
        if start <= rva < start + extent:
            return section
    return None

def analyze_file(file_path, yara_rules):
    """Scan a PE file with YARA and map every string hit to its PE section.

    Args:
        file_path: Path of the PE file to scan and parse.
        yara_rules: Path of the YARA rules file handed to ``scan_with_yara``.

    Returns:
        A list of dicts, one per matched string instance that lies inside a
        section, with the keys ``rule``, ``string``, ``section_name`` and
        ``rva``. Hits that cannot be attributed to a section are omitted.
    """
    # Scan the file with Yara
    matches = scan_with_yara(file_path, yara_rules)

    # Parse the file with pefile
    pe = pefile.PE(file_path)

    try:
        results = []

        # For each Yara match, find its PE section
        for match in matches:
            for string_data in match.strings:
                for string_instance in string_data.instances:
                    rva = pe.get_rva_from_offset(string_instance.offset)
                    # Offsets outside every section (e.g. appended overlay
                    # data) may have no RVA; skip them instead of crashing.
                    if rva is None:
                        continue

                    section = find_pe_section(pe, rva)
                    if not section:
                        continue

                    results.append({
                        "rule": match.rule,
                        "string": string_data.identifier,
                        # Section names are raw bytes and need not be valid
                        # UTF-8; replace undecodable bytes rather than
                        # aborting the whole scan.
                        "section_name": section.Name.decode(errors="replace").rstrip('\x00'),
                        "rva": rva
                    })

        return results
    finally:
        # Release the file handle / memory map held by the parsed PE object.
        pe.close()

In [5]:
# Tabulate the YARA string hits (rule, string, section name, RVA) found in
# the `network_windows_amd64.out` sample.
pd.DataFrame(
    analyze_file(
        file_path='./samples/network_windows_amd64.out',
        yara_rules='network_detect.yara',
    )
)

Unnamed: 0,rule,string,section_name,rva
0,network_detect_partial_GET,$a,.text,856434
1,network_detect_c2_domain,$a,.rdata,1047124
2,network_detect_c2_ipv6,$a,.rdata,1052123
3,network_detect_magic,$a,.text,856916
4,network_detect_error_message_string,$a,.rdata,1040108
5,detect_go_binary_buildid,$a,.text,4096


In [6]:
# Tabulate the YARA string hits (rule, string, section name, RVA) found in
# the `network_windows_arm64.out` sample.
pd.DataFrame(
    analyze_file(
        file_path='./samples/network_windows_arm64.out',
        yara_rules='network_detect.yara',
    )
)

Unnamed: 0,rule,string,section_name,rva
0,network_detect_c2_domain,$a,.rdata,1030837
1,network_detect_c2_ipv6,$a,.rdata,1035862
2,network_detect_error_message_string,$a,.rdata,1023821
3,detect_go_binary_buildid,$a,.text,4096
4,detect_go_binary_buildid,$elf,.symtab,2950748


In [7]:
# Tabulate the YARA string hits (rule, string, section name, RVA) found in
# the `network_windows_386.out` sample.
pd.DataFrame(
    analyze_file(
        file_path='./samples/network_windows_386.out',
        yara_rules='network_detect.yara',
    )
)

Unnamed: 0,rule,string,section_name,rva
0,network_detect_partial_GET,$a,.rdata,989099
1,network_detect_full_GET,$a,.rdata,989099
2,network_detect_c2_domain,$a,.rdata,999050
3,network_detect_c2_ipv6,$a,.rdata,1004019
4,network_detect_magic,$a,.text,841094
5,network_detect_error_message_string,$a,.rdata,991940
6,detect_go_binary_buildid,$a,.text,4208


In [8]:
# Tabulate the YARA string hits (rule, string, section name, RVA) found in
# the `network_windows_arm.out` sample.
pd.DataFrame(
    analyze_file(
        file_path='./samples/network_windows_arm.out',
        yara_rules='network_detect.yara',
    )
)

Unnamed: 0,rule,string,section_name,rva
0,network_detect_partial_GET,$a,.rdata,1067889
1,network_detect_full_GET,$a,.rdata,1067889
2,network_detect_c2_domain,$a,.rdata,1077840
3,network_detect_c2_ipv6,$a,.rdata,1082835
4,network_detect_error_message_string,$a,.rdata,1070730
5,detect_go_binary_buildid,$a,.text,4096
