In [1]:
from struct import pack, unpack
import io
import re
import r2pipe
from darter.core import parse_snapshot

# Loading and parsing the snapshot (for AOT)

Here we open the file to inspect. It actually contains *two* snapshots: one is the common base (the VM snapshot) and the other (the isolate snapshot) contains the actual user code.  
We use `parse_snapshot` to parse the base, and then call it again to parse the user snapshot on top of it.

By default we are inspecting `sample-app.so`, an included sample file which results from building the default Flutter app.  
To inspect another file, **place the filename here:**

In [2]:
fname = 'sample-app.so'

# Read the whole file into an in-memory stream
with open(fname, 'rb') as f:
    app = io.BytesIO(f.read())

# Query radare2 for the file layout (sections, symbols)
r2 = r2pipe.open(fname)
sections = r2.cmdj('iSj')
symbol_list = r2.cmdj('isj')
# Symbol name -> 'paddr' value reported by radare2 (a later duplicate name wins)
symbols = dict((s['name'], s['paddr']) for s in symbol_list)

# The VM snapshot is parsed first; its result is the base for the isolate snapshot
print('------- PARSING VM SNAPSHOT --------\n')
_, base = parse_snapshot(
    app,
    symbols['_kDartVmSnapshotData'],
    symbols['_kDartVmSnapshotInstructions'],
    vm=True,
)
print('\n------- PARSING ISOLATE SNAPSHOT --------\n')
clusters, refs = parse_snapshot(
    app,
    symbols['_kDartIsolateSnapshotData'],
    symbols['_kDartIsolateSnapshotInstructions'],
    base=base,
)

------- PARSING VM SNAPSHOT --------

[Header]
  length = 4733
  kind = 2 ('kFullAOT', 'Full + AOT code')

[Snapshot header]
  version = 'c8562f0ee0ebc38ba217c7955956d1cb'
  features = 'product use_bare_instructions no-"asserts" causal_async_stacks no-bytecode arm-eabi softfp'

  base objects: 95
  objects: 935
  clusters: 5
  code order length = 69
  data starts at 0x2c22a0

INFO: [002c1094]: Reading allocation clusters...
INFO: [002c13a9]: Reading fill clusters...
INFO: [002c2215]: Reading roots...
INFO: [002c2281]: Snasphot parsed.

------- PARSING ISOLATE SNAPSHOT --------

[Header]
  length = 836159
  kind = 2 ('kFullAOT', 'Full + AOT code')

[Snapshot header]
  version = 'c8562f0ee0ebc38ba217c7955956d1cb'
  features = 'product use_bare_instructions no-"asserts" causal_async_stacks no-bytecode arm-eabi softfp'

  base objects: 935
  objects: 74247
  clusters: 222
  code order length = 7228
  data starts at 0x393260

INFO: [002c7098]: Reading allocation clusters...
INFO: [002d8228]

If the above didn't produce any warning, then you are good to go!

The parsed data is in `clusters` and `refs`; we will now analyze it further.

# Analyzing parsed data

We will start by defining some basic functions, tables and stats to help us analyze the data:

In [3]:
from darter.constants import *
from darter.other import *
from collections import defaultdict

def is_relevant(src):
    ''' Tell whether a back-reference should be shown: False when its source
    object (`src[0]`) is the global object pool or the symbol table. '''
    root_fields = refs['root'].x
    return not (src[0] == root_fields['global_object_pool'] or src[0] == root_fields['symbol_table'])

def show_rev_tree(ref, depth=4, max_srcs=5, i_step=4, hide_irrelevant=True):
    ''' Print an indented tree of back-references to `ref` (things pointing to it).

    depth: how many levels of back-references to follow
    max_srcs: maximum children printed per node; the rest is summarised as '... N more'
    i_step: number of spaces added per nesting level
    hide_irrelevant: skip entries rejected by `is_relevant`
    '''
    def walk(entry, remaining, visited, pad):
        target = entry[0]
        # Do not descend into an object already on the current path (cycle)
        if target in visited:
            return
        if hide_irrelevant and not is_relevant(entry):
            return
        print(" " * pad + ", ".join(map(str, entry)))
        if remaining <= 0:
            return
        child_pad = pad + i_step
        path = visited | {target}
        back_refs = target.src
        # `nsrc` only exists once native references have been populated
        if hasattr(target, 'nsrc'):
            back_refs = back_refs + target.nsrc
        for child in back_refs[:max_srcs]:
            walk(child, remaining - 1, path, child_pad)
        hidden = len(back_refs) - max_srcs
        if hidden > 0:
            print(" " * child_pad + '... {} more'.format(hidden))
    walk((ref,), depth, set(), 0)

def getcl(name, cl=None):
    ''' Get the cluster of a certain kind (i.e. `Function`).

    name: kind name, looked up in `kkClassId` as `k<name>Cid`
    cl: list of clusters to search; when omitted, the *current* global
        `clusters` is used (resolved at call time, so parsing another
        snapshot later does not leave this helper searching the old list)

    Raises KeyError if the kind is unknown, and AssertionError unless
    exactly one cluster of that kind exists.
    '''
    if cl is None:
        cl = clusters
    cid = kkClassId['k{}Cid'.format(name)]
    matches = [ c for c in cl if c['cid'] == cid ]
    assert len(matches) == 1, \
        'expected exactly one {} cluster, found {}'.format(name, len(matches))
    return matches[0]

def getrefs(name, rf=None, cl=None):
    ''' Get list of references defined in a certain kind of cluster.

    name: kind name, as accepted by `getcl` (i.e. `Function`)
    rf: reference table to index; defaults to the *current* global `refs`
    cl: cluster list to search; defaults to the *current* global `clusters`

    Both defaults are resolved at call time, so the helper keeps working on
    the latest parsed snapshot after `refs` / `clusters` are reassigned.
    Returns the references with IDs in `[ref_start, ref_end)` of the cluster.
    '''
    if rf is None:
        rf = refs
    if cl is None:
        cl = clusters
    c = getcl(name, cl)
    return [ rf[x] for x in range(c['ref_start'], c['ref_end']) ]

# Class table: class id -> Class reference (ids must be unique)
classes_refs = getrefs('Class')
classes = dict((c.x['cid'], c) for c in classes_refs)
assert len(classes_refs) == len(classes)

# String table: string value -> string reference, covering both string clusters
# (values must be unique)
strings_refs = [*getrefs('OneByteString'), *getrefs('TwoByteString')]
strings = dict((ref.x['value'], ref) for ref in strings_refs)
assert len(strings_refs) == len(strings)

# TODO: show some basic stats

# Play!

You are now free to inspect the parsed data as you wish. Some examples:

In [4]:
# Print the first 5 functions of the app. Each item is a 'reference object'
# (it prints as `Kind->ID`), not the function data itself:
for ref in getrefs('Function')[:5]:
    print(ref)

Function->2861
Function->2862
Function->2863
Function->2864
Function->2865


In [5]:
# You can use 'ref.x' to access the object data dictionary (the parsed fields).
# Here we take the fifth Function (index 4) and dump its fields:
ref = getrefs('Function')[4]
print(ref.x)

{'name': 'hitTestChildren'->64784, 'owner': Class->1132, 'result_type': Type->48984, 'parameter_types': Array[3]->54905, 'parameter_names': Array[3]->54904, 'type_parameters': <base Null>null, 'data': <base Null>null, 'code': Code->11870, 'packed_fields': 1310743, 'kind_tag': 142082048}


In [6]:
# Print the usage tree for a reference: the objects pointing to it, then the
# objects pointing to those, and so on (up to `depth` levels, 4 by default)
show_rev_tree(ref)

Function->2865
    ClosureData->10072, parent_function
        Function->2862, data
            Code->11868, owner
            Array[511]->63682, items, 387
                GrowableObjectArray->54862, data
    Code->11870, owner
    Array[6]->55688, items, 3
        Class->1132, functions
            Function->2862, owner
                Code->11868, owner
                Array[511]->63682, items, 387
            Function->3377, owner
                Code->12331, owner
            Function->3378, owner
                Code->12332, owner
            Function->3379, owner
                Code->12333, owner
            ... 5 more


In [7]:
# 'refs' is indexed by reference ID, so we can fetch the reference object
# for any ID printed after '->' (for instance, the Array[6] in the tree above)
refs[55688]

Array[6]->55688

In [8]:
# 'strings' maps a string value to its reference object, so we can look up
# the reference for a certain string (KeyError if the string is not present)
strings['Flutter Demo']

'Flutter Demo'->69305

# Finding references to VM objects from native code

If you are on AOT, there's no data that tells you the objects referenced from a function.

This experimental code attempts to disassemble the native code, looking for references to `r5`.
It parses those references into a `native_refs` dictionary.

It's going to take a while (a minute or more, depending on the app), and it's not bulletproof: I've seen it miss some references...

In [163]:
# Any instruction mentioning register r5 as a whole word
MG = re.compile(r'[^a-z0-9A-Z_]r5([^a-z0-9A-Z_]|$)')
# add <target>, <source>, <offset>
M1 = re.compile(r'add (\w+), (\w+), (\w+)')
# ldr <target>, [<source>, <offset>]
M2 = re.compile(r'ldr (\w+), \[(\w+), (\w+)\]')

def extract_references(code):
    ''' Disassemble a Code object and collect the loads it performs relative to r5.

    Recognised sequences are zero or more `add rX, <prev>, <imm>` followed by
    a final `ldr rY, [<prev>, <imm>]`, where the chain starts at r5.

    Returns None if `code` has no 'instructions'; otherwise a list of
    `(entry, pc, reg)` tuples where `entry` is `(total offset + 1) // 4`,
    `pc` is the offset of the first instruction of the sequence and `reg`
    is the register finally loaded. Instructions that mention r5 but cannot
    be parsed are reported with `print` and skipped (best effort).
    '''
    if 'instructions' not in code.x: return
    instr = code.x['instructions']
    r2.cmd('s ' + str(instr['data_addr']))
    # One op per 4 bytes of code. `cmdj` may yield None when radare2's output
    # cannot be decoded; treat that as "nothing to scan" instead of crashing.
    # Consuming through an iterator avoids the O(n) cost of `list.pop(0)`.
    ops = iter(r2.cmdj('pdj ' + str(len(instr['data']) // 4)) or [])

    def read_op():
        # Fetch the next instruction of a multi-instruction sequence
        m = next(ops, None)
        if m is None:
            raise Exception('Ran out of instructions while following a reference')
        return m['offset'], m['opcode']

    result = []
    def process(pc, reg, offset):
        # NOTE(review): the +1 presumably compensates for a tagged base
        # pointer in r5 -- TODO confirm
        offset += 1
        if offset % 4 != 0:
            raise Exception('Offset not aligned: {}'.format(offset))
        result.append((offset // 4, pc, reg))

    def parse_pline(op, exp_source='r5'):
        # Returns (final target register, accumulated offset) for a sequence
        # whose current instruction must read from `exp_source`
        m = M1.fullmatch(op)
        if m:
            target, source = m.group(1), m.group(2)
            if exp_source != source or (target == 'r5'):
                raise Exception('Source / target not matching!')
            offset = int(m.group(3), 0)
            # The add only builds part of the address; keep following the chain
            _, op = read_op()
            target2, offset2 = parse_pline(op, target)
            return target2, offset + offset2
        m = M2.fullmatch(op)
        if m:
            target, source = m.group(1), m.group(2)
            if exp_source != source or (target == 'r5'):
                raise Exception('Source / target not matching!')
            offset = int(m.group(3), 0)
            return target, offset
        raise Exception('Unknown op line: ' + op)

    for m in ops:
        pc, op = m['offset'], m['opcode']
        if not MG.search(op): continue
        try:
            target, offset = parse_pline(op)
            process(pc, target, offset)
        except Exception as e:
            # Deliberately best effort: report the offending instruction and go on
            print(pc, e)
    return result

from time import time

# Run the extraction over every Code reference (keyed by reference ID) and time it
native_refs = {}
code_cluster = getcl('Code')
start = time()
first_id, end_id = code_cluster['ref_start'], code_cluster['ref_end']
for code_id in range(first_id, end_id):
    native_refs[code_id] = extract_references(refs[code_id])
elapsed = time() - start
print('Elapsed: {}s'.format(elapsed))

20944 Unknown op line: push {r0, r1, r2, r3, r5, fp, lr}
20996 Unknown op line: pop {r0, r1, r2, r3, r5, fp, lr}
21092 Unknown op line: push {r0, r1, r2, r3, r5, fp, lr}
21144 Unknown op line: pop {r0, r1, r2, r3, r5, fp, lr}
21244 Unknown op line: push {r0, r1, r2, r3, r5, fp, lr}
21292 Unknown op line: pop {r0, r1, r2, r3, r5, fp, lr}
Elapsed: 65.15995287895203s


Now populate the parsed references into an `nsrc` field on the reference object they point to:

In [166]:
# Start from a clean slate: every reference object gets an empty `nsrc` list
for ref_id in range(1, refs['next']):
    refs[ref_id].nsrc = []
global_entries = refs['root'].x['global_object_pool'].x['entries']

for r, nrefs in native_refs.items():
    # Code objects without instructions produced no result
    if nrefs is None:
        continue
    code_ref = refs[r]
    instr = code_ref.x['instructions']
    instr['nrefs'] = []
    for index, pc, reg in nrefs:
        # FIXME: also look at patchable. track refs *at entry* and not ref
        if index < 0 or index >= len(global_entries):
            print('Ref outside entries:', code_ref, index, pc, reg)
            continue
        entry = global_entries[index]
        # Entries without a 'raw_obj' are silently ignored
        if 'raw_obj' not in entry:
            continue
        target = entry['raw_obj']
        # Record the link in both directions: on the target and on the code
        target.nsrc.append((code_ref, pc))
        instr['nrefs'].append((target, pc))

Ref outside entries: Code->21462 36192 22496 r3


Now go back to the previous section and try using `show_rev_tree` on the `Flutter Demo` string; the tree will now include the native references as well.

# Loading and parsing an AppJIT snapshot

AppAOT snapshot files are usually stored as an executable (ELF), so they have a standard format. However, AppJIT snapshot files (`.snapshot`) use their own custom mini-format, which stores the four blobs preceded by a simple header indicating the length of each of them.

Here's a sample snippet that demonstrates how to read and parse such a file. Replace `fname` with the file to inspect, and adjust `settings.py` accordingly:

In [3]:
fname = 'appjit-sample.dart.snapshot'

from darter.constants import kAppJITMagic, kAppSnapshotPageSize

# Load the file in memory and check the 8-byte little-endian magic at its start
with open(fname, 'rb') as f:
    app = io.BytesIO(f.read())
(magic,) = unpack('<Q', app.read(8))
if magic != kAppJITMagic:
    print("WARN: Magic not matching, got 0x{:016x}".format(magic))
# The magic is followed by four signed 64-bit lengths, one per blob
lengths = unpack('<qqqq', app.read(4 * 8))
print('Blob lengths:', lengths)

# Each blob starts at the first page boundary at or after the end of the previous one
offsets = []
offset = app.tell()
for length in lengths:
    pages = (offset - 1) // kAppSnapshotPageSize + 1
    offset = pages * kAppSnapshotPageSize
    offsets.append(offset)
    offset += length

# The first two blobs feed the VM snapshot parse (optional), the last two the isolate one
if not lengths[0]:
    print('No base snapshot, skipping base snasphot parsing...')
    base = None
    assert not lengths[1]
else:
    print('\n------- PARSING VM SNAPSHOT --------\n')
    _, base = parse_snapshot(
        app, offsets[0], offsets[1],
        vm=True, disableRoData=True,
    )
print('\n------- PARSING ISOLATE SNAPSHOT --------\n')
clusters, refs = parse_snapshot(
    app, offsets[2], offsets[3],
    base=base, disableRoData=True,
)

Blob lengths: (0, 0, 17599488, 6822944)
No base snapshot, skipping base snasphot parsing...

------- PARSING ISOLATE SNAPSHOT --------

[Header]
  length = 12708502
  kind = 1 ('kFullJIT', 'Full + JIT code')

[Snapshot header]
  version = 'c8562f0ee0ebc38ba217c7955956d1cb'
  features = 'release use_bare_instructions no-"asserts" "use_field_guards" "use_osr" causal_async_stacks no-bytecode x64-sysv'

  base objects: 934
  objects: 297885
  clusters: 160
  code order length = 0
  data starts at 0xc1faa0

WARN: Snapshot expected 934 base objects, but the provided base has 95
INFO: [000010ad]: Reading allocation clusters...
INFO: [0003c3f2]: Reading fill clusters...
INFO: [00c1f990]: Reading roots...
INFO: [00c1fa9a]: Snasphot parsed.


**Note:** As said in the README, darter doesn't fully support 64-bit architectures yet. Because most AppJIT snapshots are compiled for 64-bit archs, `disableRoData` is passed so that memory structures are not parsed (the rest should work).