Skip to content

Commit

Permalink
A bunch of parser improvements
Browse files Browse the repository at this point in the history
  • Loading branch information
rchiossi committed Aug 12, 2013
1 parent 41cf511 commit 718cad5
Show file tree
Hide file tree
Showing 11 changed files with 256 additions and 55 deletions.
1 change: 1 addition & 0 deletions .gitignore
Expand Up @@ -5,5 +5,6 @@ tests/
void/
*.o
*.so
*.pyc
*~
*#*
2 changes: 1 addition & 1 deletion Makefile
@@ -1,5 +1,5 @@
CC = gcc
CFLAGS = -fPIC
CFLAGS = -fPIC -g

LIB = dexterity.so

Expand Down
2 changes: 1 addition & 1 deletion builder.c
Expand Up @@ -5,7 +5,7 @@
int main(void) {
ByteStream* in = bsmap("./tests/classes.dex");
ByteStream* out = bsalloc(in->size);
Dex* dex = dxdex(in,0);
Dex* dex = dxdex(in);

dxb_header(out,dex->header);

Expand Down
2 changes: 0 additions & 2 deletions bytestream.h
Expand Up @@ -8,8 +8,6 @@
#define BS_RW 1

typedef struct _ByteStream {
unsigned int mode;

char* filename;
size_t size;

Expand Down
24 changes: 23 additions & 1 deletion dex.c
@@ -1,12 +1,34 @@
#include "bytestream.h"
#include "dex.h"

Dex* dxdex(ByteStream* bs, uint32_t offset) {
/*
 * CHECK(obj, ret): leave the enclosing function with `ret` when the parsed
 * item `obj` has its meta.corrupted flag set.
 *
 * Wrapped in do { } while (0) so the macro expands to exactly one statement;
 * a bare `if` would swallow an `else` that follows the macro call.
 */
#define CHECK(_OBJ,_RET) \
    do { \
        if ((_OBJ)->meta.corrupted) return (_RET); \
    } while (0)

/*
 * ALLOC_LIST(obj, size, ret): make `obj` point to a fresh list of `size`
 * pointer-sized slots, or leave the enclosing function with `ret` when the
 * allocation fails.
 *
 * calloc is used so that every slot starts out NULL and so that the
 * count * element-size multiplication is overflow-checked by the library.
 * A request for zero slots may yield NULL depending on the C library, in
 * which case the macro takes the failure path.
 */
#define ALLOC_LIST(_OBJ,_SIZE,_RET) \
    do { \
        (_OBJ) = calloc((_SIZE), sizeof(void*)); \
        if ((_OBJ) == NULL) return (_RET); \
    } while (0)

//#define READ_LIST(_BS,_OFF,_SIZE,_LIST,_RFUNC) \
// do{ \
// bsseek((_BS),(_OFF)); \
// for (int i=0; i<(_SIZE),i++) { \
// (_LIST)[i] = _RFUNC((_BS),(_BS)->offset); \
// } \
//} while(0)

/*
 * Build a Dex object from the byte stream `bs`, reading the header at
 * `offset`.
 *
 * Returns NULL when the Dex object cannot be allocated or when dx_header
 * yields no header at all. Otherwise returns the Dex object; parsing stops
 * early (and the partially filled object is returned) when the header is
 * flagged as corrupted or the string id list cannot be allocated.
 *
 * The caller owns the returned object.
 */
Dex* dxdex_off(ByteStream* bs, uint32_t offset) {
    /* Zero-filled so string_ids/type_ids are NULL, not indeterminate,
     * on every early return below. */
    Dex* dex = calloc(1, sizeof *dex);

    if (dex == NULL) return NULL;

    dex->header = dx_header(bs, offset);
    if (dex->header == NULL) {
        /* NOTE(review): assumes dx_header can return NULL like the other
         * parsers do on allocation failure; CHECK would dereference it. */
        free(dex);
        return NULL;
    }
    CHECK(dex->header, dex);

    ALLOC_LIST(dex->string_ids, dex->header->string_ids_size, dex);
    /* TODO: fill the list with dx_stringid results, reading
     * header->string_ids_size entries starting at header->string_ids_off
     * (the READ_LIST helper is still disabled). */

    return dex;
}
Expand Down
13 changes: 12 additions & 1 deletion dex.h
Expand Up @@ -182,6 +182,8 @@ typedef struct _DexMapList {

/* Top-level object produced by dxdex_off(): the parsed pieces of one Dex file. */
typedef struct _Dex {
    DexHeaderItem* header;        /* header item as returned by dx_header */
    DexStringIdItem** string_ids; /* list of string id item pointers; sized from header->string_ids_size */
    DexTypeIdItem** type_ids;     /* list of type id item pointers; NOTE(review): no visible code fills this yet */
} Dex;

//Parse
Expand All @@ -190,7 +192,15 @@ int dexread(ByteStream* bs, uint8_t* buf, size_t size, uint32_t offset);
#define DXPARSE(_name,_type) _type* _name (ByteStream* bs, uint32_t offset)

DXPARSE(dx_header,DexHeaderItem);

DXPARSE(dx_stringid,DexStringIdItem);
DXPARSE(dx_typeid,DexTypeIdItem);
DXPARSE(dx_protoid,DexProtoIdItem);
DXPARSE(dx_fieldid,DexFieldIdItem);
DXPARSE(dx_methodid,DexMethodIdItem);
DXPARSE(dx_classdef,DexClassDefItem);

DXPARSE(dx_stringdata,DexStringDataItem);

//Build
int dexwrite(ByteStream* bs, uint8_t* buf, size_t size, uint32_t offset);
Expand All @@ -201,7 +211,8 @@ DXBUILD(dxb_header,DexHeaderItem);
DXBUILD(dxb_stringid,DexStringIdItem);

//General
Dex* dxdex(ByteStream* bs, uint32_t offset);
#define dxdex(_bs) dxdex_off(_bs,(uint32_t) 0x0)
Dex* dxdex_off(ByteStream* bs, uint32_t offset);
void dxfree(Dex* dex);

#endif
220 changes: 173 additions & 47 deletions dex.py
Expand Up @@ -3,8 +3,9 @@
from ctypes import cdll
from ctypes import Structure
from ctypes import POINTER, pointer
from ctypes import Array

from ctypes import c_int, c_uint, c_uint8, c_uint32
from ctypes import c_int, c_uint, c_uint8, c_uint16, c_uint32
from ctypes import c_char_p

from ctypes import create_string_buffer
Expand All @@ -20,8 +21,13 @@ class _ByteStream(Structure):
]

class ByteStream(object):
def __init__(self,fname):
self._bs = dxlib.bsmap(fname)
def __init__(self, fname=None, size=None):
    """Create the underlying native stream.

    fname -- when given, the stream is obtained from dxlib.bsmap(fname)
    size  -- used only when fname is not given; the stream is obtained
             from dxlib.bsalloc(size)

    Raises Exception when neither argument is supplied.
    """
    # A file name takes precedence over a size when both are passed.
    if fname != None:
        self._bs = dxlib.bsmap(fname)
        return

    if size != None:
        self._bs = dxlib.bsalloc(size)
        return

    raise Exception("Not enough parameters for ByteStream")

def __del__(self):
dxlib.bsfree(self._bs)
Expand All @@ -46,47 +52,113 @@ def exhausted(self):
return (self._bs.contents.exhausted != 0)

#LEB128
class _Leb128(Structure):
class Leb128(Structure):
    """ctypes layout of a LEB128 value as exchanged with the dexterity library.

    Each conversion method hands this structure (by value) to the matching
    dxlib routine and returns that routine's result.
    """
    _fields_ = [
        ('data',c_uint8 * 5),  # raw encoded bytes, fixed 5-byte buffer
        ('size',c_uint),  # presumably the number of bytes of `data` in use -- TODO confirm
        ]

    def uleb(self):
        """Return dxlib.ul128toui(self)."""
        return dxlib.ul128toui(self)

    def ulebp1(self):
        """Return dxlib.ul128p1toui(self)."""
        return dxlib.ul128p1toui(self)

    def sleb(self):
        """Return dxlib.sl128toui(self).

        NOTE(review): the prototype declares an unsigned result (c_uint);
        confirm that is intended for a signed LEB128 value.
        """
        return dxlib.sl128toui(self)

#Dex
class _Metadata(Structure):
class Metadata(Structure):
    """Bookkeeping record embedded as the leading 'meta' field of every parsed item."""
    _fields_ = [
        ('corrupted',c_uint),  # flag set by the C parsers, e.g. when fewer bytes were read than expected
        ('offset', c_uint),  # presumably the stream offset the item was parsed from -- TODO confirm
        ]

class _DexHeaderItem(Structure):
class DexHeaderItem(Structure):
_fields_ = [
('meta',_Metadata),
('magic',c_uint8 * 8),
('checksum',c_uint32),
('signature',c_uint8 * 20),
('file_size',c_uint32),
('header_size',c_uint32),
('endian_tag',c_uint32),
('link_size',c_uint32),
('link_off',c_uint32),
('map_off',c_uint32),
('string_ids_size',c_uint32),
('string_ids_off',c_uint32),
('type_ids_size',c_uint32),
('type_ids_off',c_uint32),
('proto_ids_size',c_uint32),
('proto_ids_off',c_uint32),
('field_ids_size',c_uint32),
('field_ids_off',c_uint32),
('method_ids_size',c_uint32),
('method_ids_off',c_uint32),
('class_defs_size',c_uint32),
('class_defs_off',c_uint32),
('data_size',c_uint32),
('data_off',c_uint32),
('meta', Metadata),
('magic', c_uint8 * 8),
('checksum', c_uint32),
('signature', c_uint8 * 20),
('file_size', c_uint32),
('header_size', c_uint32),
('endian_tag', c_uint32),
('link_size', c_uint32),
('link_off', c_uint32),
('map_off', c_uint32),
('string_ids_size', c_uint32),
('string_ids_off', c_uint32),
('type_ids_size', c_uint32),
('type_ids_off', c_uint32),
('proto_ids_size', c_uint32),
('proto_ids_off', c_uint32),
('field_ids_size', c_uint32),
('field_ids_off', c_uint32),
('method_ids_size', c_uint32),
('method_ids_off', c_uint32),
('class_defs_size', c_uint32),
('class_defs_off', c_uint32),
('data_size', c_uint32),
('data_off', c_uint32),
]


class DexStringIdItem(Structure):
    """ctypes layout of the item returned by dxlib.dx_stringid."""
    _fields_ = [
        ('meta', Metadata),
        ('string_data_off', c_uint32),  # presumably the file offset of the matching string data item -- TODO confirm
        ]

class DexStringDataItem(Structure):
    """ctypes layout of the item returned by dxlib.dx_stringdata."""
    _fields_ = [
        ('meta', Metadata),
        ('size', Leb128),  # LEB128 length; the C parser reads ul128toui(size) bytes into `data`
        # NOTE(review): the C parser allocates exactly `size` bytes for the
        # buffer; confirm it is NUL-terminated before relying on c_char_p.
        ('data', c_char_p),
        ]

class DexTypeIdItem(Structure):
    """ctypes layout of the item returned by dxlib.dx_typeid."""
    _fields_ = [
        ('meta', Metadata),
        ('descriptor_idx', c_uint32),  # presumably an index into the string id list -- TODO confirm
        ]

class DexProtoIdItem(Structure):
    """ctypes layout of the item returned by dxlib.dx_protoid."""
    _fields_ = [
        ('meta', Metadata),
        ('shorty_idx', c_uint32),
        ('return_type_idx', c_uint32),
        ('parameters_off', c_uint32),  # an offset, unlike the two index fields above
        ]


class DexFieldIdItem(Structure):
    """ctypes layout of the item returned by dxlib.dx_fieldid."""
    _fields_ = [
        ('meta', Metadata),
        ('class_idx', c_uint16),  # 16-bit index
        ('type_idx', c_uint16),  # 16-bit index
        ('name_idx', c_uint32),  # 32-bit index
        ]

class DexMethodIdItem(Structure):
    """ctypes layout of the item returned by dxlib.dx_methodid."""
    _fields_ = [
        ('meta', Metadata),
        ('class_idx', c_uint16),  # 16-bit index
        ('proto_idx', c_uint16),  # 16-bit index
        ('name_idx', c_uint32),  # 32-bit index
        ]

class DexClassDefItem(Structure):
    """ctypes layout of the item returned by dxlib.dx_classdef.

    Every member after 'meta' is a 32-bit value; the *_idx members are
    indexes and the *_off members are offsets.
    """
    _fields_ = [
        ('meta', Metadata),
        ('class_idx', c_uint32),
        ('access_flags', c_uint32),
        ('superclass_idx', c_uint32),
        ('interfaces_off', c_uint32),
        ('source_file_idx', c_uint32),
        ('annotations_off', c_uint32),
        ('class_data_off', c_uint32),
        ('static_values_off', c_uint32),
        ]

#Load Library
dxlib = cdll.LoadLibrary("./dexterity.so")

Expand All @@ -112,34 +184,88 @@ class _DexHeaderItem(Structure):
dxlib.bsreset.argtypes = (POINTER(_ByteStream),)
dxlib.bsreset.restype = None

# Leb128 Prototypes
dxlib.ul128toui.argtypes = (Leb128,)
dxlib.ul128toui.restype = c_uint

dxlib.ul128p1toui.argtypes = (Leb128,)
dxlib.ul128p1toui.restype = c_uint

dxlib.sl128toui.argtypes = (Leb128,)
dxlib.sl128toui.restype = c_uint

#Dex prototypes
def DXPARSE(name, res):
    """Declare the ctypes prototype of one dxlib parser function.

    name -- name of the function to look up on dxlib
    res  -- ctypes type the function points to; the return type is
            declared as POINTER(res)

    Every parser takes (POINTER(_ByteStream), c_uint32).
    """
    parser = getattr(dxlib, name)
    parser.argtypes = (POINTER(_ByteStream), c_uint32)
    parser.restype = POINTER(res)

DXPARSE('dx_header',_DexHeaderItem)
DXPARSE('dx_header',DexHeaderItem)

DXPARSE('dx_stringid',DexStringIdItem)
DXPARSE('dx_typeid',DexTypeIdItem)
DXPARSE('dx_protoid',DexProtoIdItem)
DXPARSE('dx_fieldid',DexFieldIdItem)
DXPARSE('dx_methodid',DexMethodIdItem)
DXPARSE('dx_classdef',DexClassDefItem)

DXPARSE('dx_stringdata',DexStringDataItem)

#DexParser
class DexParser(object):
def __init__(self, filename):
    """Open `filename` as the ByteStream this parser reads from.

    Raises Exception when `filename` is None.
    """
    if filename == None:
        raise Exception("Null File Name.")

    self.bs = ByteStream(filename)

def seek(self,offset):
    """Forward `offset` to the seek method of the underlying ByteStream."""
    self.bs.seek(offset)

def parse(self, func, offset):
    """Run one dxlib parser function on the stream and return the parsed item.

    func   -- callable taking (native stream, offset) and returning a
              pointer-like object with a `contents` attribute
    offset -- position to seek to first; None keeps the current position

    The offset handed to `func` is always the one read back from the
    native stream after the optional seek.
    """
    if offset != None:
        self.seek(offset)

    stream = self.bs._bs
    item = func(stream, stream.contents.offset)
    return item.contents

#Main Structures
def header(self,offset=None):
    """Parse a DexHeaderItem with dxlib.dx_header.

    With offset=None parsing starts at the stream's current position;
    otherwise the stream is first moved to `offset`.
    """
    return self.parse(dxlib.dx_header,offset)

# Legacy code
class _Dex(Structure):
_fields_ = [
('header',POINTER(_DexHeaderItem)),
]
def stringid(self,offset=None):
    """Parse a DexStringIdItem with dxlib.dx_stringid.

    With offset=None parsing starts at the stream's current position;
    otherwise the stream is first moved to `offset`.
    """
    return self.parse(dxlib.dx_stringid,offset)

class Dex(object):
def __init__(self,bs,offset=0):
self._dex = dxlib.dxdex(bs._bs,offset)
def typeid(self,offset=None):
    """Parse a DexTypeIdItem with dxlib.dx_typeid.

    With offset=None parsing starts at the stream's current position;
    otherwise the stream is first moved to `offset`.
    """
    return self.parse(dxlib.dx_typeid,offset)

self.header = self._dex.contents.header.contents
def protoid(self,offset=None):
    """Parse a DexProtoIdItem with dxlib.dx_protoid.

    With offset=None parsing starts at the stream's current position;
    otherwise the stream is first moved to `offset`.
    """
    return self.parse(dxlib.dx_protoid,offset)

def __del__(self):
dxlib.dxfree(self._dex)
def fieldid(self,offset=None):
    """Parse a DexFieldIdItem with dxlib.dx_fieldid.

    With offset=None parsing starts at the stream's current position;
    otherwise the stream is first moved to `offset`.
    """
    return self.parse(dxlib.dx_fieldid,offset)

def methodid(self,offset=None):
    """Parse a DexMethodIdItem with dxlib.dx_methodid.

    With offset=None parsing starts at the stream's current position;
    otherwise the stream is first moved to `offset`.
    """
    return self.parse(dxlib.dx_methodid,offset)

def classdef(self,offset=None):
    """Parse a DexClassDefItem with dxlib.dx_classdef.

    With offset=None parsing starts at the stream's current position;
    otherwise the stream is first moved to `offset`.
    """
    return self.parse(dxlib.dx_classdef,offset)

def stringdata(self,offset=None):
    """Parse a DexStringDataItem with dxlib.dx_stringdata.

    With offset=None parsing starts at the stream's current position;
    otherwise the stream is first moved to `offset`.
    """
    return self.parse(dxlib.dx_stringdata,offset)

def dxprint(obj,pad=0):
print ' '*pad + "%s:" % obj.__class__.__name__

#dxlib.dxdex.argtypes = (POINTER(_ByteStream),c_uint)
#dxlib.dxdex.restype = POINTER(_Dex)
for name,a_class in obj._fields_:
val = getattr(obj,name)

dxlib.dxfree.argtypes = (POINTER(_Dex),)
dxlib.dxfree.restype = None
if issubclass(a_class,(Structure,)):
dxprint(val,pad+2)
elif issubclass(a_class,(Array,)):
data = ''.join(['%02x' % x for x in val])
print ' '* pad + " %s: %s" % (name,data)
elif issubclass(a_class,(c_char_p,)):
print ' '* pad + " %s: %s" % (name,val)
elif name.find('size') != -1:
print ' '*pad + " %s: %d" % (name,val)
elif name.find('idx') != -1:
print ' '*pad + " %s: %d" % (name,val)
else:
print ' '*pad + " %s: 0x%x" % (name,val)
7 changes: 7 additions & 0 deletions dex_parser.c
Expand Up @@ -54,6 +54,13 @@ DexStringDataItem* dx_stringdata(ByteStream* bs, uint32_t offset) {
return res;
}

res->data = (uint8_t*) malloc(sizeof(uint8_t)*ul128toui(res->size));

if (res->data == NULL) {
free(res);
return NULL;
}

check = bsread(bs,res->data,ul128toui(res->size));

res->meta.corrupted = check != ul128toui(res->size);
Expand Down

0 comments on commit 718cad5

Please sign in to comment.