Skip to content

HTTPS clone URL

Subversion checkout URL

You can clone with HTTPS or Subversion.

Download ZIP
Newer
Older
100644 487 lines (386 sloc) 18.574 kb
099295b @kanzure load labels into the new disassembler
authored
1 import sys, os, time, datetime, json
132182e @kanzure super duper disassembler fixes
authored
2 from gbz80disasm import opt_table
4b7ca69 @kanzure advance forward when disassembling the rom
authored
3 from ctypes import c_int8
4 from copy import copy, deepcopy
099295b @kanzure load labels into the new disassembler
authored
5 from labels import get_label_from_line, get_address_from_line_comment
7dd0c86 @kanzure almost complete new disassembler version
authored
6
96596c6 @kanzure asm output for the new disassembler
authored
# Opcode tables used by the disassembler. They stay plain lists because other
# code concatenates them with list literals.

# Opcodes the disassembler treats as jumps when generating labels.
# NOTE(review): despite the name this list also holds 0xc3, 0xda, 0xc2 and
# 0x32, which do not look like relative jumps -- confirm before relying on it.
relative_jumps = [0x38, 0x30, 0x20, 0x28, 0x18, 0xc3, 0xda, 0xc2, 0x32]
relative_unconditional_jumps = [0xc3, 0x18]
call_commands = [0xdc, 0xd4, 0xc4, 0xcc, 0xcd]

# Opcodes that end a run of disassembly.
end_08_scripts_with = [
    0xe9,  # jp hl
    0xc9,  # ret
]  # possibly also:
   # 0xc3, # jp
   # 0x18, # jr
   # 0xda, 0xe9, 0xd2, 0xc2, 0xca, 0xc3, 0x38, 0x30, 0x20, 0x28, 0x18, 0xd8,
   # 0xd0, 0xc0, 0xc8, 0xc9

# Indentation placed in front of every instruction in the asm output.
spacing = "\t"
20
ee7d39b @kanzure move RomStr into a shared file
authored
class RomStr(str):
    """ Simple wrapper to prevent a giant rom from being shown on screen.

    Each character of the string stands for one byte of the ROM, so
    ord(rom[i]) is the byte at address i. Creating an instance also loads the
    label table (see load_labels), which touches the filesystem.
    """

    def __init__(self, *args, **kwargs):
        # The string content itself is set by str.__new__; only the label
        # table needs initialising here.
        self.load_labels()
        str.__init__(self)

    def __repr__(self):
        """ Simplifies this object so that the output doesn't overflow stdout.
        """
        return "RomStr(too long)"

    @staticmethod
    def _read_rom_file(path):
        """ Reads a ROM file and returns it as a one-character-per-byte str.
        """
        # Binary mode keeps the platform from translating newlines or stopping
        # at an EOF marker inside the ROM image.
        with open(path, "rb") as rom_file:
            data = rom_file.read()

        # Interpreters whose binary reads yield a separate bytes type need an
        # explicit 1:1 byte-to-character mapping; latin-1 provides exactly that.
        if not isinstance(data, str):
            data = data.decode("latin-1")

        return data

    @classmethod
    def load(cls, crystal=True, red=False):
        """ Loads a ROM into a RomStr.

        Exactly one of crystal/red must be true; crystal reads
        "../baserom.gbc" and red reads "../pokered-baserom.gbc". Raises
        Exception when the choice is ambiguous.
        """
        if crystal and not red:
            path = "../baserom.gbc"
        elif red and not crystal:
            path = "../pokered-baserom.gbc"
        else:
            raise Exception("not sure which rom to load?")

        return cls(cls._read_rom_file(path))

    def load_labels(self, filename="labels.json"):
        """ Loads labels from labels.json, or parses the source code file and
        generates new labels.

        The cache file is regenerated from "../main.asm" when it is missing or
        more than three days old. The result is stored on self.labels.
        """
        # blank out the hash
        self.labels = {}

        # determine if the labels file needs to be regenerated
        if os.path.exists(filename):
            age_in_seconds = time.time() - os.path.getmtime(filename)
            generate_labels = age_in_seconds > 3 * 24 * 60 * 60
        else:
            generate_labels = True

        # scan the asm source code for labels
        if generate_labels:
            with open("../main.asm", "r") as asm_file:
                asm = asm_file.read().split("\n")

            for line in asm:
                label = get_label_from_line(line)

                if label:
                    address = get_address_from_line_comment(line)
                    self.labels[address] = label

            with open(filename, "w") as file_handler:
                file_handler.write(json.dumps(self.labels))

        # Always reload from the file, even right after generating it: the
        # JSON round trip turns the address keys into strings, which is the
        # form the disassembler looks them up by.
        with open(filename, "r") as file_handler:
            self.labels = json.loads(file_handler.read())

    def length(self):
        """ len(self)
        """
        return len(self)

    def len(self):
        """ len(self)
        """
        return self.length()

    def interval(self, offset, length, strings=True, debug=True):
        """ returns hex values for the rom starting at offset until
        offset+length

        With strings=True each byte is returned as a hex() string, otherwise
        as an int. The debug parameter is accepted but not used.
        """
        chunk = self[offset:offset + length]

        if strings:
            return [hex(ord(byte)) for byte in chunk]
        return [ord(byte) for byte in chunk]

    def until(self, offset, byte, strings=True, debug=False):
        """ Returns hex values from rom starting at offset until the given
        byte.

        The terminating byte itself is not included. When the byte does not
        occur at or after offset, everything up to the end of the rom is
        returned. The debug parameter is accepted but not used.
        """
        position = self.find(chr(byte), offset)

        # find() reports a miss as -1, which would otherwise turn into a
        # negative length and silently drop the last byte of the rom.
        if position == -1:
            position = len(self)

        return self.interval(offset, position - offset, strings=strings)

    def to_asm(self, address, end_address=None, size=None, max_size=0x4000, debug=None):
        """ Disassembles ASM at some address. This will stop disassembling when
        either the end_address or size is met. Also, there's a maximum size
        that will be parsed, so that large patches of data aren't parsed as
        code.

        address may be an int or a string; strings containing "0x" are parsed
        as hexadecimal and other strings as decimal. Returns a DisAsm object.
        Raises Exception when address is missing or the requested range is
        larger than max_size.
        """
        if address is None:
            raise Exception("address must be given")

        # Command-line callers hand over strings; everything downstream does
        # integer arithmetic on the address.
        if isinstance(address, str):
            if "0x" in address.lower():
                address = int(address, 16)
            else:
                address = int(address)

        start_address = address

        # fall back to a "debug" attribute on this object, if there is one
        if debug is None:
            debug = getattr(self, "debug", False)

        # this is probably a terrible idea.. why am i doing this?
        if size is not None and max_size < size:
            raise Exception("max_size must be greater than or equal to size")
        elif end_address is not None and (end_address - start_address) > max_size:
            raise Exception("end_address is out of bounds")
        elif end_address is not None and size is not None:
            # both given: whichever describes the larger range wins
            if (end_address - start_address) >= size:
                size = end_address - start_address
            else:
                end_address = start_address + size
        elif end_address is None and size is not None:
            end_address = start_address + size
        elif end_address is not None and size is None:
            size = end_address - start_address

        return DisAsm(start_address=start_address, end_address=end_address, size=size, max_size=max_size, debug=debug, rom=self)
7dd0c86 @kanzure almost complete new disassembler version
authored
157
099295b @kanzure load labels into the new disassembler
authored
class DisAsm(object):
    """ z80 disassembler

    Disassembly happens at construction time. The result is kept in
    self.asm_commands, a dict that maps an address to a dict describing the
    command found there; str() of the object renders it as asm text.
    """

    def __init__(self, start_address=None, end_address=None, size=None, max_size=0x4000, debug=True, rom=None):
        """ Validates the requested range and disassembles it.

        start_address is required. end_address and size are optional and are
        reconciled with each other when both are given. rom is the RomStr to
        read from; when omitted the default rom is loaded from disk. Raises
        Exception for an invalid debug value or an impossible range.
        """
        assert start_address is not None, "start_address must be given"

        if rom is None:
            rom = RomStr.load()

        if debug not in [None, True, False]:
            raise Exception("debug param is invalid")
        if debug is None:
            debug = False

        # get end_address and size in sync with each other
        if end_address is None and size is not None:
            end_address = start_address + size
        elif end_address is not None and size is None:
            size = end_address - start_address
        elif end_address is not None and size is not None:
            size = max(end_address - start_address, size)
            end_address = start_address + size

        # check that the bounds make sense
        if end_address is not None:
            if end_address <= start_address:
                raise Exception("end_address is out of bounds")
            elif (end_address - start_address) > max_size:
                raise Exception("end_address goes beyond max_size")

        # check more edge cases
        if start_address < 0:
            raise Exception("start_address must be at least 0")
        elif end_address is not None and end_address < 0:
            raise Exception("end_address must be at least 0")

        self.rom = rom
        self.start_address = start_address
        self.end_address = end_address
        self.size = size
        self.max_size = max_size
        self.debug = debug

        self.parse()

    def parse(self):
        """ Disassembles stuff and things.

        Walks the rom from self.start_address, looks each byte up in
        opt_table and records one entry per command in self.asm_commands.
        Reading stops at an opcode listed in end_08_scripts_with; a byte that
        is not in opt_table is recorded as data. On return self.end_address
        is overwritten with the position where reading stopped, plus one.
        """
        rom = self.rom
        start_address = self.start_address
        end_address = self.end_address

        # {address: {"type": "op", "id": ..., "format": ..., "formatted": ...,
        #            "references": ..., "current_label": ..., ...}}
        asm_commands = {}

        offset = start_address
        keep_reading = True

        # An unset end_address must not take part in the comparison, so in
        # that case the loop is driven by keep_reading alone.
        while (end_address is not None and end_address != 0 and offset <= end_address) or keep_reading:
            # read the current opcode byte
            current_byte = ord(rom[offset])

            # Setup this next/upcoming command. An entry may already exist
            # if an earlier jump pointed at this address.
            if offset in asm_commands:
                asm_command = asm_commands[offset]
            else:
                asm_command = {}

            asm_command["address"] = offset

            if "references" not in asm_command:
                # This counts how many times relative jumps reference this
                # byte. This is used to determine whether or not to print out
                # a label later.
                asm_command["references"] = 0

            # some commands have two opcodes
            next_byte = ord(rom[offset + 1])

            if self.debug:
                print("offset: \t\t" + hex(offset))
                print("current_byte: \t\t" + hex(current_byte))
                print("next_byte: \t\t" + hex(next_byte))

            # this might be a two-byte opcode
            possible_opcode = current_byte + (next_byte << 8)

            # all two-byte opcodes also have their first byte in there somewhere
            if (current_byte in opt_table) or (possible_opcode in opt_table):
                # check if this is a two-byte opcode
                # NOTE(review): nothing below skips the second opcode byte
                # when the two-byte form matches -- confirm whether opt_table
                # can actually hit this case.
                if possible_opcode in opt_table:
                    op_code = possible_opcode
                else:
                    op_code = current_byte

                op = opt_table[op_code]

                # NOTE(review): lowering the whole format string means an "x"
                # in the mnemonic itself would be taken for an operand
                # placeholder below -- verify against the opt_table entries.
                opstr = op[0].lower()
                optype = op[1]

                if self.debug:
                    print("opstr: " + opstr)

                asm_command["type"] = "op"
                asm_command["id"] = op_code
                asm_command["format"] = opstr
                asm_command["opnumberthing"] = optype

                # untouched copy of the format, used for the labelled variant
                base_opstr = opstr

                # "x" marks a one-byte operand
                if "x" in opstr:
                    for _ in range(0, opstr.count("x")):
                        operand = ord(rom[offset + 1])

                        # Certain opcodes will have a local relative jump
                        # label here instead of a raw hex value, but this is
                        # controlled through asm output.
                        insertion = "$" + hex(operand)[2:]

                        marker = opstr.find("x")
                        opstr = opstr[:marker].lower() + insertion + opstr[marker + 1:].lower()

                        if op_code in relative_jumps:
                            # offset still points at the opcode, so +2 is the
                            # address of the following instruction
                            target_address = offset + 2 + c_int8(operand).value
                            insertion = "asm_" + hex(target_address)

                            if str(target_address) in self.rom.labels:
                                insertion = self.rom.labels[str(target_address)]

                            marker = base_opstr.find("x")
                            opstr2 = base_opstr[:marker].lower() + insertion + base_opstr[marker + 1:].lower()
                            asm_command["formatted_with_labels"] = opstr2

                        offset += 1

                # "?" marks a two-byte, little-endian operand
                if "?" in opstr:
                    for _ in range(0, opstr.count("?")):
                        byte1 = ord(rom[offset + 1])
                        byte2 = ord(rom[offset + 2])

                        number = byte1 + (byte2 << 8)

                        # In most cases, you can use a label here. Labels
                        # will be shown during asm output.
                        insertion = "$%.4x" % (number)

                        marker = opstr.find("?")
                        opstr = opstr[:marker].lower() + insertion + opstr[marker + 1:].lower()

                        # This version of the formatted string has labels. In
                        # the future, the actual labels should be parsed
                        # straight out of the "main.asm" file.
                        target_address = number % 0x4000
                        insertion = "asm_" + hex(target_address)

                        if str(target_address) in self.rom.labels:
                            insertion = self.rom.labels[str(target_address)]

                        marker = base_opstr.find("?")
                        opstr2 = base_opstr[:marker].lower() + insertion + base_opstr[marker + 1:].lower()
                        asm_command["formatted_with_labels"] = opstr2

                        offset += 2

                # Check for relative jumps, construct the formatted asm line.
                # Also set the usage of labels.
                # NOTE(review): this reads the byte at the current offset as
                # a signed displacement, which matches a one-byte operand;
                # relative_jumps also lists opcodes that may take a two-byte
                # operand, for which the target computed here looks wrong.
                if current_byte in relative_jumps:
                    # generate a label for the byte we're jumping to
                    target_address = offset + 1 + c_int8(ord(rom[offset])).value
                    remote_label = "asm_" + hex(target_address)

                    if target_address in asm_commands:
                        asm_commands[target_address]["references"] += 1
                        asm_commands[target_address]["current_label"] = remote_label
                    else:
                        # This remote address might not be part of this
                        # function. Also, target_address can be negative
                        # (before the start_address that the user originally
                        # requested), and it shouldn't be shown on asm output
                        # because the intermediate bytes (between a negative
                        # target_address and start_address) won't be
                        # disassembled.
                        asm_commands[target_address] = {
                            "references": 1,
                            "current_label": remote_label,
                            "address": target_address,
                        }

                    # Not sure how to set this, can't be True because an
                    # address referenced multiple times will use a label
                    # despite the label not necessarily being used in the
                    # output. The "use_remote_label" values should be
                    # calculated when rendering the asm output, based on
                    # which addresses and which op codes will be displayed
                    # (within the range).
                    asm_command["use_remote_label"] = "unknown"
                    asm_command["remote_label"] = remote_label

                # store the formatted string for the output later
                asm_command["formatted"] = opstr

                # stop reading at a jump, relative jump or return
                if current_byte in end_08_scripts_with:
                    if not self.has_outstanding_labels(asm_commands, offset):
                        # the command is saved after the loop
                        keep_reading = False
                        break
                    else:
                        keep_reading = True
                else:
                    keep_reading = True

            else:
                # This shouldn't really happen, and means that this area of
                # the ROM probably doesn't represent instructions.
                asm_command["type"] = "data"  # db
                asm_command["value"] = current_byte
                keep_reading = False

            # save this new command in the list
            asm_commands[asm_command["address"]] = asm_command

            # jump forward by a byte
            offset += 1

        # Also save the last command: leaving the loop through "break" skips
        # the save above. Storing it again is harmless when it is already
        # there, and unlike inspecting "the last key" it neither depends on
        # dict ordering nor fails when nothing has been stored yet.
        asm_commands[asm_command["address"]] = asm_command

        # store the set of commands on this object
        self.asm_commands = asm_commands

        self.end_address = offset + 1

    def has_outstanding_labels(self, asm_commands, offset):
        """ Checks if there are any labels that haven't yet been created.

        Currently a stub that always answers False.
        """  # is this really necessary??
        return False

    def __str__(self):
        """ ASM pretty printer.

        Emits the commands in address order, skipping anything before
        start_address, and finishes with a comment holding end_address.
        """
        pieces = []

        # Sort explicitly so that the output is in address order no matter
        # how the dict happens to iterate.
        for key in sorted(self.asm_commands):
            # skip anything from before the beginning
            if key < self.start_address:
                continue

            line = self.asm_commands[key]

            # show a label
            if line["references"] > 0 and "current_label" in line:
                if line["address"] == self.start_address:
                    pieces.append("thing: ; " + hex(line["address"]) + "\n")
                else:
                    pieces.append("." + line["current_label"] + "\\@ ; " + hex(line["address"]) + "\n")

            # show the actual line
            if "formatted_with_labels" in line:
                pieces.append(spacing + line["formatted_with_labels"])
            elif "formatted" in line:
                pieces.append(spacing + line["formatted"])
            pieces.append("\n")

        # show the next address after this chunk
        pieces.append("; " + hex(self.end_address))

        return "".join(pieces)
7dd0c86 @kanzure almost complete new disassembler version
authored
468
0edf9eb @kanzure move AsmList into romstr.py too
authored
class AsmList(list):
    """ Simple wrapper to prevent all asm lines from being shown on screen.
    """

    def length(self):
        """ len(self)
        """
        return len(self)

    def __repr__(self):
        """ Simplifies this object so that the output doesn't overflow stdout.
        """
        return "AsmList(too long)"
482
e2babd6 @kanzure use romstr.py as the new disassembler
authored
# Command-line entry point: disassemble the crystal rom at the address given
# as the first argument and print the result.
if __name__ == "__main__":
    if len(sys.argv) < 2:
        sys.exit("usage: romstr.py <address>")

    # Read in binary mode so that the rom image is not altered by newline
    # translation, and close the file as soon as it has been read.
    with open("../pokecrystal.gbc", "rb") as rom_file:
        rom_data = rom_file.read()

    # RomStr expects one character per byte; latin-1 maps bytes 1:1.
    if not isinstance(rom_data, str):
        rom_data = rom_data.decode("latin-1")

    cryrom = RomStr(rom_data)
    asm = cryrom.to_asm(sys.argv[1])
    print(asm)
Something went wrong with that request. Please try again.