From e0efde7cf65e0f22d5afa830339fb1dc6ca91479 Mon Sep 17 00:00:00 2001 From: Vladimir Menshakov Date: Thu, 16 Jun 2011 00:20:30 +0400 Subject: [PATCH] DREAMWEB: optimized consecutive movsb/w and stosb/w --- devtools/tasmrecover/tasm/cpp.py | 26 ++++---- devtools/tasmrecover/tasm/op.py | 16 ++--- devtools/tasmrecover/tasm/proc.py | 29 +++++++++ engines/dreamweb/dreamgen.cpp | 101 ++++-------------------------- engines/dreamweb/runtime.h | 45 ++++++++++--- 5 files changed, 97 insertions(+), 120 deletions(-) diff --git a/devtools/tasmrecover/tasm/cpp.py b/devtools/tasmrecover/tasm/cpp.py index 4e37dc8e5b5a..242082986902 100644 --- a/devtools/tasmrecover/tasm/cpp.py +++ b/devtools/tasmrecover/tasm/cpp.py @@ -403,31 +403,31 @@ def _pop(self, regs): self.body += p def _rep(self): - self.body += "\twhile(context.cx--) "; + self.body += "\twhile(context.cx--) " def _lodsb(self): - self.body += "\tcontext._lodsb();\n"; + self.body += "\tcontext._lodsb();\n" def _lodsw(self): - self.body += "\tcontext._lodsw();\n"; + self.body += "\tcontext._lodsw();\n" - def _stosb(self): - self.body += "\tcontext._stosb();\n"; + def _stosb(self, n): + self.body += "\tcontext._stosb(%s);\n" %("" if n == 1 else n) - def _stosw(self): - self.body += "\tcontext._stosw();\n"; + def _stosw(self, n): + self.body += "\tcontext._stosw(%s);\n" %("" if n == 1 else n) - def _movsb(self): - self.body += "\tcontext._movsb();\n "; + def _movsb(self, n): + self.body += "\tcontext._movsb(%s);\n " %("" if n == 1 else n) - def _movsw(self): - self.body += "\tcontext._movsw();\n "; + def _movsw(self, n): + self.body += "\tcontext._movsw(%s);\n " %("" if n == 1 else n) def _stc(self): - self.body += "\tcontext.flags._c = true;\n "; + self.body += "\tcontext.flags._c = true;\n " def _clc(self): - self.body += "\tcontext.flags._c = false;\n "; + self.body += "\tcontext.flags._c = false;\n " def __proc(self, name, def_skip = 0): try: diff --git a/devtools/tasmrecover/tasm/op.py b/devtools/tasmrecover/tasm/op.py 
index 33f79c3e5c85..9baebccfc3b1 100644 --- a/devtools/tasmrecover/tasm/op.py +++ b/devtools/tasmrecover/tasm/op.py @@ -316,27 +316,27 @@ def visit(self, visitor): class _stosw(baseop): def __init__(self, arg): - pass + self.repeat = 1 def visit(self, visitor): - visitor._stosw() + visitor._stosw(self.repeat) class _stosb(baseop): def __init__(self, arg): - pass + self.repeat = 1 def visit(self, visitor): - visitor._stosb() + visitor._stosb(self.repeat) class _movsw(baseop): def __init__(self, arg): - pass + self.repeat = 1 def visit(self, visitor): - visitor._movsw() + visitor._movsw(self.repeat) class _movsb(baseop): def __init__(self, arg): - pass + self.repeat = 1 def visit(self, visitor): - visitor._movsb() + visitor._movsb(self.repeat) class _in(baseop): def __init__(self, arg): diff --git a/devtools/tasmrecover/tasm/proc.py b/devtools/tasmrecover/tasm/proc.py index 57f17518fb9e..3c4ac2137220 100644 --- a/devtools/tasmrecover/tasm/proc.py +++ b/devtools/tasmrecover/tasm/proc.py @@ -27,6 +27,30 @@ def remove_label(self, label): self.stmts.remove(l) return + def optimize_sequence(self, cls): + i = 0 + stmts = self.stmts + while i < len(stmts): + if not isinstance(stmts[i], cls): + i += 1 + continue + j = i + 1 + + while j < len(stmts): + if not isinstance(stmts[j], cls): + break + j = j + 1 + + n = j - i + if n > 1: + print "Eliminate consequtive storage instructions at %u-%u" %(i, j) + del stmts[i + 1:j] + stmts[i].repeat = n + else: + i = j + + return + def optimize(self): print "optimizing..." 
#trivial simplifications, removing last ret @@ -72,6 +96,11 @@ def optimize(self): if not used: print self.labels self.remove_label(s.name) + + self.optimize_sequence(op._stosb); + self.optimize_sequence(op._stosw); + self.optimize_sequence(op._movsb); + self.optimize_sequence(op._movsw); def add(self, stmt): #print stmt diff --git a/engines/dreamweb/dreamgen.cpp b/engines/dreamweb/dreamgen.cpp index cbdb25256cfb..f76a3a5b584c 100644 --- a/engines/dreamweb/dreamgen.cpp +++ b/engines/dreamweb/dreamgen.cpp @@ -4204,38 +4204,7 @@ void clearwork(Context &context) { context.di = 0; context.cx = (200*320)/64; clearloop: - context._stosw(); - context._stosw(); - context._stosw(); - context._stosw(); - context._stosw(); - context._stosw(); - context._stosw(); - context._stosw(); - context._stosw(); - context._stosw(); - context._stosw(); - context._stosw(); - context._stosw(); - context._stosw(); - context._stosw(); - context._stosw(); - context._stosw(); - context._stosw(); - context._stosw(); - context._stosw(); - context._stosw(); - context._stosw(); - context._stosw(); - context._stosw(); - context._stosw(); - context._stosw(); - context._stosw(); - context._stosw(); - context._stosw(); - context._stosw(); - context._stosw(); - context._stosw(); + context._stosw(32); if (--context.cx) goto clearloop; } @@ -4366,70 +4335,27 @@ void doblocks(Context &context) { context.bh = 14; context.bh = 4; firstbitofblock: - context._movsw(); - context._movsw(); - context._movsw(); - context._movsw(); - context._movsw(); - context._movsw(); - context._movsw(); - context._movsw(); + context._movsw(8); context._add(context.di, (320)-16); context._dec(context.bh); if (!context.flags.z()) goto firstbitofblock; context.bh = 12; loop125: - context._movsw(); - context._movsw(); - context._movsw(); - context._movsw(); - context._movsw(); - context._movsw(); - context._movsw(); - context._movsw(); + context._movsw(8); context.ax = 0x0dfdf; - context._stosw(); - context._stosw(); + 
context._stosw(2); context._add(context.di, (320)-20); context._dec(context.bh); if (!context.flags.z()) goto loop125; context._add(context.di, 4); context.ax = 0x0dfdf; - context._stosw(); - context._stosw(); - context._stosw(); - context._stosw(); - context._stosw(); - context._stosw(); - context._stosw(); - context._stosw(); + context._stosw(8); context._add(context.di, (320)-16); - context._stosw(); - context._stosw(); - context._stosw(); - context._stosw(); - context._stosw(); - context._stosw(); - context._stosw(); - context._stosw(); + context._stosw(8); context._add(context.di, (320)-16); - context._stosw(); - context._stosw(); - context._stosw(); - context._stosw(); - context._stosw(); - context._stosw(); - context._stosw(); - context._stosw(); + context._stosw(8); context._add(context.di, (320)-16); - context._stosw(); - context._stosw(); - context._stosw(); - context._stosw(); - context._stosw(); - context._stosw(); - context._stosw(); - context._stosw(); + context._stosw(8); zeroblock: context.si = context.pop(); context.di = context.pop(); @@ -4811,9 +4737,7 @@ void fadetowhite(Context &context) { while(context.cx--) context._stosb(); context.di = (0+(180*10)+32+60+(32*32)+(11*10*3)+768); context.al = 0; - context._stosb(); - context._stosb(); - context._stosb(); + context._stosb(3); paltostartpal(context); context.data.byte(kFadedirection) = 1; context.data.byte(kFadecount) = 63; @@ -4830,9 +4754,7 @@ void fadefromwhite(Context &context) { while(context.cx--) context._stosb(); context.di = (0+(180*10)+32+60+(32*32)+(11*10*3)); context.al = 0; - context._stosb(); - context._stosb(); - context._stosb(); + context._stosb(3); paltoendpal(context); context.data.byte(kFadedirection) = 1; context.data.byte(kFadecount) = 63; @@ -17179,8 +17101,7 @@ void clearchanges(Context &context) { context.es = context.cs; context.di = 8011; context.al = 1; - context._stosb(); - context._stosb(); + context._stosb(2); context.al = 0; context._stosb(); context.al = 1; diff 
--git a/engines/dreamweb/runtime.h b/engines/dreamweb/runtime.h index 1fb232c18761..3b97ce193071 100644 --- a/engines/dreamweb/runtime.h +++ b/engines/dreamweb/runtime.h @@ -449,31 +449,58 @@ class Context { src = r; } + inline void _lodsb() { + al = ds.byte(si++); + } + + inline void _lodsw() { + ax = ds.word(si); + si += 2; + } + inline void _movsb() { es.byte(di++) = ds.byte(si++); } + inline void _movsb(uint size) { + uint8 *dst = es.ptr(di, size); + uint8 *src = ds.ptr(si, size); + memcpy(dst, src, size); + di += size; + si += size; + } + inline void _movsw() { _movsb(); _movsb(); } - inline void _lodsb() { - al = ds.byte(si++); - } - - inline void _lodsw() { - ax = ds.word(si); - si += 2; + inline void _movsw(uint size) { + _movsb(size * 2); } inline void _stosb() { es.byte(di++) = al; } + inline void _stosb(uint size) { + uint8 *dst = es.ptr(di, size); + memset(dst, al, size); + di += size; + } + inline void _stosw() { - es.word(di) = ax; - di += 2; + es.byte(di++) = al; + es.byte(di++) = ah; + } + + inline void _stosw(uint size) { + uint8 *dst = es.ptr(di, size); + di += 2 * size; + while(size--) { + *dst++ = al; + *dst++ = ah; + } } inline void _xchg(uint16 &a, uint16 &b) {