/
decomp.hpp
144 lines (126 loc) · 5.48 KB
/
decomp.hpp
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
/* ReC98
* -----
* Declarations to help decompiling the seemingly impossible
*/
// Flag comparisons
// ----------------
// Each macro masks a single bit out of the _FLAGS pseudoregister: 0x01 for
// FLAGS_CARRY, 0x40 for FLAGS_ZERO, and 0x80 for FLAGS_SIGN. When used inside
// a conditional expression like
// if(FLAGS_*) { goto some_label; | return; }
// these assemble into the single conditional jump instruction named next to
// each macro. Apply the ! operator to get the negated (JN*) versions, e.g.
// !FLAGS_CARRY for JNC / JAE / JNB.
#define FLAGS_CARRY (_FLAGS & 0x01) /* JC / JB / JNAE */
#define FLAGS_ZERO (_FLAGS & 0x40) /* JZ */
#define FLAGS_SIGN (_FLAGS & 0x80) /* JS */
// ----------------
// Alternate version that doesn't spill the port number to DX
// Expands to an inline assembly block that loads [val] into AL and then uses
// [port] directly as the first operand of OUT. [port] therefore has to be
// something the OUT instruction accepts in that position.
#define outportb2(port, val) _asm { \
mov al, val; \
out port, al; \
}
// Alternate version that sets the value first
// Expands to an inline assembly block that moves [val] into AX *before*
// moving [port] into DX, followed by a 16-bit `OUT DX, AX`.
#define outport2(port, val) _asm { \
mov ax, val; \
mov dx, port; \
out dx, ax; \
}
// Should just be unwrapped wherever it appears. Code that directly uses T
// would be much cleaner.
//
// Union that overlays a value of type T with a signed and an unsigned byte
// array of the same size (sizeof(T)), giving bytewise access to the storage
// of [t].
template <class T> union StupidBytewiseWrapperAround {
T t; // The wrapped value itself
int8_t byte[sizeof(T)]; // Same storage, viewed as signed bytes
uint8_t ubyte[sizeof(T)]; // Same storage, viewed as unsigned bytes
};
// poke() versions that actually inline with pseudoregisters
// ---------------------------------------------------------
// Writes the 16-bit [val] to the far address [sgm]:[off], which is built via
// MK_FP(). Expands to a braced block that already contains the terminating
// semicolon. [val] is pasted into the assignment without parentheses.
#define pokew(sgm, off, val) { *(uint16_t far *)(MK_FP(sgm, off)) = val; }
// Turbo C++ 4.0 generates wrong segment prefix opcodes for the _FS and _GS
// pseudoregisters - 0x46 (INC SI) and 0x4E (DEC SI) rather than the correct
// 0x64 and 0x65, respectively. These prefixes are also not supported in
// inline assembly, which is limited to pre-386 anyway. Compiling via assembly
// (`#pragma inline`) would work and generate the correct instructions here,
// but that would incur yet another dependency on a 16-bit TASM, for something
// honestly quite insignificant.
//
// So, can we somehow work around this issue while retaining the readability
// of the usage code and pretending that this bug doesn't exist? Comparisons
// with segment registers unfortunately don't inline, so something like
// if(sgm == _FS)
// wouldn't work, even inside a macro that replaces [sgm] with _FS. But since
// __emit__() *does* inline, we can use function templates! The default
// versions provide the regularly intended C code for all other registers,
// while explicit specializations for _FS and _GS __emit__() the correct
// instruction opcodes for all offset registers needed. Then, we only need to
// somehow move the pseudoregisters up into the type system... which can
// simply be done by turning them into class names via preprocessor token
// pasting. Sure, this limits this approach to raw registers with no immediate
// offsets, but let's hope we won't ever need those...
//
// Also, hey, no need for the MK_FP() macro if we directly return the correct
// types.
// Declaration of the __emit__() intrinsic used by the functions and macros
// further down in this file. It is only provided when both __TURBOC__ and
// __MSDOS__ are defined.
// NOTE(review): presumably repeated here so that this header does not depend
// on <dos.h> having been included first; confirm.
#if defined(__TURBOC__) && defined(__MSDOS__)
// Declared in <dos.h> in these compilers.
void __emit__(uint8_t __byte, ...);
#endif
// Tag types that move one pseudoregister each into the type system, so that
// poked_eax() can be overloaded per register. The names are formed by the
// poked() / poke_or_d() macros via `Decomp##<register>` token pasting.
// value() returns the current contents of the register, cast to a segment
// pointer (`void __seg *`) for segment registers, or to a near pointer
// (`void __near *`) for the offset register. None of the value() functions
// access [this].
struct Decomp_ES { void __seg* value() { return (void __seg *)(_ES); } };
struct Decomp_FS { void __seg* value() { return (void __seg *)(_FS); } };
struct Decomp_GS { void __seg* value() { return (void __seg *)(_GS); } };
struct Decomp_DI { void __near* value() { return (void __near *)(_DI); } };
// Removing [val] from the parameter lists of the template functions below
// perfects the inlining.
//
// Both macros first assign [val] to _EAX and then call poked_eax() with an
// opcode byte that selects the operation on the 32-bit value at [sgm]:[off]:
// • poked(): 0x89, which stores EAX
// • poke_or_d(): 0x09, which ORs EAX into the existing value
// [sgm] and [off] must be raw pseudoregister names for which a matching
// Decomp_* struct exists (_ES, _FS, _GS / _DI), as they are token-pasted into
// these class names. The NULL pointers only carry the type.
// Each macro expands to two statements and already includes the trailing
// semicolon, so it must not be used as the unbraced body of an if / else /
// loop.
#define poked(sgm, off, val) \
_EAX = val; \
poked_eax((Decomp##sgm *)NULL, (Decomp##off *)NULL, (uint8_t)(0x89));
#define poke_or_d(sgm, off, val) \
_EAX = val; \
poked_eax((Decomp##sgm *)NULL, (Decomp##off *)NULL, (uint8_t)(0x09));
// Default version for every segment / offset register combination without a
// dedicated overload. Performs the operation selected by [op] as regular C++
// code on the 32-bit value at the far address formed from sgm->value() and
// off->value():
// • 0x89: assigns _EAX
// • 0x09: ORs _EAX into the existing value
// Any other [op] does nothing. [sgm] and [off] are only used to call
// value() on them; the poked() / poke_or_d() macros pass NULL for both.
template <class Segment, class Offset> inline void poked_eax(
Segment *sgm, Offset *off, uint8_t op
) {
if(op == 0x89) {
*(uint32_t far *)(sgm->value() + off->value()) = _EAX;
} else if(op == 0x09) {
*(uint32_t far *)(sgm->value() + off->value()) |= _EAX;
}
}
// Overload for FS:[DI]. Instead of compiling C++ code, the instruction bytes
// are emitted directly, with 0x64 as the FS segment prefix:
// • 0x66: operand size prefix, for the 32-bit EAX operand
// • 0x64: FS segment override prefix
// • [op]: opcode byte as passed in by the caller
// • 0x05: ModR/M byte selecting [DI] and (E)AX
// [sgm] and [off] are unused; they only select this overload.
inline void poked_eax(Decomp_FS *sgm, Decomp_DI *off, uint8_t op) {
__emit__(0x66, 0x64, op, 0x05); // [op] FS:[DI], EAX
}
// Overload for GS:[DI]. Instead of compiling C++ code, the instruction bytes
// are emitted directly, with 0x65 as the GS segment prefix:
// • 0x66: operand size prefix, for the 32-bit EAX operand
// • 0x65: GS segment override prefix
// • [op]: opcode byte as passed in by the caller
// • 0x05: ModR/M byte selecting [DI] and (E)AX
// [sgm] and [off] are unused; they only select this overload.
inline void poked_eax(Decomp_GS *sgm, Decomp_DI *off, uint8_t op) {
__emit__(0x66, 0x65, op, 0x05); // [op] GS:[DI], EAX
}
// ---------------------------------------------------------
// Circumventing compiler optimizations
// ------------------------------------
// If you don't want to recreate the code layout of the original PC-98
// binaries, these can be safely deleted. They just make the code worse.
#if defined(__TURBOC__) && defined(__MSDOS__)
	// Use this function wherever the original code used an immediate 0 literal
	// that Turbo C++ would optimize away, e.g. in register assignments
	// (_AX = 0 → XOR AX, AX) or comparisons (_AX == 0 → OR AX, AX). This way,
	// the compiler is forced to leave space for any potential offset, with the
	// literal 0 then being spelled out by the linker.
	//
	// Returns [x] unchanged if it is nonzero. For a zero [x], the address of
	// the external [address_0] symbol is returned instead, converted to the
	// function's own type T rather than to one fixed project type, so that
	// the template works for every T that reinterpret_cast can produce from a
	// near pointer.
	template <class T> inline T keep_0(T x) {
		if(x == 0) {
			extern void *near address_0;
			return reinterpret_cast<T>(&address_0);
		}
		return x;
	}
	// Bypasses the -Z -3 function parameter optimization, where [x] would be
	// combined with any potential subsequent 16-bit parameter adjacent in
	// memory to form a 32-bit PUSH.
	// (Interestingly, using a template function inlines either too well or
	// too badly. Only this macro guarantees the intended 16-bit PUSH to be
	// consistently emitted.)
	//
	// [x] must be an lvalue, as its address is taken. The result is an lvalue
	// of the given [type], accessed through a near pointer.
	#define inhibit_Z3(type, x) \
		*reinterpret_cast<type near *>(reinterpret_cast<uint16_t>(&x))
#else
	// Pass-through versions for all other compilers. The argument is
	// parenthesized so that these macros bind like the function call they
	// replace, e.g. `keep_0(a + b) * 2` evaluates as `(a + b) * 2` on every
	// compiler.
	#define keep_0(x) (x)
	#define inhibit_Z3(type, x) (x)
#endif
// ------------------------------------
// 32-bit ASM instructions not supported by Turbo C++ 4.0J's built-in
// assembler. Makes no sense to compile with `#pragma inline` (and thus,
// require a 16-bit TASM) just for those.
//
// Both macros expand to a complete __emit__() statement that already
// includes the terminating semicolon:
// • MOVSD: 0x66 operand size prefix, followed by opcode 0xA5
// • REP: the 0xF3 prefix byte on its own
// NOTE(review): presumably meant to be written back to back as `REP MOVSD`
// at the call site; confirm against the usage code.
#define MOVSD __emit__(0x66, 0xA5);
#define REP __emit__(0xF3);