-
Notifications
You must be signed in to change notification settings - Fork 5
/
main.c
304 lines (260 loc) · 10.9 KB
/
main.c
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
/*
* ps1-bare-metal - (C) 2023 spicyjpeg
*
* Permission to use, copy, modify, and/or distribute this software for any
* purpose with or without fee is hereby granted, provided that the above
* copyright notice and this permission notice appear in all copies.
*
* THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES WITH
* REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY
* AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT,
* INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM
* LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR
* OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
* PERFORMANCE OF THIS SOFTWARE.
*
*
* Having explored the capabilities of the PS1's GPU in previous examples, it is
* now time to focus on the other piece of hardware that makes 3D graphics on
* the PS1 possible: the geometry transformation engine (GTE), a specialized
* coprocessor whose job is to perform various geometry-related calculations
* much faster than the CPU could on its own. To draw a 3D scene the CPU can use
* the GTE to calculate the screen space coordinates of each polygon's vertices,
* then pack those into a display list which will be sent off to the GPU for
* drawing. In this example we're going to draw a spinning model of a cube,
* using the GTE to carry out the computationally heavy tasks of rotation and
* perspective projection.
*
* Unlike any other peripheral on the console, the GTE is not memory-mapped
* but rather accessed through special CPU instructions that require the use of
* inline assembly. This tutorial will thus use the cop0gte.h header I wrote to
* abstract away the low-level assembly required to access GTE registers,
* focusing on its practical usage instead. This example may be harder to follow
* than previous ones for people unfamiliar with basic linear algebra and 3D
* geometry concepts, so familiarizing yourself with those is highly
* recommended.
*/
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>
#include "gpu.h"
#include "ps1/cop0gte.h"
#include "ps1/gpucmd.h"
#include "ps1/registers.h"
#include "trig.h"
// The GTE uses a 4.12 fixed-point format for most values. What this means is
// that most fractional values will be stored as integers by multiplying them by
// a fixed unit, in this case 4096 or 1 << 12 (hence making the fractional part
// 12 bits long). We'll define this unit value to make their handling easier.
#define ONE (1 << 12)

// Prepares the GTE for rendering to a framebuffer of the given size: turns the
// coprocessor on, centers projected coordinates on the screen, sets the field
// of view and configures the Z averaging scale used for depth sorting.
//
// width, height: framebuffer dimensions in pixels.
static void setupGTE(int width, int height) {
    // The GTE is coprocessor 2. Coprocessors are switched on through the
    // status register of coprocessor 0 (which is always accessible), so set
    // the CU2 bit while leaving every other bit as it was.
    cop0_setSR(cop0_getSR() | COP0_SR_CU2);

    // Every projected coordinate gets this offset added to it; using the
    // middle of the screen makes the model appear centered.
    const int centerX = width / 2;
    const int centerY = height / 2;

    gte_setXYOrigin(centerX, centerY);
    gte_setFieldOfView(width);

    // Z averaging: for each polygon the GTE sums the transformed Z coordinates
    // of its vertices multiplied by this factor, yielding the index of the
    // ordering table bucket the polygon is sorted into.
    gte_setZScaleFactor(ONE / ORDERING_TABLE_SIZE);
}
// When transforming vertices, the GTE will multiply their vectors by a 3x3
// matrix stored in its registers. This matrix can be used, among other things,
// to rotate the model by multiplying it by the appropriate rotation matrices.
// The two functions below handle manipulation of this matrix.
// Computes (current GTE matrix) x (matrix whose columns are V0, V1, V2) and
// writes the 3x3 product to *output.
//
// The caller is expected to have loaded V0/V1/V2 beforehand; in this file
// rotateCurrentMatrix() does so through gte_setColumnVectors().
//
// output: destination matrix. All nine entries of output->values are
//         overwritten, indexed as values[row][column].
static void multiplyCurrentMatrixByVectors(GTEMatrix *output) {
// Multiply the GTE's current matrix by the matrix whose column vectors are
// V0/V1/V2, then store the result to the provided location. This has to be
// done one column at a time, as the GTE only supports multiplying a matrix
// by a vector using the MVMVA command.
// Column 0: multiply by V0, then read the resulting vector back from IR1-IR3
// before the next command replaces it.
gte_command(GTE_CMD_MVMVA | GTE_SF | GTE_MX_RT | GTE_V_V0 | GTE_CV_NONE);
output->values[0][0] = gte_getIR1();
output->values[1][0] = gte_getIR2();
output->values[2][0] = gte_getIR3();
// Column 1: same operation with V1 as the input vector.
gte_command(GTE_CMD_MVMVA | GTE_SF | GTE_MX_RT | GTE_V_V1 | GTE_CV_NONE);
output->values[0][1] = gte_getIR1();
output->values[1][1] = gte_getIR2();
output->values[2][1] = gte_getIR3();
// Column 2: same operation with V2 as the input vector.
gte_command(GTE_CMD_MVMVA | GTE_SF | GTE_MX_RT | GTE_V_V2 | GTE_CV_NONE);
output->values[0][2] = gte_getIR1();
output->values[1][2] = gte_getIR2();
output->values[2][2] = gte_getIR3();
}
// Rotates the GTE's current matrix by the given angles. For every non-zero
// angle a rotation matrix is built, multiplied with the GTE's current matrix
// and the product is loaded back into the GTE. The rotations are applied in
// the order yaw, pitch, roll; an angle of zero skips its step entirely.
//
// yaw:   angle passed to isin()/icos() for the rotation about the Z axis.
// pitch: angle passed to isin()/icos() for the rotation about the Y axis.
// roll:  angle passed to isin()/icos() for the rotation about the X axis.
static void rotateCurrentMatrix(int yaw, int pitch, int roll) {
    // Scratch matrix receiving each intermediate product. It has static
    // storage duration but is always fully rewritten before being read.
    static GTEMatrix product;

    if (yaw != 0) {
        const int sine   = isin(yaw);
        const int cosine = icos(yaw);

        gte_setColumnVectors(
            cosine, -sine,   0,
            sine,   cosine,  0,
            0,      0,       ONE
        );
        multiplyCurrentMatrixByVectors(&product);
        gte_loadRotationMatrix(&product);
    }

    if (pitch != 0) {
        const int sine   = isin(pitch);
        const int cosine = icos(pitch);

        gte_setColumnVectors(
            cosine, 0,   sine,
            0,      ONE, 0,
            -sine,  0,   cosine
        );
        multiplyCurrentMatrixByVectors(&product);
        gte_loadRotationMatrix(&product);
    }

    if (roll != 0) {
        const int sine   = isin(roll);
        const int cosine = icos(roll);

        gte_setColumnVectors(
            ONE, 0,      0,
            0,   cosine, -sine,
            0,   sine,   cosine
        );
        multiplyCurrentMatrixByVectors(&product);
        gte_loadRotationMatrix(&product);
    }
}
// We're going to store the 3D model of our cube as two separate arrays, one
// containing a list of unique vertices and the other referencing those vertices
// to build up quadrilateral faces. This approach of having a "palette" of
// vertices, in a similar way to how indexed color works, allows for significant
// memory savings as most if not all faces usually have vertices in common.
// A single quadrilateral face of a model.
typedef struct {
// Indices of the face's four corners within the vertex array (cubeVertices
// for the cube). main() projects entries 0-2 together and entry 3 separately.
uint8_t vertices[4];
// Color of the face; main() ORs it into the first word of the face's GP0
// packet. The values used in this file all fit in 24 bits.
// NOTE(review): the channel order is not visible here - confirm in gpucmd.h.
uint32_t color;
} Face;
// Number of entries in cubeVertices and cubeFaces respectively.
#define NUM_CUBE_VERTICES 8
#define NUM_CUBE_FACES 6
// The eight corners of a cube centered on the origin, each coordinate being
// either -32 or +32 (i.e. an edge length of 64 units). The index of a corner
// encodes its position: bit 0 set means +x, bit 1 set means +y, bit 2 set means
// +z. cubeFaces refers to corners by index, so the order must not be changed.
static const GTEVector16 cubeVertices[NUM_CUBE_VERTICES] = {
{ .x = -32, .y = -32, .z = -32 },
{ .x = 32, .y = -32, .z = -32 },
{ .x = -32, .y = 32, .z = -32 },
{ .x = 32, .y = 32, .z = -32 },
{ .x = -32, .y = -32, .z = 32 },
{ .x = 32, .y = -32, .z = 32 },
{ .x = -32, .y = 32, .z = 32 },
{ .x = 32, .y = 32, .z = 32 }
};
// Note that there are several requirements on the order of vertices:
// - they must be arranged in a Z-like shape rather than clockwise or
// counterclockwise, since the GPU processes a quad with vertices (A, B, C, D)
// as two triangles with vertices (A, B, C) and (B, C, D) respectively;
// - the first 3 vertices must be ordered clockwise when the face is viewed from
// the front, as the code relies on this to determine whether or not the quad
// is facing the camera (see main()).
// For instance, only the first of these faces (viewed from the front) has its
// vertices ordered correctly:
//     0----1         0----1         2----3
//     |  / |         | \/ |         | \  |
//     | /  |         | /\ |         |  \ |
//     2----3         3----2         0----1
//    Correct      Not Z-shaped  Not clockwise
// The six faces of the cube, one per side. Each entry lists four indices into
// cubeVertices plus the face's color. The order of the indices within an entry
// matters: main() uses the on-screen winding of the first three to decide
// whether the face is visible.
static const Face cubeFaces[NUM_CUBE_FACES] = {
{ .vertices = { 0, 1, 2, 3 }, .color = 0x0000ff }, // side at z = -32
{ .vertices = { 6, 7, 4, 5 }, .color = 0x00ff00 }, // side at z = +32
{ .vertices = { 4, 5, 0, 1 }, .color = 0x00ffff }, // side at y = -32
{ .vertices = { 7, 6, 3, 2 }, .color = 0xff0000 }, // side at y = +32
{ .vertices = { 6, 4, 2, 0 }, .color = 0xff00ff }, // side at x = -32
{ .vertices = { 5, 7, 1, 3 }, .color = 0xffff00 }  // side at x = +32
};
#define SCREEN_WIDTH 320
#define SCREEN_HEIGHT 240

// Program entry point. Initializes serial output, the GPU, the GTE and DMA,
// then renders the spinning cube forever, alternating between two framebuffers
// placed side by side in VRAM. argc and argv are not used.
int main(int argc, const char **argv) {
    initSerialIO(115200);

    // Pick the video mode reported by the GPU status register (read once).
    const bool isPAL =
        (GPU_GP1 & GP1_STAT_MODE_BITMASK) == GP1_STAT_MODE_PAL;

    if (isPAL) {
        puts("Using PAL mode");
        setupGPU(GP1_MODE_PAL, SCREEN_WIDTH, SCREEN_HEIGHT);
    } else {
        puts("Using NTSC mode");
        setupGPU(GP1_MODE_NTSC, SCREEN_WIDTH, SCREEN_HEIGHT);
    }

    setupGTE(SCREEN_WIDTH, SCREEN_HEIGHT);

    // Enable the GPU and OTC DMA channels. These are kept as two separate
    // read-modify-write accesses to the register.
    DMA_DPCR |= DMA_DPCR_ENABLE << (DMA_GPU * 4);
    DMA_DPCR |= DMA_DPCR_ENABLE << (DMA_OTC * 4);

    GPU_GP1 = gp1_dmaRequestMode(GP1_DREQ_GP0_WRITE);
    GPU_GP1 = gp1_dispBlank(false);

    // One display list per framebuffer; the flag selects which pair is used
    // for the current frame and is flipped on every iteration.
    DMAChain frames[2];
    bool     onSecondFrame = false;
    int      frameNumber   = 0;

    while (true) {
        DMAChain *chain = &frames[onSecondFrame];

        // The second framebuffer sits immediately to the right of the first.
        const int bufferX = onSecondFrame ? SCREEN_WIDTH : 0;
        const int bufferY = 0;

        onSecondFrame = !onSecondFrame;

        GPU_GP1 = gp1_fbOffset(bufferX, bufferY);

        // Start this frame with an empty ordering table and packet buffer.
        clearOrderingTable(chain->orderingTable, ORDERING_TABLE_SIZE);
        chain->nextPacket = chain->data;

        // Reset the GTE's translation vector (added to every vertex) and its
        // matrix (to identity), then rotate the matrix so the cube spins. The
        // translation pushes the cube away from the camera so it is visible.
        gte_setTranslationVector(0, 0, 256);
        gte_setRotationMatrix(
            ONE, 0,   0,
            0,   ONE, 0,
            0,   0,   ONE
        );

        rotateCurrentMatrix(0, frameNumber * 16, frameNumber * 12);
        frameNumber++;

        // Emit one quad per visible face of the cube.
        for (int faceIndex = 0; faceIndex < NUM_CUBE_FACES; faceIndex++) {
            const Face    *face    = &cubeFaces[faceIndex];
            const uint8_t *corners = face->vertices;

            // The GTE projects at most 3 vertices per command, so the first
            // three corners go through RTPT and the fourth is handled later.
            gte_loadV0(&cubeVertices[corners[0]]);
            gte_loadV1(&cubeVertices[corners[1]]);
            gte_loadV2(&cubeVertices[corners[2]]);
            gte_command(GTE_CMD_RTPT | GTE_SF);

            // NCLIP yields the on-screen winding order of those 3 vertices.
            // Clockwise means the face is visible; anything else is facing
            // away from the camera and gets skipped.
            gte_command(GTE_CMD_NCLIP);

            if (gte_getMAC0() <= 0) {
                continue;
            }

            // The GTE only retains the X/Y coordinates of the last 3 vertices
            // processed (and the Z coordinates of the last 4), so save the
            // first projected vertex before projecting the fourth corner.
            const uint32_t firstXY = gte_getSXY0();

            gte_loadV0(&cubeVertices[corners[3]]);
            gte_command(GTE_CMD_RTPS | GTE_SF);

            // Average the Z coordinates of all 4 vertices to obtain the
            // ordering table bucket for this face.
            gte_command(GTE_CMD_AVSZ4 | GTE_SF);
            const int bucket = gte_getOTZ();

            if ((bucket < 0) || (bucket >= ORDERING_TABLE_SIZE)) {
                continue;
            }

            // Build the quad: command word with the face's color, the saved
            // first vertex, then the coordinates still held by the GTE.
            uint32_t *quad = allocatePacket(chain, bucket, 5);

            quad[0] = face->color | gp0_shadedQuad(false, false, false);
            quad[1] = firstXY;
            gte_storeSXY012(&quad[2]);
        }

        // Fill the framebuffer with gray; this goes into the last bucket.
        uint32_t *fill = allocatePacket(chain, ORDERING_TABLE_SIZE - 1, 3);

        fill[0] = gp0_rgb(64, 64, 64) | gp0_vramFill();
        fill[1] = gp0_xy(bufferX, bufferY);
        fill[2] = gp0_xy(SCREEN_WIDTH, SCREEN_HEIGHT);

        // Drawing environment for this frame's buffer, also in the last bucket.
        uint32_t *env = allocatePacket(chain, ORDERING_TABLE_SIZE - 1, 4);

        env[0] = gp0_texpage(0, true, false);
        env[1] = gp0_fbOffset1(bufferX, bufferY);
        env[2] = gp0_fbOffset2(
            bufferX + SCREEN_WIDTH - 1, bufferY + SCREEN_HEIGHT - 2
        );
        env[3] = gp0_fbOrigin(bufferX, bufferY);

        // Wait for the GPU and for vertical sync, then submit the display
        // list starting from the last ordering table entry.
        waitForGP0Ready();
        waitForVSync();
        sendLinkedList(&(chain->orderingTable)[ORDERING_TABLE_SIZE - 1]);
    }

    // Never reached: the loop above does not terminate.
    return 0;
}