Merge pull request #50 from purpasmart96/gpu_clean_up

Clean up some of the GPU code
plutooo · Feb 24, 2015 · dd0ecdf · dd0ecdf
2 parents 33e8769 + 08ed9a7
commit dd0ecdf
Show file tree

Hide file tree

Showing 11 changed files with 409 additions and 419 deletions.
diff --git a/inc/gpu.h b/inc/gpu.h
@@ -59,12 +59,21 @@
 #define RGBdowntwoleft 0x40056C
 
 
-u8* LINEmembuffer;
-u8* VRAMbuff;
-u8* GSPsharedbuff;
+u8* LINEAR_MemoryBuff;
+u8* VRAM_MemoryBuff;
+u8* GSP_SharedBuff;
 extern u32 GPU_Regs[0xFFFF];
 
-#define GSPsharebuffsize 0x1000 //dumped from GSP module in Firm 4.4
+#define STACK_MAX 64
+
+#define Format_BYTE 0
+#define Format_UBYTE 1
+#define Format_SHORT 2
+#define Format_FLOAT 3
+
+#define VS_State_INVALID_ADDRESS 0xFFFFFFFF
+
+#define GSP_Shared_Buff_Size 0x1000 //dumped from GSP module in Firm 4.4
 
 #define TRIGGER_IRQ 0x10
 
@@ -78,36 +87,36 @@ extern u32 GPU_Regs[0xFFFF];
 #define Viewport_depth_range 0x4D
 #define Viewport_depth_far_plane 0x4E
 
-#define VSVertexAttributeOutputMap 0x50
+#define VS_VertexAttributeOutputMap 0x50
 // untill 0x56
 
-#define TEXTURINGSETINGS80 0x80
-#define TEXTURCONFIG0SIZE 0x82
-#define TEXTURCONFIG0WRAP 0x83
-#define TEXTURCONFIG0ADDR 0x85
-#define TEXTURCONFIG0TYPE 0x8E
+#define TEXTURING_SETINGS    0x80
+#define TEXTURE_CONFIG0_SIZE 0x82
+#define TEXTURE_CONFIG0_WRAP 0x83
+#define TEXTURE_CONFIG0_ADDR 0x85
+#define TEXTURE_CONFIG0_TYPE 0x8E
 
-#define TEXTURCONFIG1SIZE 0x92
-#define TEXTURCONFIG1WRAP 0x93
-#define TEXTURCONFIG1ADDR 0x95
-#define TEXTURCONFIG1TYPE 0x96
+#define TEXTURE_CONFIG1_SIZE 0x92
+#define TEXTURE_CONFIG1_WRAP 0x93
+#define TEXTURE_CONFIG1_ADDR 0x95
+#define TEXTURE_CONFIG1_TYPE 0x96
 
-#define TEXTURCONFIG2SIZE 0x9A
-#define TEXTURCONFIG2WRAP 0x9B
-#define TEXTURCONFIG2ADDR 0x9D
-#define TEXTURCONFIG2TYPE 0x9E
+#define TEXTURE_CONFIG2_SIZE 0x9A
+#define TEXTURE_CONFIG2_WRAP 0x9B
+#define TEXTURE_CONFIG2_ADDR 0x9D
+#define TEXTURE_CONFIG2_TYPE 0x9E
 
 #define GLTEXENV 0xC0
 // untill 0x100 with a jump at 0xE0- 0xF0
-#define COLOROUTPUT_CONFIG 0x100
+#define COLOR_OUTPUT_CONFIG 0x100
 #define BLEND_CONFIG 0x101
-#define COLORLOGICOP_CONFIG 0x102
+#define COLOR_LOGICOP_CONFIG 0x102
 #define BLEND_COLOR 0x103
 #define ALPHATEST_CONFIG 0x104
 
 #define DEPTHTEST_CONFIG 0x107
-#define DEPTHFORMAT 0x116
-#define BUFFERFORMAT 0x117
+#define DEPTH_FORMAT 0x116
+#define BUFFER_FORMAT 0x117
 
 #define DEPTHBUFFER_ADDRESS 0x11C
 #define COLORBUFFER_ADDRESS 0x11D
@@ -123,34 +132,75 @@ extern u32 GPU_Regs[0xFFFF];
 
 #define TriangleTopology 0x25e
 
-#define VSresttriangel 0x25f
+#define VS_resttriangel 0x25f
 
 #define VS_INTUNIFORM_I0 0x2B1 //untill I3 in 0x284
 
-#define VSMainOffset 0x2BA
-#define VSInputRegisterMap 0x2BB
+#define VS_MainOffset 0x2BA
+#define VS_InputRegisterMap 0x2BB
 // untill 0x2BC
-#define VSFloatUniformSetup 0x2C0
+#define VS_FloatUniformSetup 0x2C0
 // untill 0x2C8
-#define VSBeginLoadProgramData 0x2CB
-#define VSLoadProgramData 0x2CC
+#define VS_BeginLoadProgramData 0x2CB
+#define VS_LoadProgramData 0x2CC
 //untill 0x2D3
-#define VSBeginLoadSwizzleData 0x2D5
-#define VSLoadSwizzleData 0x2D6
+#define VS_BeginLoadSwizzleData 0x2D5
+#define VS_LoadSwizzleData 0x2D6
 // untill 0x2DD
 
+#define SHDR_ADD     0x0
+#define SHDR_DP3     0x1
+#define SHDR_DP4     0x2
+#define SHDR_DPH     0x3
+#define SHDR_DST     0x4
+#define SHDR_EXP     0x5
+#define SHDR_LOG     0x6
+#define SHDR_LITP    0x7
+#define SHDR_MUL     0x8
+#define SHDR_SGE     0x9
+#define SHDR_SLT     0xA
+#define SHDR_FLR     0xB
+#define SHDR_MAX     0xC
+#define SHDR_MIN     0xD
+#define SHDR_RCP     0xE
+#define SHDR_RSQ     0xF
+
+#define SHDR_MOVA    0x12
+#define SHDR_MOV     0x13
+
+#define SHDR_NOP     0x21
+#define SHDR_END     0x22
+#define SHDR_BREAKC  0x23
+#define SHDR_CALL    0x24
+#define SHDR_CALLC   0x25
+#define SHDR_CALLB   0x26
+#define SHDR_IFB     0x27
+#define SHDR_IFC     0x28
+#define SHDR_LOOP    0x29
+#define SHDR_JPC     0x2C
+#define SHDR_JPB     0x2D
+#define SHDR_CMP     0x2E
+#define SHDR_CMP2    0x2F
+#define SHDR_MAD1    0x38
+#define SHDR_MAD2    0x39
+#define SHDR_MAD3    0x3A
+#define SHDR_MAD4    0x3B
+#define SHDR_MAD5    0x3C
+#define SHDR_MAD6    0x3D
+#define SHDR_MAD7    0x3E
+#define SHDR_MAD8    0x3F
 
 struct OutputVertex {
 
     // VS output attributes
-    struct vec4 pos;
+    struct vec4 position;
     struct vec4 dummy; // quaternions (not implemented, yet)
     struct vec4 color;
-    struct vec2 tc0;
-    struct vec2 tc1;
-    float tc0_w;
+    struct vec2 texcoord0;
+    struct vec2 texcoord1;
+    float       texcoord0_w;
     struct vec3 View;
-    struct vec2 tc2;
+    struct vec2 texcoord2;
 
     // Padding for optimal alignment
     float pad[10];
@@ -169,21 +219,20 @@ struct clov3 {
     u8 v[5];
 };
 
-
 void gpu_Init();
 void gpu_WriteReg32(u32 addr, u32 data);
-u32 gpu_ReadReg32(u32 addr);
-void GPUTriggerCmdReqQueue();
-u32 GPURegisterInterruptRelayQueue(u32 Flags, u32 Kevent, u32*threadID, u32*outMemHandle);
-u8* get_pymembuffer(u32 addr);
-u32 get_py_memrestsize(u32 addr);
+u32  gpu_ReadReg32(u32 addr);
+void gpu_TriggerCmdReqQueue();
+u32  gpu_RegisterInterruptRelayQueue(u32 Flags, u32 Kevent, u32*threadID, u32*outMemHandle);
+u8*  gpu_GetPhysicalMemoryBuff(u32 addr);
+u32  gpu_GetPhysicalMemoryRestSize(u32 addr);
 void gpu_SendInterruptToAll(u32 ID);
 void gpu_ExecuteCommands(u8* buffer, u32 size);
-u32 getsizeofwight(u16 val);
-u32 convertvirtualtopys(u32 addr);
-void updateFramebuffer();
-void updateFramebufferaddr(u32 addr, bool bot);
-void writeGPUID(u16 ID, u8 mask, u32 size, u32* buffer);
+u32  gpu_GetSizeOfWidth(u16 val);
+u32  gpu_ConvertVirtualToPhysical(u32 addr);
+void gpu_UpdateFramebuffer();
+void gpu_UpdateFramebufferAddr(u32 addr, bool bottom);
+void gpu_WriteID(u16 ID, u8 mask, u32 size, u32* buffer);
 
 //clipper.c
 void Clipper_ProcessTriangle(struct OutputVertex *v0, struct OutputVertex *v1, struct OutputVertex *v2);

diff --git a/src/gpu/clipper.c b/src/gpu/clipper.c
@@ -42,11 +42,10 @@ void InitScreenCoordinates(struct OutputVertex *vtx)
     f24to32(GPU_Regs[Viewport_depth_range], &viewport.zscale);
     f24to32(GPU_Regs[Viewport_depth_far_plane], &viewport.offset_z);
 
-
     // TODO: Not sure why the viewport width needs to be divided by 2 but the viewport height does not
-    vtx->screenpos.v[0] = (vtx->pos.v[0] / vtx->pos.v[3] + 1.0f) * viewport.halfsize_x + viewport.offset_x;
-    vtx->screenpos.v[1] = (vtx->pos.v[1] / vtx->pos.v[3] + 1.0f) * viewport.halfsize_y + viewport.offset_y;
-    vtx->screenpos.v[2] = viewport.offset_z + vtx->pos.v[2] / vtx->pos.v[3] * viewport.zscale;
+    vtx->screenpos.v[0] = (vtx->position.v[0] / vtx->position.v[3] + 1.0f) * viewport.halfsize_x + viewport.offset_x;
+    vtx->screenpos.v[1] = (vtx->position.v[1] / vtx->position.v[3] + 1.0f) * viewport.halfsize_y + viewport.offset_y;
+    vtx->screenpos.v[2] = viewport.offset_z + vtx->position.v[2] / vtx->position.v[3] * viewport.zscale;
 }
 
 #define max_vertices 10
@@ -89,37 +88,36 @@ bool PointIsOnLine(struct vec4* vLineStart, struct vec4* vLineEnd, struct vec4*
 }
 
 #define Lerp(factor,v0,v1,output)                                                                             \
-    output.pos.v[0] = v0.pos.v[0] * (1.f - factor) + v1.pos.v[0] * factor;                                    \
-    output.pos.v[1] = v0.pos.v[1] * (1.f - factor) + v1.pos.v[1] * factor;                                    \
-    output.pos.v[2] = v0.pos.v[2] * (1.f - factor) + v1.pos.v[2] * factor;                                    \
-    output.pos.v[3] = v0.pos.v[3] * (1.f - factor) + v1.pos.v[3] * factor;                                    \
+    output.position.v[0] = v0.position.v[0] * (1.f - factor) + v1.position.v[0] * factor;                     \
+    output.position.v[1] = v0.position.v[1] * (1.f - factor) + v1.position.v[1] * factor;                     \
+    output.position.v[2] = v0.position.v[2] * (1.f - factor) + v1.position.v[2] * factor;                     \
+    output.position.v[3] = v0.position.v[3] * (1.f - factor) + v1.position.v[3] * factor;                     \
     output.color.v[0] = v0.color.v[0] * (1.f - factor) + v1.color.v[0] * factor;                              \
     output.color.v[1] = v0.color.v[1] * (1.f - factor) + v1.color.v[1] * factor;                              \
     output.color.v[2] = v0.color.v[2] * (1.f - factor) + v1.color.v[2] * factor;                              \
     output.color.v[3] = v0.color.v[3] * (1.f - factor) + v1.color.v[3] * factor;                              \
-    output.tc0.v[0] = v0.tc0.v[0] * (1.f - factor) + v1.tc0.v[0] * factor;                                    \
-    output.tc0.v[1] = v0.tc0.v[1] * (1.f - factor) + v1.tc0.v[1] * factor;                                    \
-    output.tc1.v[0] = v0.tc1.v[0] * (1.f - factor) + v1.tc1.v[0] * factor;                                    \
-    output.tc1.v[1] = v0.tc1.v[1] * (1.f - factor) + v1.tc1.v[1] * factor;                                    \
-    output.tc0_w = v0.tc0_w * (1.f - factor) + v1.tc0_w * factor;                                             \
+    output.texcoord0.v[0] = v0.texcoord0.v[0] * (1.f - factor) + v1.texcoord0.v[0] * factor;                  \
+    output.texcoord0.v[1] = v0.texcoord0.v[1] * (1.f - factor) + v1.texcoord0.v[1] * factor;                  \
+    output.texcoord1.v[0] = v0.texcoord1.v[0] * (1.f - factor) + v1.texcoord1.v[0] * factor;                  \
+    output.texcoord1.v[1] = v0.texcoord1.v[1] * (1.f - factor) + v1.texcoord1.v[1] * factor;                  \
+    output.texcoord0_w = v0.texcoord0_w * (1.f - factor) + v1.texcoord0_w * factor;                           \
     output.View.v[0] = v0.View.v[0] * (1.f - factor) + v1.View.v[0] * factor;                                 \
     output.View.v[1] = v0.View.v[1] * (1.f - factor) + v1.View.v[1] * factor;                                 \
     output.View.v[2] = v0.View.v[2] * (1.f - factor) + v1.View.v[2] * factor;                                 \
-    output.tc2.v[0] = v0.tc2.v[0] * (1.f - factor) + v1.tc2.v[0] * factor;                                    \
-    output.tc2.v[1] = v0.tc2.v[1] * (1.f - factor) + v1.tc2.v[1] * factor;
-
+    output.texcoord2.v[0] = v0.texcoord2.v[0] * (1.f - factor) + v1.texcoord2.v[0] * factor;                  \
+    output.texcoord2.v[1] = v0.texcoord2.v[1] * (1.f - factor) + v1.texcoord2.v[1] * factor;
 
 #define GetIntersection(v0, v1,edge,output)                                                                                        \
-    float dp = (v0.pos.v[0] * edge.v[0] + v0.pos.v[1] * edge.v[1] + v0.pos.v[2] * edge.v[2] + v0.pos.v[3] * edge.v[3]); /*DOT*/    \
-    float dp_prev = (v1.pos.v[0] * edge.v[0] + v1.pos.v[1] * edge.v[1] + v1.pos.v[2] * edge.v[2] + v0.pos.v[3] * edge.v[3]);       \
+    float dp = (v0.position.v[0] * edge.v[0] + v0.position.v[1] * edge.v[1] + v0.position.v[2] * edge.v[2] + v0.position.v[3] * edge.v[3]); /*DOT*/    \
+    float dp_prev = (v1.position.v[0] * edge.v[0] + v1.position.v[1] * edge.v[1] + v1.position.v[2] * edge.v[2] + v1.position.v[3] * edge.v[3]); /*DOT of v1: all four terms from v1*/ \
     float factor = dp_prev / (dp_prev - dp);                                                                                       \
     Lerp(factor, v0, v1,output);                                                                 
 
-#define IsInsidev4(v1,v2) ((v1.pos.v[0]*v2.v[0] + v1.pos.v[1]*v2.v[1] + v1.pos.v[2]*v2.v[2] + v1.pos.v[3]*v2.v[3]) <= 0.f)
-#define IsOutsidev4(v1,v2) ((v1.pos.v[0]*v2.v[0] + v1.pos.v[1]*v2.v[1] + v1.pos.v[2]*v2.v[2] + v1.pos.v[3]*v2.v[3]) > 0.f)
+#define IsInsidev4(v1,v2) ((v1.position.v[0]*v2.v[0] + v1.position.v[1]*v2.v[1] + v1.position.v[2]*v2.v[2] + v1.position.v[3]*v2.v[3]) <= 0.f)
+#define IsOutsidev4(v1,v2) ((v1.position.v[0]*v2.v[0] + v1.position.v[1]*v2.v[1] + v1.position.v[2]*v2.v[2] + v1.position.v[3]*v2.v[3]) > 0.f)
 void Clipper_ProcessTriangle(struct OutputVertex *v0, struct OutputVertex *v1, struct OutputVertex *v2)
 {
-    if (PointIsOnLine(&v0->pos, &v1->pos, &v2->pos)) //the algo dose not work for them
+    if (PointIsOnLine(&v0->position, &v1->position, &v2->position)) //the algo dose not work for them
         return;
     // Simple implementation of the Sutherland-Hodgman clipping algorithm.
     u32 input_list_num = 0;
@@ -140,10 +138,11 @@ void Clipper_ProcessTriangle(struct OutputVertex *v0, struct OutputVertex *v1, s
         output_list_num = 0;
 
         struct OutputVertex* reference_vertex = &input_list[input_list_num - 1]; //back
-        for (int j = 0; j < input_list_num; j++)
+        for (u32 j = 0; j < input_list_num; j++)
         {
             // NOTE: This algorithm changes vertex order in some cases!
-            float test = input_list[j].pos.v[0] * edges[i].v[0] + input_list[j].pos.v[1] * edges[i].v[1] + input_list[j].pos.v[2] * edges[i].v[2] + input_list[j].pos.v[3] * edges[i].v[3];
+            float test = input_list[j].position.v[0] * edges[i].v[0] + input_list[j].position.v[1]
+            * edges[i].v[1] + input_list[j].position.v[2] * edges[i].v[2] + input_list[j].position.v[3] * edges[i].v[3];
             if (IsInsidev4(input_list[j], edges[i])) {
                 if (IsOutsidev4((*reference_vertex), edges[i])) {
                     GetIntersection(input_list[j], (*reference_vertex), edges[i], output_list[output_list_num]);
@@ -177,9 +176,9 @@ void Clipper_ProcessTriangle(struct OutputVertex *v0, struct OutputVertex *v1, s
                 "(%.3f, %.3f, %.3f, %.3f), (%.3f, %.3f, %.3f, %.3f) and "
                 "screen position (%.2f, %.2f, %.2f), (%.2f, %.2f, %.2f), (%.2f, %.2f, %.2f)\n",
                 i, output_list_num,
-                vtx0->pos.v[0], vtx0->pos.v[1], vtx0->pos.v[2], vtx0->pos.v[3],
-                vtx1->pos.v[0], vtx1->pos.v[1], vtx1->pos.v[2], vtx1->pos.v[3],
-                vtx2->pos.v[0], vtx2->pos.v[1], vtx2->pos.v[2], vtx2->pos.v[3],
+                vtx0->position.v[0],  vtx0->position.v[1],  vtx0->position.v[2], vtx0->position.v[3],
+                vtx1->position.v[0],  vtx1->position.v[1],  vtx1->position.v[2], vtx1->position.v[3],
+                vtx2->position.v[0],  vtx2->position.v[1],  vtx2->position.v[2], vtx2->position.v[3],
                 vtx0->screenpos.v[0], vtx0->screenpos.v[1], vtx0->screenpos.v[2],
                 vtx1->screenpos.v[0], vtx1->screenpos.v[1], vtx1->screenpos.v[2],
                 vtx2->screenpos.v[0], vtx2->screenpos.v[1], vtx2->screenpos.v[2]);

diff --git a/src/gpu/commands.c b/src/gpu/commands.c
@@ -37,9 +37,9 @@ void gpu_ExecuteCommands(u8* buffer, u32 sizea)
                 GPUDEBUG("masked data? cmd %04x mask %01x size %03x (%08x) %s \n", ID, mask, size, dataone, grouping ? "grouping" : "");
 #endif
             if (grouping) {
-                for (j = 0; j <= size; j++)writeGPUID(ID + j, mask, 1, &datafild[j]);
+                for (j = 0; j <= size; j++)gpu_WriteID(ID + j, mask, 1, &datafild[j]);
             } else {
-                writeGPUID(ID, mask, size + 1, datafild);
+                gpu_WriteID(ID, mask, size + 1, datafild);
             }
         } else {
 #ifdef GSP_ENABLE_LOG