Skip to content

Commit

Permalink
Merge pull request #50 from purpasmart96/gpu_clean_up
Browse files Browse the repository at this point in the history
Clean up some of the GPU code
  • Loading branch information
Normmatt committed Feb 24, 2015
2 parents 33e8769 + 08ed9a7 commit dd0ecdf
Show file tree
Hide file tree
Showing 11 changed files with 409 additions and 419 deletions.
141 changes: 95 additions & 46 deletions inc/gpu.h
Expand Up @@ -59,12 +59,21 @@
#define RGBdowntwoleft 0x40056C


// Byte buffers; judging by the names they back the linear, VRAM and GSP shared
// memory regions -- TODO confirm against the code that allocates them.
// NOTE(review): these are definitions (no `extern`) placed in a header, so
// every translation unit that includes it emits its own tentative definition.
// Toolchains that default to -fno-common reject that at link time; consider
// `extern` here plus a single definition in one .c file.
u8* LINEmembuffer;
u8* VRAMbuff;
u8* GSPsharedbuff;
u8* LINEAR_MemoryBuff;
u8* VRAM_MemoryBuff;
u8* GSP_SharedBuff;
// Declaration only: the 0xFFFF-entry register array is defined elsewhere.
extern u32 GPU_Regs[0xFFFF];

#define GSPsharebuffsize 0x1000 //dumped from GSP module in Firm 4.4
#define STACK_MAX 64

// Format codes 0..3 -- presumably vertex attribute component types; confirm
// against the attribute loader.
#define Format_BYTE 0
#define Format_UBYTE 1
#define Format_SHORT 2
#define Format_FLOAT 3

// All-ones sentinel for an address that is not valid.
#define VS_State_INVALID_ADDRESS 0xFFFFFFFF

// Same value as GSPsharebuffsize above, under a second spelling.
#define GSP_Shared_Buff_Size 0x1000 //dumped from GSP module in Firm 4.4

#define TRIGGER_IRQ 0x10

Expand All @@ -78,36 +87,36 @@ extern u32 GPU_Regs[0xFFFF];
// Register indices. clipper.c uses the two viewport entries as subscripts into
// GPU_Regs and decodes them with f24to32() into the viewport z scale and
// z offset; the remaining entries presumably index the same array -- confirm.
// Several registers appear under two spellings that share one value
// (e.g. VSVertexAttributeOutputMap / VS_VertexAttributeOutputMap).
#define Viewport_depth_range 0x4D
#define Viewport_depth_far_plane 0x4E

#define VSVertexAttributeOutputMap 0x50
#define VS_VertexAttributeOutputMap 0x50
// until 0x56

// Texture unit 0 (0x80..0x8E).
#define TEXTURINGSETINGS80 0x80
#define TEXTURCONFIG0SIZE 0x82
#define TEXTURCONFIG0WRAP 0x83
#define TEXTURCONFIG0ADDR 0x85
#define TEXTURCONFIG0TYPE 0x8E
#define TEXTURING_SETINGS 0x80
#define TEXTURE_CONFIG0_SIZE 0x82
#define TEXTURE_CONFIG0_WRAP 0x83
#define TEXTURE_CONFIG0_ADDR 0x85
#define TEXTURE_CONFIG0_TYPE 0x8E

// Texture unit 1 (0x92..0x96).
#define TEXTURCONFIG1SIZE 0x92
#define TEXTURCONFIG1WRAP 0x93
#define TEXTURCONFIG1ADDR 0x95
#define TEXTURCONFIG1TYPE 0x96
#define TEXTURE_CONFIG1_SIZE 0x92
#define TEXTURE_CONFIG1_WRAP 0x93
#define TEXTURE_CONFIG1_ADDR 0x95
#define TEXTURE_CONFIG1_TYPE 0x96

// Texture unit 2 (0x9A..0x9E).
#define TEXTURCONFIG2SIZE 0x9A
#define TEXTURCONFIG2WRAP 0x9B
#define TEXTURCONFIG2ADDR 0x9D
#define TEXTURCONFIG2TYPE 0x9E
#define TEXTURE_CONFIG2_SIZE 0x9A
#define TEXTURE_CONFIG2_WRAP 0x9B
#define TEXTURE_CONFIG2_ADDR 0x9D
#define TEXTURE_CONFIG2_TYPE 0x9E

#define GLTEXENV 0xC0
// until 0x100 with a jump at 0xE0-0xF0
// Output / per-fragment configuration (0x100..0x11D).
#define COLOROUTPUT_CONFIG 0x100
#define COLOR_OUTPUT_CONFIG 0x100
#define BLEND_CONFIG 0x101
#define COLORLOGICOP_CONFIG 0x102
#define COLOR_LOGICOP_CONFIG 0x102
#define BLEND_COLOR 0x103
#define ALPHATEST_CONFIG 0x104

#define DEPTHTEST_CONFIG 0x107
#define DEPTHFORMAT 0x116
#define BUFFERFORMAT 0x117
#define DEPTH_FORMAT 0x116
#define BUFFER_FORMAT 0x117

#define DEPTHBUFFER_ADDRESS 0x11C
#define COLORBUFFER_ADDRESS 0x11D
Expand All @@ -123,34 +132,75 @@ extern u32 GPU_Regs[0xFFFF];

// Geometry / vertex-shader register numbers (0x25E..0x2DD). As elsewhere in
// this header, several registers appear under two spellings with equal values.
#define TriangleTopology 0x25e

#define VSresttriangel 0x25f
#define VS_resttriangel 0x25f

// NOTE(review): the stated end 0x284 lies below 0x2B1; four consecutive
// registers I0..I3 would end at 0x2B4 -- confirm the intended bound.
#define VS_INTUNIFORM_I0 0x2B1 //until I3 in 0x284

#define VSMainOffset 0x2BA
#define VSInputRegisterMap 0x2BB
#define VS_MainOffset 0x2BA
#define VS_InputRegisterMap 0x2BB
// until 0x2BC
#define VSFloatUniformSetup 0x2C0
#define VS_FloatUniformSetup 0x2C0
// until 0x2C8
#define VSBeginLoadProgramData 0x2CB
#define VSLoadProgramData 0x2CC
#define VS_BeginLoadProgramData 0x2CB
#define VS_LoadProgramData 0x2CC
// until 0x2D3
#define VSBeginLoadSwizzleData 0x2D5
#define VSLoadSwizzleData 0x2D6
#define VS_BeginLoadSwizzleData 0x2D5
#define VS_LoadSwizzleData 0x2D6
// until 0x2DD

// Opcode values, SHDR_ prefix -- presumably the instruction opcodes of the
// vertex-shader interpreter; confirm against the decoder that switches on them.
// The numbering is not contiguous: 0x10-0x11, 0x14-0x20, 0x2A-0x2B and
// 0x30-0x37 have no name here.

// 0x00..0x0F
#define SHDR_ADD 0x0
#define SHDR_DP3 0x1
#define SHDR_DP4 0x2
#define SHDR_DPH 0x3
#define SHDR_DST 0x4
#define SHDR_EXP 0x5
#define SHDR_LOG 0x6
#define SHDR_LITP 0x7
#define SHDR_MUL 0x8
#define SHDR_SGE 0x9
#define SHDR_SLT 0xA
#define SHDR_FLR 0xB
#define SHDR_MAX 0xC
#define SHDR_MIN 0xD
#define SHDR_RCP 0xE
#define SHDR_RSQ 0xF

// 0x12..0x13
#define SHDR_MOVA 0x12
#define SHDR_MOV 0x13

// 0x21..0x2F
#define SHDR_NOP 0x21
#define SHDR_END 0x22
#define SHDR_BREAKC 0x23
#define SHDR_CALL 0x24
#define SHDR_CALLC 0x25
#define SHDR_CALLB 0x26
#define SHDR_IFB 0x27
#define SHDR_IFC 0x28
#define SHDR_LOOP 0x29
#define SHDR_JPC 0x2C
#define SHDR_JPB 0x2D
#define SHDR_CMP 0x2E
#define SHDR_CMP2 0x2F
// 0x38..0x3F: eight consecutive MAD values -- presumably one instruction whose
// low three opcode bits carry operand data; TODO confirm.
#define SHDR_MAD1 0x38
#define SHDR_MAD2 0x39
#define SHDR_MAD3 0x3A
#define SHDR_MAD4 0x3B
#define SHDR_MAD5 0x3C
#define SHDR_MAD6 0x3D
#define SHDR_MAD7 0x3E
#define SHDR_MAD8 0x3F

struct OutputVertex {

// VS output attributes
struct vec4 pos;
struct vec4 position;
struct vec4 dummy; // quaternions (not implemented, yet)
struct vec4 color;
struct vec2 tc0;
struct vec2 tc1;
float tc0_w;
struct vec2 texcoord0;
struct vec2 texcoord1;
float texcoord0_w;
struct vec3 View;
struct vec2 tc2;
struct vec2 texcoord2;

// Padding for optimal alignment
float pad[10];
Expand All @@ -169,21 +219,20 @@ struct clov3 {
u8 v[5];
};


// GPU module entry points.
// NOTE(review): declarations written with empty parentheses leave the
// parameter list unspecified in C; `(void)` would turn them into prototypes
// that the compiler can check at call sites.
void gpu_Init();
// 32-bit register access by address.
void gpu_WriteReg32(u32 addr, u32 data);
u32 gpu_ReadReg32(u32 addr);
void GPUTriggerCmdReqQueue();
u32 GPURegisterInterruptRelayQueue(u32 Flags, u32 Kevent, u32*threadID, u32*outMemHandle);
u8* get_pymembuffer(u32 addr);
u32 get_py_memrestsize(u32 addr);
// Repeats the gpu_ReadReg32 declaration above; identical, so harmless.
u32 gpu_ReadReg32(u32 addr);
void gpu_TriggerCmdReqQueue();
u32 gpu_RegisterInterruptRelayQueue(u32 Flags, u32 Kevent, u32*threadID, u32*outMemHandle);
u8* gpu_GetPhysicalMemoryBuff(u32 addr);
u32 gpu_GetPhysicalMemoryRestSize(u32 addr);
void gpu_SendInterruptToAll(u32 ID);
// Walks a command buffer of `size` bytes; for each command it forwards the
// register ID, mask and data words to the register-write routine below.
void gpu_ExecuteCommands(u8* buffer, u32 size);
u32 getsizeofwight(u16 val);
u32 convertvirtualtopys(u32 addr);
void updateFramebuffer();
void updateFramebufferaddr(u32 addr, bool bot);
void writeGPUID(u16 ID, u8 mask, u32 size, u32* buffer);
u32 gpu_GetSizeOfWidth(u16 val);
u32 gpu_ConvertVirtualToPhysical(u32 addr);
void gpu_UpdateFramebuffer();
void gpu_UpdateFramebufferAddr(u32 addr, bool bottom);
// Called by gpu_ExecuteCommands: once per register with size 1 when the
// command is "grouping", otherwise a single call with size + 1 data words.
void gpu_WriteID(u16 ID, u8 mask, u32 size, u32* buffer);

//clipper.c
// Clips one triangle (Sutherland-Hodgman, per the comment in clipper.c);
// returns early without output when PointIsOnLine() reports the three
// positions as collinear.
void Clipper_ProcessTriangle(struct OutputVertex *v0, struct OutputVertex *v1, struct OutputVertex *v2);
Expand Down
51 changes: 25 additions & 26 deletions src/gpu/clipper.c
Expand Up @@ -42,11 +42,10 @@ void InitScreenCoordinates(struct OutputVertex *vtx)
f24to32(GPU_Regs[Viewport_depth_range], &viewport.zscale);
f24to32(GPU_Regs[Viewport_depth_far_plane], &viewport.offset_z);


// TODO: Not sure why the viewport width needs to be divided by 2 but the viewport height does not
vtx->screenpos.v[0] = (vtx->pos.v[0] / vtx->pos.v[3] + 1.0f) * viewport.halfsize_x + viewport.offset_x;
vtx->screenpos.v[1] = (vtx->pos.v[1] / vtx->pos.v[3] + 1.0f) * viewport.halfsize_y + viewport.offset_y;
vtx->screenpos.v[2] = viewport.offset_z + vtx->pos.v[2] / vtx->pos.v[3] * viewport.zscale;
vtx->screenpos.v[0] = (vtx->position.v[0] / vtx->position.v[3] + 1.0f) * viewport.halfsize_x + viewport.offset_x;
vtx->screenpos.v[1] = (vtx->position.v[1] / vtx->position.v[3] + 1.0f) * viewport.halfsize_y + viewport.offset_y;
vtx->screenpos.v[2] = viewport.offset_z + vtx->position.v[2] / vtx->position.v[3] * viewport.zscale;
}

#define max_vertices 10
Expand Down Expand Up @@ -89,37 +88,36 @@ bool PointIsOnLine(struct vec4* vLineStart, struct vec4* vLineEnd, struct vec4*
}

// Lerp(factor, v0, v1, output)
//
// Linearly interpolates every vertex-shader output attribute of two
// OutputVertex lvalues and stores the result in `output`:
//     output.attr = v0.attr * (1 - factor) + v1.attr * factor
// Attributes covered: position[0..3], color[0..3], texcoord0[0..1],
// texcoord1[0..1], texcoord0_w, View[0..2], texcoord2[0..1].
//
// Every argument is parenthesised, so expressions such as `*ptr` or `a + b`
// expand correctly. `factor` is still evaluated once per component, so pass a
// side-effect-free expression. The expansion is a plain statement sequence that
// already ends in `;`, so it must not be used as the unbraced body of an
// if/else or loop.
#define Lerp(factor,v0,v1,output) \
(output).position.v[0] = (v0).position.v[0] * (1.f - (factor)) + (v1).position.v[0] * (factor); \
(output).position.v[1] = (v0).position.v[1] * (1.f - (factor)) + (v1).position.v[1] * (factor); \
(output).position.v[2] = (v0).position.v[2] * (1.f - (factor)) + (v1).position.v[2] * (factor); \
(output).position.v[3] = (v0).position.v[3] * (1.f - (factor)) + (v1).position.v[3] * (factor); \
(output).color.v[0] = (v0).color.v[0] * (1.f - (factor)) + (v1).color.v[0] * (factor); \
(output).color.v[1] = (v0).color.v[1] * (1.f - (factor)) + (v1).color.v[1] * (factor); \
(output).color.v[2] = (v0).color.v[2] * (1.f - (factor)) + (v1).color.v[2] * (factor); \
(output).color.v[3] = (v0).color.v[3] * (1.f - (factor)) + (v1).color.v[3] * (factor); \
(output).texcoord0.v[0] = (v0).texcoord0.v[0] * (1.f - (factor)) + (v1).texcoord0.v[0] * (factor); \
(output).texcoord0.v[1] = (v0).texcoord0.v[1] * (1.f - (factor)) + (v1).texcoord0.v[1] * (factor); \
(output).texcoord1.v[0] = (v0).texcoord1.v[0] * (1.f - (factor)) + (v1).texcoord1.v[0] * (factor); \
(output).texcoord1.v[1] = (v0).texcoord1.v[1] * (1.f - (factor)) + (v1).texcoord1.v[1] * (factor); \
(output).texcoord0_w = (v0).texcoord0_w * (1.f - (factor)) + (v1).texcoord0_w * (factor); \
(output).View.v[0] = (v0).View.v[0] * (1.f - (factor)) + (v1).View.v[0] * (factor); \
(output).View.v[1] = (v0).View.v[1] * (1.f - (factor)) + (v1).View.v[1] * (factor); \
(output).View.v[2] = (v0).View.v[2] * (1.f - (factor)) + (v1).View.v[2] * (factor); \
(output).texcoord2.v[0] = (v0).texcoord2.v[0] * (1.f - (factor)) + (v1).texcoord2.v[0] * (factor); \
(output).texcoord2.v[1] = (v0).texcoord2.v[1] * (1.f - (factor)) + (v1).texcoord2.v[1] * (factor);

// GetIntersection(v0, v1, edge, output)
//
// Computes the point where the segment v0-v1 crosses the clipping plane
// `edge` (a vec4 of plane coefficients) and writes the interpolated vertex to
// `output` via Lerp().
//
//   dp      = dot(v0.position, edge)
//   dp_prev = dot(v1.position, edge)
//   factor  = dp_prev / (dp_prev - dp)
//
// The w term of dp_prev must come from v1. It previously read v0's w
// component, which skewed `factor` whenever the two w values differed.
//
// The macro declares `dp`, `dp_prev` and `factor` in the enclosing scope, so
// it can be expanded at most once per block. dp_prev == dp would divide by
// zero; the call site in Clipper_ProcessTriangle only expands it when one
// vertex tests inside (<= 0) and the other outside (> 0), so the two dot
// products differ there.
#define GetIntersection(v0, v1,edge,output) \
float dp = ((v0).position.v[0] * (edge).v[0] + (v0).position.v[1] * (edge).v[1] + (v0).position.v[2] * (edge).v[2] + (v0).position.v[3] * (edge).v[3]); /*DOT*/ \
float dp_prev = ((v1).position.v[0] * (edge).v[0] + (v1).position.v[1] * (edge).v[1] + (v1).position.v[2] * (edge).v[2] + (v1).position.v[3] * (edge).v[3]); \
float factor = dp_prev / (dp_prev - dp); \
Lerp(factor, (v0), (v1), (output));

// IsInsidev4(v1, v2) / IsOutsidev4(v1, v2)
//
// Classify an OutputVertex `v1` against a clipping plane `v2` (a vec4 of
// plane coefficients) using the 4-component dot product of v1.position and v2:
//   inside  when the dot product is <= 0
//   outside when the dot product is  > 0
// The two predicates are exact complements for finite inputs. Arguments are
// parenthesised so that expressions such as `*ptr` expand correctly; each
// argument is evaluated four times, so pass side-effect-free expressions.
#define IsInsidev4(v1,v2) (((v1).position.v[0]*(v2).v[0] + (v1).position.v[1]*(v2).v[1] + (v1).position.v[2]*(v2).v[2] + (v1).position.v[3]*(v2).v[3]) <= 0.f)
#define IsOutsidev4(v1,v2) (((v1).position.v[0]*(v2).v[0] + (v1).position.v[1]*(v2).v[1] + (v1).position.v[2]*(v2).v[2] + (v1).position.v[3]*(v2).v[3]) > 0.f)
void Clipper_ProcessTriangle(struct OutputVertex *v0, struct OutputVertex *v1, struct OutputVertex *v2)
{
if (PointIsOnLine(&v0->pos, &v1->pos, &v2->pos)) //the algo dose not work for them
if (PointIsOnLine(&v0->position, &v1->position, &v2->position)) //the algo dose not work for them
return;
// Simple implementation of the Sutherland-Hodgman clipping algorithm.
u32 input_list_num = 0;
Expand All @@ -140,10 +138,11 @@ void Clipper_ProcessTriangle(struct OutputVertex *v0, struct OutputVertex *v1, s
output_list_num = 0;

struct OutputVertex* reference_vertex = &input_list[input_list_num - 1]; //back
for (int j = 0; j < input_list_num; j++)
for (u32 j = 0; j < input_list_num; j++)
{
// NOTE: This algorithm changes vertex order in some cases!
float test = input_list[j].pos.v[0] * edges[i].v[0] + input_list[j].pos.v[1] * edges[i].v[1] + input_list[j].pos.v[2] * edges[i].v[2] + input_list[j].pos.v[3] * edges[i].v[3];
float test = input_list[j].position.v[0] * edges[i].v[0] + input_list[j].position.v[1]
* edges[i].v[1] + input_list[j].position.v[2] * edges[i].v[2] + input_list[j].position.v[3] * edges[i].v[3];
if (IsInsidev4(input_list[j], edges[i])) {
if (IsOutsidev4((*reference_vertex), edges[i])) {
GetIntersection(input_list[j], (*reference_vertex), edges[i], output_list[output_list_num]);
Expand Down Expand Up @@ -177,9 +176,9 @@ void Clipper_ProcessTriangle(struct OutputVertex *v0, struct OutputVertex *v1, s
"(%.3f, %.3f, %.3f, %.3f), (%.3f, %.3f, %.3f, %.3f) and "
"screen position (%.2f, %.2f, %.2f), (%.2f, %.2f, %.2f), (%.2f, %.2f, %.2f)\n",
i, output_list_num,
vtx0->pos.v[0], vtx0->pos.v[1], vtx0->pos.v[2], vtx0->pos.v[3],
vtx1->pos.v[0], vtx1->pos.v[1], vtx1->pos.v[2], vtx1->pos.v[3],
vtx2->pos.v[0], vtx2->pos.v[1], vtx2->pos.v[2], vtx2->pos.v[3],
vtx0->position.v[0], vtx0->position.v[1], vtx0->position.v[2], vtx0->position.v[3],
vtx1->position.v[0], vtx1->position.v[1], vtx1->position.v[2], vtx1->position.v[3],
vtx2->position.v[0], vtx2->position.v[1], vtx2->position.v[2], vtx2->position.v[3],
vtx0->screenpos.v[0], vtx0->screenpos.v[1], vtx0->screenpos.v[2],
vtx1->screenpos.v[0], vtx1->screenpos.v[1], vtx1->screenpos.v[2],
vtx2->screenpos.v[0], vtx2->screenpos.v[1], vtx2->screenpos.v[2]);
Expand Down
4 changes: 2 additions & 2 deletions src/gpu/commands.c
Expand Up @@ -37,9 +37,9 @@ void gpu_ExecuteCommands(u8* buffer, u32 sizea)
GPUDEBUG("masked data? cmd %04x mask %01x size %03x (%08x) %s \n", ID, mask, size, dataone, grouping ? "grouping" : "");
#endif
if (grouping) {
for (j = 0; j <= size; j++)writeGPUID(ID + j, mask, 1, &datafild[j]);
for (j = 0; j <= size; j++)gpu_WriteID(ID + j, mask, 1, &datafild[j]);
} else {
writeGPUID(ID, mask, size + 1, datafild);
gpu_WriteID(ID, mask, size + 1, datafild);
}
} else {
#ifdef GSP_ENABLE_LOG
Expand Down

0 comments on commit dd0ecdf

Please sign in to comment.