From 08ed9a730ef37c729b8ca1784dff964b93db3948 Mon Sep 17 00:00:00 2001 From: purpasmart96 Date: Sat, 21 Feb 2015 17:37:25 -0800 Subject: [PATCH] Clean up some of the GPU code --- inc/gpu.h | 141 +++++++++----- src/gpu/clipper.c | 51 +++--- src/gpu/commands.c | 4 +- src/gpu/gpu.c | 403 +++++++++++++++++------------------------ src/gpu/rasterizer.c | 83 +++++---- src/mem.c | 10 +- src/screen.c | 6 +- src/services/apt_s.c | 2 +- src/services/apt_u.c | 2 +- src/services/gsp_gpu.c | 114 ++++++------ src/syscalls/memory.c | 12 +- 11 files changed, 409 insertions(+), 419 deletions(-) diff --git a/inc/gpu.h b/inc/gpu.h index cf7f924..f22c9d3 100644 --- a/inc/gpu.h +++ b/inc/gpu.h @@ -59,12 +59,21 @@ #define RGBdowntwoleft 0x40056C -u8* LINEmembuffer; -u8* VRAMbuff; -u8* GSPsharedbuff; +u8* LINEAR_MemoryBuff; +u8* VRAM_MemoryBuff; +u8* GSP_SharedBuff; extern u32 GPU_Regs[0xFFFF]; -#define GSPsharebuffsize 0x1000 //dumped from GSP module in Firm 4.4 +#define STACK_MAX 64 + +#define Format_BYTE 0 +#define Format_UBYTE 1 +#define Format_SHORT 2 +#define Format_FLOAT 3 + +#define VS_State_INVALID_ADDRESS 0xFFFFFFFF + +#define GSP_Shared_Buff_Size 0x1000 //dumped from GSP module in Firm 4.4 #define TRIGGER_IRQ 0x10 @@ -78,36 +87,36 @@ extern u32 GPU_Regs[0xFFFF]; #define Viewport_depth_range 0x4D #define Viewport_depth_far_plane 0x4E -#define VSVertexAttributeOutputMap 0x50 +#define VS_VertexAttributeOutputMap 0x50 // untill 0x56 -#define TEXTURINGSETINGS80 0x80 -#define TEXTURCONFIG0SIZE 0x82 -#define TEXTURCONFIG0WRAP 0x83 -#define TEXTURCONFIG0ADDR 0x85 -#define TEXTURCONFIG0TYPE 0x8E +#define TEXTURING_SETINGS 0x80 +#define TEXTURE_CONFIG0_SIZE 0x82 +#define TEXTURE_CONFIG0_WRAP 0x83 +#define TEXTURE_CONFIG0_ADDR 0x85 +#define TEXTURE_CONFIG0_TYPE 0x8E -#define TEXTURCONFIG1SIZE 0x92 -#define TEXTURCONFIG1WRAP 0x93 -#define TEXTURCONFIG1ADDR 0x95 -#define TEXTURCONFIG1TYPE 0x96 +#define TEXTURE_CONFIG1_SIZE 0x92 +#define TEXTURE_CONFIG1_WRAP 0x93 +#define 
TEXTURE_CONFIG1_ADDR 0x95 +#define TEXTURE_CONFIG1_TYPE 0x96 -#define TEXTURCONFIG2SIZE 0x9A -#define TEXTURCONFIG2WRAP 0x9B -#define TEXTURCONFIG2ADDR 0x9D -#define TEXTURCONFIG2TYPE 0x9E +#define TEXTURE_CONFIG2_SIZE 0x9A +#define TEXTURE_CONFIG2_WRAP 0x9B +#define TEXTURE_CONFIG2_ADDR 0x9D +#define TEXTURE_CONFIG2_TYPE 0x9E #define GLTEXENV 0xC0 // untill 0x100 with a jump at 0xE0- 0xF0 -#define COLOROUTPUT_CONFIG 0x100 +#define COLOR_OUTPUT_CONFIG 0x100 #define BLEND_CONFIG 0x101 -#define COLORLOGICOP_CONFIG 0x102 +#define COLOR_LOGICOP_CONFIG 0x102 #define BLEND_COLOR 0x103 #define ALPHATEST_CONFIG 0x104 #define DEPTHTEST_CONFIG 0x107 -#define DEPTHFORMAT 0x116 -#define BUFFERFORMAT 0x117 +#define DEPTH_FORMAT 0x116 +#define BUFFER_FORMAT 0x117 #define DEPTHBUFFER_ADDRESS 0x11C #define COLORBUFFER_ADDRESS 0x11D @@ -123,34 +132,75 @@ extern u32 GPU_Regs[0xFFFF]; #define TriangleTopology 0x25e -#define VSresttriangel 0x25f +#define VS_resttriangel 0x25f #define VS_INTUNIFORM_I0 0x2B1 //untill I3 in 0x284 -#define VSMainOffset 0x2BA -#define VSInputRegisterMap 0x2BB +#define VS_MainOffset 0x2BA +#define VS_InputRegisterMap 0x2BB // untill 0x2BC -#define VSFloatUniformSetup 0x2C0 +#define VS_FloatUniformSetup 0x2C0 // untill 0x2C8 -#define VSBeginLoadProgramData 0x2CB -#define VSLoadProgramData 0x2CC +#define VS_BeginLoadProgramData 0x2CB +#define VS_LoadProgramData 0x2CC //untill 0x2D3 -#define VSBeginLoadSwizzleData 0x2D5 -#define VSLoadSwizzleData 0x2D6 +#define VS_BeginLoadSwizzleData 0x2D5 +#define VS_LoadSwizzleData 0x2D6 // untill 0x2DD +#define SHDR_ADD 0x0 +#define SHDR_DP3 0x1 +#define SHDR_DP4 0x2 +#define SHDR_DPH 0x3 +#define SHDR_DST 0x4 +#define SHDR_EXP 0x5 +#define SHDR_LOG 0x6 +#define SHDR_LITP 0x7 +#define SHDR_MUL 0x8 +#define SHDR_SGE 0x9 +#define SHDR_SLT 0xA +#define SHDR_FLR 0xB +#define SHDR_MAX 0xC +#define SHDR_MIN 0xD +#define SHDR_RCP 0xE +#define SHDR_RSQ 0xF + +#define SHDR_MOVA 0x12 +#define SHDR_MOV 0x13 + +#define SHDR_NOP 0x21 
+#define SHDR_END 0x22 +#define SHDR_BREAKC 0x23 +#define SHDR_CALL 0x24 +#define SHDR_CALLC 0x25 +#define SHDR_CALLB 0x26 +#define SHDR_IFB 0x27 +#define SHDR_IFC 0x28 +#define SHDR_LOOP 0x29 +#define SHDR_JPC 0x2C +#define SHDR_JPB 0x2D +#define SHDR_CMP 0x2E +#define SHDR_CMP2 0x2F +#define SHDR_MAD1 0x38 +#define SHDR_MAD2 0x39 +#define SHDR_MAD3 0x3A +#define SHDR_MAD4 0x3B +#define SHDR_MAD5 0x3C +#define SHDR_MAD6 0x3D +#define SHDR_MAD7 0x3E +#define SHDR_MAD8 0x3F struct OutputVertex { // VS output attributes - struct vec4 pos; + struct vec4 position; struct vec4 dummy; // quaternions (not implemented, yet) struct vec4 color; - struct vec2 tc0; - struct vec2 tc1; - float tc0_w; + struct vec2 texcoord0; + struct vec2 texcoord1; + float texcoord0_w; struct vec3 View; - struct vec2 tc2; + struct vec2 texcoord2; // Padding for optimal alignment float pad[10]; @@ -169,21 +219,20 @@ struct clov3 { u8 v[5]; }; - void gpu_Init(); void gpu_WriteReg32(u32 addr, u32 data); -u32 gpu_ReadReg32(u32 addr); -void GPUTriggerCmdReqQueue(); -u32 GPURegisterInterruptRelayQueue(u32 Flags, u32 Kevent, u32*threadID, u32*outMemHandle); -u8* get_pymembuffer(u32 addr); -u32 get_py_memrestsize(u32 addr); +u32 gpu_ReadReg32(u32 addr); +void gpu_TriggerCmdReqQueue(); +u32 gpu_RegisterInterruptRelayQueue(u32 Flags, u32 Kevent, u32*threadID, u32*outMemHandle); +u8* gpu_GetPhysicalMemoryBuff(u32 addr); +u32 gpu_GetPhysicalMemoryRestSize(u32 addr); void gpu_SendInterruptToAll(u32 ID); void gpu_ExecuteCommands(u8* buffer, u32 size); -u32 getsizeofwight(u16 val); -u32 convertvirtualtopys(u32 addr); -void updateFramebuffer(); -void updateFramebufferaddr(u32 addr, bool bot); -void writeGPUID(u16 ID, u8 mask, u32 size, u32* buffer); +u32 gpu_GetSizeOfWidth(u16 val); +u32 gpu_ConvertVirtualToPhysical(u32 addr); +void gpu_UpdateFramebuffer(); +void gpu_UpdateFramebufferAddr(u32 addr, bool bottom); +void gpu_WriteID(u16 ID, u8 mask, u32 size, u32* buffer); //clipper.c void 
Clipper_ProcessTriangle(struct OutputVertex *v0, struct OutputVertex *v1, struct OutputVertex *v2); diff --git a/src/gpu/clipper.c b/src/gpu/clipper.c index 18ada83..3d90b5f 100644 --- a/src/gpu/clipper.c +++ b/src/gpu/clipper.c @@ -42,11 +42,10 @@ void InitScreenCoordinates(struct OutputVertex *vtx) f24to32(GPU_Regs[Viewport_depth_range], &viewport.zscale); f24to32(GPU_Regs[Viewport_depth_far_plane], &viewport.offset_z); - // TODO: Not sure why the viewport width needs to be divided by 2 but the viewport height does not - vtx->screenpos.v[0] = (vtx->pos.v[0] / vtx->pos.v[3] + 1.0f) * viewport.halfsize_x + viewport.offset_x; - vtx->screenpos.v[1] = (vtx->pos.v[1] / vtx->pos.v[3] + 1.0f) * viewport.halfsize_y + viewport.offset_y; - vtx->screenpos.v[2] = viewport.offset_z + vtx->pos.v[2] / vtx->pos.v[3] * viewport.zscale; + vtx->screenpos.v[0] = (vtx->position.v[0] / vtx->position.v[3] + 1.0f) * viewport.halfsize_x + viewport.offset_x; + vtx->screenpos.v[1] = (vtx->position.v[1] / vtx->position.v[3] + 1.0f) * viewport.halfsize_y + viewport.offset_y; + vtx->screenpos.v[2] = viewport.offset_z + vtx->position.v[2] / vtx->position.v[3] * viewport.zscale; } #define max_vertices 10 @@ -89,37 +88,36 @@ bool PointIsOnLine(struct vec4* vLineStart, struct vec4* vLineEnd, struct vec4* } #define Lerp(factor,v0,v1,output) \ - output.pos.v[0] = v0.pos.v[0] * (1.f - factor) + v1.pos.v[0] * factor; \ - output.pos.v[1] = v0.pos.v[1] * (1.f - factor) + v1.pos.v[1] * factor; \ - output.pos.v[2] = v0.pos.v[2] * (1.f - factor) + v1.pos.v[2] * factor; \ - output.pos.v[3] = v0.pos.v[3] * (1.f - factor) + v1.pos.v[3] * factor; \ + output.position.v[0] = v0.position.v[0] * (1.f - factor) + v1.position.v[0] * factor; \ + output.position.v[1] = v0.position.v[1] * (1.f - factor) + v1.position.v[1] * factor; \ + output.position.v[2] = v0.position.v[2] * (1.f - factor) + v1.position.v[2] * factor; \ + output.position.v[3] = v0.position.v[3] * (1.f - factor) + v1.position.v[3] * factor; \ 
output.color.v[0] = v0.color.v[0] * (1.f - factor) + v1.color.v[0] * factor; \ output.color.v[1] = v0.color.v[1] * (1.f - factor) + v1.color.v[1] * factor; \ output.color.v[2] = v0.color.v[2] * (1.f - factor) + v1.color.v[2] * factor; \ output.color.v[3] = v0.color.v[3] * (1.f - factor) + v1.color.v[3] * factor; \ - output.tc0.v[0] = v0.tc0.v[0] * (1.f - factor) + v1.tc0.v[0] * factor; \ - output.tc0.v[1] = v0.tc0.v[1] * (1.f - factor) + v1.tc0.v[1] * factor; \ - output.tc1.v[0] = v0.tc1.v[0] * (1.f - factor) + v1.tc1.v[0] * factor; \ - output.tc1.v[1] = v0.tc1.v[1] * (1.f - factor) + v1.tc1.v[1] * factor; \ - output.tc0_w = v0.tc0_w * (1.f - factor) + v1.tc0_w * factor; \ + output.texcoord0.v[0] = v0.texcoord0.v[0] * (1.f - factor) + v1.texcoord0.v[0] * factor; \ + output.texcoord0.v[1] = v0.texcoord0.v[1] * (1.f - factor) + v1.texcoord0.v[1] * factor; \ + output.texcoord1.v[0] = v0.texcoord1.v[0] * (1.f - factor) + v1.texcoord1.v[0] * factor; \ + output.texcoord1.v[1] = v0.texcoord1.v[1] * (1.f - factor) + v1.texcoord1.v[1] * factor; \ + output.texcoord0_w = v0.texcoord0_w * (1.f - factor) + v1.texcoord0_w * factor; \ output.View.v[0] = v0.View.v[0] * (1.f - factor) + v1.View.v[0] * factor; \ output.View.v[1] = v0.View.v[1] * (1.f - factor) + v1.View.v[1] * factor; \ output.View.v[2] = v0.View.v[2] * (1.f - factor) + v1.View.v[2] * factor; \ - output.tc2.v[0] = v0.tc2.v[0] * (1.f - factor) + v1.tc2.v[0] * factor; \ - output.tc2.v[1] = v0.tc2.v[1] * (1.f - factor) + v1.tc2.v[1] * factor; - + output.texcoord2.v[0] = v0.texcoord2.v[0] * (1.f - factor) + v1.texcoord2.v[0] * factor; \ + output.texcoord2.v[1] = v0.texcoord2.v[1] * (1.f - factor) + v1.texcoord2.v[1] * factor; #define GetIntersection(v0, v1,edge,output) \ - float dp = (v0.pos.v[0] * edge.v[0] + v0.pos.v[1] * edge.v[1] + v0.pos.v[2] * edge.v[2] + v0.pos.v[3] * edge.v[3]); /*DOT*/ \ - float dp_prev = (v1.pos.v[0] * edge.v[0] + v1.pos.v[1] * edge.v[1] + v1.pos.v[2] * edge.v[2] + v0.pos.v[3] * edge.v[3]); \ 
+ float dp = (v0.position.v[0] * edge.v[0] + v0.position.v[1] * edge.v[1] + v0.position.v[2] * edge.v[2] + v0.position.v[3] * edge.v[3]); /*DOT*/ \ + float dp_prev = (v1.position.v[0] * edge.v[0] + v1.position.v[1] * edge.v[1] + v1.position.v[2] * edge.v[2] + v1.position.v[3] * edge.v[3]); /*DOT of v1: the w term must come from v1 too*/ \ float factor = dp_prev / (dp_prev - dp); \ Lerp(factor, v0, v1,output); -#define IsInsidev4(v1,v2) ((v1.pos.v[0]*v2.v[0] + v1.pos.v[1]*v2.v[1] + v1.pos.v[2]*v2.v[2] + v1.pos.v[3]*v2.v[3]) <= 0.f) -#define IsOutsidev4(v1,v2) ((v1.pos.v[0]*v2.v[0] + v1.pos.v[1]*v2.v[1] + v1.pos.v[2]*v2.v[2] + v1.pos.v[3]*v2.v[3]) > 0.f) +#define IsInsidev4(v1,v2) ((v1.position.v[0]*v2.v[0] + v1.position.v[1]*v2.v[1] + v1.position.v[2]*v2.v[2] + v1.position.v[3]*v2.v[3]) <= 0.f) +#define IsOutsidev4(v1,v2) ((v1.position.v[0]*v2.v[0] + v1.position.v[1]*v2.v[1] + v1.position.v[2]*v2.v[2] + v1.position.v[3]*v2.v[3]) > 0.f) void Clipper_ProcessTriangle(struct OutputVertex *v0, struct OutputVertex *v1, struct OutputVertex *v2) { - if (PointIsOnLine(&v0->pos, &v1->pos, &v2->pos)) //the algo dose not work for them + if (PointIsOnLine(&v0->position, &v1->position, &v2->position)) //the algo does not work for them return; // Simple implementation of the Sutherland-Hodgman clipping algorithm. u32 input_list_num = 0; @@ -140,10 +138,11 @@ void Clipper_ProcessTriangle(struct OutputVertex *v0, struct OutputVertex *v1, s output_list_num = 0; struct OutputVertex* reference_vertex = &input_list[input_list_num - 1]; //back - for (int j = 0; j < input_list_num; j++) + for (u32 j = 0; j < input_list_num; j++) { // NOTE: This algorithm changes vertex order in some cases! 
- float test = input_list[j].pos.v[0] * edges[i].v[0] + input_list[j].pos.v[1] * edges[i].v[1] + input_list[j].pos.v[2] * edges[i].v[2] + input_list[j].pos.v[3] * edges[i].v[3]; + float test = input_list[j].position.v[0] * edges[i].v[0] + input_list[j].position.v[1] + * edges[i].v[1] + input_list[j].position.v[2] * edges[i].v[2] + input_list[j].position.v[3] * edges[i].v[3]; if (IsInsidev4(input_list[j], edges[i])) { if (IsOutsidev4((*reference_vertex), edges[i])) { GetIntersection(input_list[j], (*reference_vertex), edges[i], output_list[output_list_num]); @@ -177,9 +176,9 @@ void Clipper_ProcessTriangle(struct OutputVertex *v0, struct OutputVertex *v1, s "(%.3f, %.3f, %.3f, %.3f), (%.3f, %.3f, %.3f, %.3f) and " "screen position (%.2f, %.2f, %.2f), (%.2f, %.2f, %.2f), (%.2f, %.2f, %.2f)\n", i, output_list_num, - vtx0->pos.v[0], vtx0->pos.v[1], vtx0->pos.v[2], vtx0->pos.v[3], - vtx1->pos.v[0], vtx1->pos.v[1], vtx1->pos.v[2], vtx1->pos.v[3], - vtx2->pos.v[0], vtx2->pos.v[1], vtx2->pos.v[2], vtx2->pos.v[3], + vtx0->position.v[0], vtx0->position.v[1], vtx0->position.v[2], vtx0->position.v[3], + vtx1->position.v[0], vtx1->position.v[1], vtx1->position.v[2], vtx1->position.v[3], + vtx2->position.v[0], vtx2->position.v[1], vtx2->position.v[2], vtx2->position.v[3], vtx0->screenpos.v[0], vtx0->screenpos.v[1], vtx0->screenpos.v[2], vtx1->screenpos.v[0], vtx1->screenpos.v[1], vtx1->screenpos.v[2], vtx2->screenpos.v[0], vtx2->screenpos.v[1], vtx2->screenpos.v[2]); diff --git a/src/gpu/commands.c b/src/gpu/commands.c index 1dd11db..ca77107 100644 --- a/src/gpu/commands.c +++ b/src/gpu/commands.c @@ -37,9 +37,9 @@ void gpu_ExecuteCommands(u8* buffer, u32 sizea) GPUDEBUG("masked data? cmd %04x mask %01x size %03x (%08x) %s \n", ID, mask, size, dataone, grouping ? 
"grouping" : ""); #endif if (grouping) { - for (j = 0; j <= size; j++)writeGPUID(ID + j, mask, 1, &datafild[j]); + for (j = 0; j <= size; j++)gpu_WriteID(ID + j, mask, 1, &datafild[j]); } else { - writeGPUID(ID, mask, size + 1, datafild); + gpu_WriteID(ID, mask, size + 1, datafild); } } else { #ifdef GSP_ENABLE_LOG diff --git a/src/gpu/gpu.c b/src/gpu/gpu.c index 5260cfa..cb39dc2 100644 --- a/src/gpu/gpu.c +++ b/src/gpu/gpu.c @@ -28,26 +28,30 @@ #include "mem.h" #include "gpu.h" - #define GSP_ENABLE_LOG u32 GPU_Regs[0xFFFF]; //do they all exist don't know but well -u32 GPUshadercodebuffer[0xFFFF]; //how big is the buffer? +u32 GPU_ShaderCodeBuffer[0xFFFF]; //how big is the buffer? u32 swizzle_data[0xFFFF]; //how big is the buffer? -extern int noscreen; +u8 VS_FloatUniformSetUpTempBufferCurrent = 0; +u32 VS_FloatUniformSetUpTempBuffer[4]; +u32 render_addr = 0; //Can these be removed? +u32 unknown_addr = 0; + +extern int noscreen; void gpu_Init() { - LINEmembuffer = malloc(0x8000000); - VRAMbuff = malloc(0x800000);//malloc(0x600000); - GSPsharedbuff = malloc(GSPsharebuffsize); + LINEAR_MemoryBuff = malloc(0x8000000); + VRAM_MemoryBuff = malloc(0x800000);//malloc(0x600000); + GSP_SharedBuff = malloc(GSP_Shared_Buff_Size); - memset(LINEmembuffer, 0, 0x8000000); - memset(VRAMbuff, 0, 0x600000); - memset(GSPsharedbuff, 0, GSPsharebuffsize); - memset(GPUshadercodebuffer, 0, 0xFFFF*4); + memset(LINEAR_MemoryBuff, 0, 0x8000000); + memset(VRAM_MemoryBuff, 0, 0x600000); + memset(GSP_SharedBuff, 0, GSP_Shared_Buff_Size); + memset(GPU_ShaderCodeBuffer, 0, 0xFFFF * 4); gpu_WriteReg32(framebuffer_top_size, 0x019000f0); gpu_WriteReg32(frameselecttop, 0); @@ -62,15 +66,15 @@ void gpu_Init() //mem_Write32(0x1FF81080, (u32)0.0f); } -u32 convertvirtualtopys(u32 addr) //todo +u32 gpu_ConvertVirtualToPhysical(u32 addr) //todo { if (addr >= 0x14000000 && addr < 0x1C000000)return addr + 0xC000000; //FCRAM if (addr >= 0x1F000000 && addr < 0x1F600000)return addr - 0x7000000; //VRAM - 
GPUDEBUG("can't convert vitual to py %08x\n",addr); + GPUDEBUG("Can't convert virtual to physical %08x\n",addr); return 0; } -u32 getsizeofwight(u16 val) //this is the size of pixel +u32 gpu_GetSizeOfWidth(u16 val) //this is the size of pixel { switch (val&0x7000) { //check this case 0x0000: //RGBA8 @@ -89,9 +93,6 @@ u32 getsizeofwight(u16 val) //this is the size of pixel } } -u32 renderaddr = 0; -u32 unknownaddr = 0; - static void updateGPUintreg(u32 data, u32 ID, u8 mask) { int i; @@ -102,23 +103,6 @@ static void updateGPUintreg(u32 data, u32 ID, u8 mask) } } - - -u8 VSFloatUniformSetuptembuffercurrent = 0; -u32 VSFloatUniformSetuptembuffer[4]; - - -#define Format_BYTE 0 -#define Format_UBYTE 1 -#define Format_SHORT 2 -#define Format_FLOAT 3 - - - - -#define VertexShaderState_INVALID_ADDRESS 0xFFFFFFFF - -#define STACK_MAX 64 typedef struct Stack { u32 data[STACK_MAX]; int size; @@ -143,6 +127,7 @@ int Stack_Top(struct Stack *S) return S->data[S->size - 1]; } + int Stack_Top_DEC(struct Stack *S) { if (S->size == 0) { @@ -222,11 +207,6 @@ static u32 getattribute_register_map(u32 reg, u32 data1, u32 data2) } } - - - - - static struct OutputVertex buffer[2]; static int buffer_index = 0; // TODO: reset this on emulation restart static int strip_ready = 0; @@ -269,60 +249,21 @@ static void PrimitiveAssembly_SubmitVertex(struct OutputVertex* vtx) } } -#define SHDR_ADD 0x0 -#define SHDR_DP3 0x1 -#define SHDR_DP4 0x2 -#define SHDR_DPH 0x3 -#define SHDR_DST 0x4 -#define SHDR_EXP 0x5 -#define SHDR_LOG 0x6 -#define SHDR_LITP 0x7 -#define SHDR_MUL 0x8 -#define SHDR_SGE 0x9 -#define SHDR_SLT 0xA -#define SHDR_FLR 0xB -#define SHDR_MAX 0xC -#define SHDR_MIN 0xD -#define SHDR_RCP 0xE -#define SHDR_RSQ 0xF - -#define SHDR_MOVA 0x12 -#define SHDR_MOV 0x13 - -#define SHDR_RET 0x21 //This is actually NOP -#define SHDR_FLS 0x22 //This is actually END -#define SHDR_BREAKC 0x23 -#define SHDR_CALL 0x24 -#define SHDR_CALLC 0x25 -#define SHDR_CALLB 0x26 -#define SHDR_IFB 0x27 -#define 
SHDR_IFC 0x28 -#define SHDR_LOOP 0x29 -#define SHDR_JPC 0x2C -#define SHDR_JPB 0x2D -#define SHDR_CMP 0x2E -#define SHDR_CMP2 0x2F -#define SHDR_MAD1 0x38 -#define SHDR_MAD2 0x39 -#define SHDR_MAD3 0x3A -#define SHDR_MAD4 0x3B -#define SHDR_MAD5 0x3C -#define SHDR_MAD6 0x3D -#define SHDR_MAD7 0x3E -#define SHDR_MAD8 0x3F - static u32 instr_mad_src1(u32 hex) { return (hex >> 0x11) & 0x7F; } + static u32 instr_mad_src2(u32 hex) { return (hex >> 0xA) & 0x7F; } + static u32 instr_mad_src3(u32 hex) { return (hex >> 0x5) & 0x1F; } + static u32 instr_mad_dest(u32 hex) { return hex& 0x1F; @@ -332,30 +273,37 @@ static u32 instr_common_src1(u32 hex) { return (hex >> 0xC) & 0x7F; } + static u32 instr_common_idx(u32 hex) { return (hex >> 0x13) & 0x3; } + static u32 instr_common_dest(u32 hex) { return (hex >> 0x15) & 0x1F; } + static u32 instr_common_src2(u32 hex) { return (hex >> 0x7) & 0x1F; } + static u32 instr_common_operand_desc_id(u32 hex) { return hex & 0x3F; } + static u32 instr_opcode(u32 hex) { return (hex >> 0x1A); } + static u32 instr_flow_control_offset_words(u32 hex) { return (hex>>0xa)&0xFFF; } + static bool swizzle_DestComponentEnabled(int i, u32 swizzle) { return (swizzle & (0x8 >> i)); @@ -394,21 +342,23 @@ void loop(struct VertexShaderState* state, u32 offset, u32 num_instruction, u32 Stack_Push(&state->loop_int_stack, int_reg); Stack_Push(&state->loop_end_stack, return_offset); } + void ifcall(struct VertexShaderState* state, u32 offset, u32 num_instruction, u32 return_offset) { #ifdef printfunc DEBUG("callif %03x %03x %03x\n", offset, num_instruction, return_offset); #endif - state->program_counter = &GPUshadercodebuffer[offset] - 1; // -1 to make sure when incrementing the PC we end up at the correct offset + state->program_counter = &GPU_ShaderCodeBuffer[offset] - 1; // -1 to make sure when incrementing the PC we end up at the correct offset Stack_Push(&state->if_stack, offset + num_instruction); Stack_Push(&state->if_end_stack, return_offset); } + void 
call(struct VertexShaderState* state, u32 offset, u32 num_instruction, u32 return_offset) { #ifdef printfunc DEBUG("callnorm %03x %03x %03x\n", offset, num_instruction, return_offset); #endif - state->program_counter = &GPUshadercodebuffer[offset] - 1; // -1 to make sure when incrementing the PC we end up at the correct offset + state->program_counter = &GPU_ShaderCodeBuffer[offset] - 1; // -1 to make sure when incrementing the PC we end up at the correct offset Stack_Push(&state->call_stack, offset + num_instruction); Stack_Push(&state->call_end_stack, return_offset); } @@ -418,8 +368,8 @@ void ProcessShaderCode(struct VertexShaderState* state) float should_not_be_used = 0; while (true) { if (!Stack_Empty(&state->if_stack)) { - if ((state->program_counter - &GPUshadercodebuffer[0]) == Stack_Top(&state->if_stack)) { - state->program_counter = &GPUshadercodebuffer[Stack_Top(&state->if_end_stack)]; + if ((state->program_counter - &GPU_ShaderCodeBuffer[0]) == Stack_Top(&state->if_stack)) { + state->program_counter = &GPU_ShaderCodeBuffer[Stack_Top(&state->if_end_stack)]; Stack_Pop(&state->if_stack); Stack_Pop(&state->if_end_stack); // TODO: Is "trying again" accurate to hardware? @@ -427,8 +377,8 @@ void ProcessShaderCode(struct VertexShaderState* state) } } if (!Stack_Empty(&state->call_stack)) { - if ((state->program_counter - &GPUshadercodebuffer[0]) == Stack_Top(&state->call_stack)) { - state->program_counter = &GPUshadercodebuffer[Stack_Top(&state->call_end_stack)]; + if ((state->program_counter - &GPU_ShaderCodeBuffer[0]) == Stack_Top(&state->call_stack)) { + state->program_counter = &GPU_ShaderCodeBuffer[Stack_Top(&state->call_end_stack)]; Stack_Pop(&state->call_stack); Stack_Pop(&state->call_end_stack); // TODO: Is "trying again" accurate to hardware? 
@@ -436,11 +386,11 @@ void ProcessShaderCode(struct VertexShaderState* state) } } if (!Stack_Empty(&state->loop_stack)) { - if ((state->program_counter - &GPUshadercodebuffer[0]) == Stack_Top(&state->loop_stack)) { + if ((state->program_counter - &GPU_ShaderCodeBuffer[0]) == Stack_Top(&state->loop_stack)) { u8 ID = Stack_Top(&state->loop_int_stack); if (Stack_Top_DEC(&state->loop_numb_stack) != 0) { - state->program_counter = &GPUshadercodebuffer[Stack_Top(&state->loop_end_stack)]; + state->program_counter = &GPU_ShaderCodeBuffer[Stack_Top(&state->loop_end_stack)]; } else //remove loop { @@ -455,7 +405,6 @@ void ProcessShaderCode(struct VertexShaderState* state) } } - bool increment_pc = true; //speedup todo u32 instr = *(u32*)state->program_counter; @@ -673,9 +622,9 @@ void ProcessShaderCode(struct VertexShaderState* state) break; } - case SHDR_RET: //Really just a NOP + case SHDR_NOP: #ifdef printfunc - DEBUG("RET\n"); + DEBUG("NOP\n"); #endif break; @@ -687,7 +636,7 @@ void ProcessShaderCode(struct VertexShaderState* state) u32 addrv = (instr >> 8) & 0x3FFC; u32 boolv = (instr >> 22) & 0xF; u32 retv = instr & 0x3FF; - call(state, addrv / 4, retv, ((u32)(uintptr_t)(state->program_counter + 1) - (u32)(uintptr_t)(&GPUshadercodebuffer[0])) / 4); + call(state, addrv / 4, retv, ((u32)(uintptr_t)(state->program_counter + 1) - (u32)(uintptr_t)(&GPU_ShaderCodeBuffer[0])) / 4); break; } @@ -728,7 +677,7 @@ void ProcessShaderCode(struct VertexShaderState* state) if (condition) { - call(state, addrv / 4, retv, ((u32)(uintptr_t)(state->program_counter + 1) - (u32)(uintptr_t)(&GPUshadercodebuffer[0])) / 4); + call(state, addrv / 4, retv, ((u32)(uintptr_t)(state->program_counter + 1) - (u32)(uintptr_t)(&GPU_ShaderCodeBuffer[0])) / 4); } break; } @@ -745,14 +694,14 @@ void ProcessShaderCode(struct VertexShaderState* state) if (state->boolean_registers[boolv]) { - call(state, addrv / 4, retv, ((u32)(uintptr_t)(state->program_counter + 1) - 
(u32)(uintptr_t)(&GPUshadercodebuffer[0])) / 4); + call(state, addrv / 4, retv, ((u32)(uintptr_t)(state->program_counter + 1) - (u32)(uintptr_t)(&GPU_ShaderCodeBuffer[0])) / 4); } break; } - case SHDR_FLS: + case SHDR_END: #ifdef printfunc - DEBUG("FLS\n"); + DEBUG("END\n"); #endif // TODO: Do whatever needs to be done here? return; @@ -782,12 +731,12 @@ void ProcessShaderCode(struct VertexShaderState* state) if(condition) { - u32 binary_offset = ((u32)(uintptr_t)(state->program_counter) - (u32)(uintptr_t)(&GPUshadercodebuffer[0])) / 4; + u32 binary_offset = ((u32)(uintptr_t)(state->program_counter) - (u32)(uintptr_t)(&GPU_ShaderCodeBuffer[0])) / 4; ifcall(state, binary_offset + 1, (addrv / 4) - (binary_offset), (addrv / 4) + retv); } else { - state->program_counter = &GPUshadercodebuffer[addrv / 4]; + state->program_counter = &GPU_ShaderCodeBuffer[addrv / 4]; increment_pc = false; } break; @@ -827,12 +776,12 @@ void ProcessShaderCode(struct VertexShaderState* state) if(condition) { - u32 binary_offset = ((u32)(uintptr_t)(state->program_counter) - (u32)(uintptr_t)(&GPUshadercodebuffer[0])) / 4; + u32 binary_offset = ((u32)(uintptr_t)(state->program_counter) - (u32)(uintptr_t)(&GPU_ShaderCodeBuffer[0])) / 4; ifcall(state, binary_offset + 1, (addrv / 4) - (binary_offset), (addrv / 4) + retv); } else { - state->program_counter = &GPUshadercodebuffer[addrv / 4]; + state->program_counter = &GPU_ShaderCodeBuffer[addrv / 4]; increment_pc = false; } break; @@ -853,7 +802,7 @@ void ProcessShaderCode(struct VertexShaderState* state) DEBUG("JPB %08X, %s\n", addrv, condition?"true":"false"); #endif if (condition) { - state->program_counter = &GPUshadercodebuffer[addrv / 4]; + state->program_counter = &GPU_ShaderCodeBuffer[addrv / 4]; increment_pc = false; } @@ -893,7 +842,7 @@ void ProcessShaderCode(struct VertexShaderState* state) DEBUG("JPC %08X, %s\n", addrv, condition ? 
"true" : "false"); #endif if (condition) { - state->program_counter = &GPUshadercodebuffer[addrv / 4]; + state->program_counter = &GPU_ShaderCodeBuffer[addrv / 4]; increment_pc = false; } @@ -924,7 +873,7 @@ void ProcessShaderCode(struct VertexShaderState* state) DEBUG("LOOP %02X %03X %01x\n", NUM, DST, ID); //this is not realy a loop it is more like that happens for(aL = ID.y;;aL += ID.z) #endif state->address_registers[2] = state->integer_registers[ID][1]; - loop(state, DST, NUM + 1, ((u32)(uintptr_t)(state->program_counter) - (u32)(uintptr_t)(&GPUshadercodebuffer[0])) / 4 + 1, ID, state->integer_registers[ID][0]); + loop(state, DST, NUM + 1, ((u32)(uintptr_t)(state->program_counter) - (u32)(uintptr_t)(&GPU_ShaderCodeBuffer[0])) / 4 + 1, ID, state->integer_registers[ID][0]); break; } case SHDR_MAD1: //todo add swizzle for the other src @@ -1027,7 +976,7 @@ void RunShader(struct vec4 input[17], int num_attributes, struct OutputVertex *r struct VertexShaderState state; //const u32* main = &shader_memory[registers.Get().offset_words]; - state.program_counter = &GPUshadercodebuffer[GPU_Regs[VSMainOffset] & 0xFFFF]; + state.program_counter = &GPU_ShaderCodeBuffer[GPU_Regs[VS_MainOffset] & 0xFFFF]; // Setup input register table @@ -1036,7 +985,7 @@ void RunShader(struct vec4 input[17], int num_attributes, struct OutputVertex *r state.input_register_table[i] = &dummy_register; for (int i = 0; i> 0) & 0x1F, (output_register_map >> 8) & 0x1F, @@ -1090,30 +1039,19 @@ void RunShader(struct vec4 input[17], int num_attributes, struct OutputVertex *r for (int i = 0; i < 16; i++) { for (int j = 0; j < 4; j++)state.temporary_registers[i].v[j] = (0.f); } - ret->tc0.v[0] = 0.f; - ret->tc0.v[1] = 0.f; + ret->texcoord0.v[0] = 0.f; + ret->texcoord0.v[1] = 0.f; ProcessShaderCode(&state); GPUDEBUG("Output vertex: pos (%.2f, %.2f, %.2f, %.2f), col(%.2f, %.2f, %.2f, %.2f), tc0(%.2f, %.2f)\n", - ret->pos.v[0], ret->pos.v[1], ret->pos.v[2], ret->pos.v[3], + ret->position.v[0], 
ret->position.v[1], ret->position.v[2], ret->position.v[3], ret->color.v[0], ret->color.v[1], ret->color.v[2], ret->color.v[3], - ret->tc0.v[0], ret->tc0.v[1]); + ret->texcoord0.v[0], ret->texcoord0.v[1]); //return ret; } - - - - - - - - - - - static u32 GetComponent(u32 n,u32* data) { if (n < 8) { @@ -1141,15 +1079,18 @@ static u32 GetFormat(u32 n, u32* data) return ((*(data + 2)) >> (n - 8) * 4) & 0x3; } } + static u32 GetElementSizeInBytes(int n, u32* data) { return (GetFormat(n,data) == Format_FLOAT) ? 4 : (GetFormat(n,data) == Format_SHORT) ? 2 : 1; } + static u32 GetStride(int n, u32* data) { return GetNumElements(n,data) * GetElementSizeInBytes(n,data); } -void writeGPUID(u16 ID, u8 mask, u32 size, u32* buffer) + +void gpu_WriteID(u16 ID, u8 mask, u32 size, u32* buffer) { u32 i; switch (ID) { @@ -1184,7 +1125,7 @@ void writeGPUID(u16 ID, u8 mask, u32 size, u32* buffer) //NumTotalAttributes = component_count; for (int component = 0; component < component_count; component++) { u32 attribute_index = GetComponent(component, loader_config);//loader_config.GetComponent(component); - vertex_attribute_sources[attribute_index] = (u8*)get_pymembuffer(load_address); + vertex_attribute_sources[attribute_index] = (u8*)gpu_GetPhysicalMemoryBuff(load_address); vertex_attribute_strides[attribute_index] = (loader_config[2] >> 16) & 0xFFF; vertex_attribute_formats[attribute_index] = GetFormat(attribute_index, attribute_config); vertex_attribute_elements[attribute_index] = GetNumElements(attribute_index, attribute_config); @@ -1197,7 +1138,7 @@ void writeGPUID(u16 ID, u8 mask, u32 size, u32* buffer) bool is_indexed = (ID == TriggerDrawIndexed); //const auto& index_info = registers.Get(); u32 index_info_offset = GPU_Regs[IndexArrayConfig] & 0x7FFFFFFF; - const u8* index_address_8 = (u8*)get_pymembuffer(base_address + index_info_offset); + const u8* index_address_8 = (u8*)gpu_GetPhysicalMemoryBuff(base_address + index_info_offset); const u16* index_address_16 = 
(u16*)index_address_8; bool index_u16 = (GPU_Regs[IndexArrayConfig] >> 31); for (u32 index = 0; index < GPU_Regs[NumVertices]; index++) { @@ -1221,7 +1162,7 @@ void writeGPUID(u16 ID, u8 mask, u32 size, u32* buffer) GPUDEBUG("Loaded component %x of attribute %x for vertex %x (index %x) from 0x%08x + 0x%08lx + 0x%04lx: %f\n", comp, i, vertex, index, base_address, - vertex_attribute_sources[i] - (u8*)get_pymembuffer(base_address), + vertex_attribute_sources[i] - (u8*)gpu_GetPhysicalMemoryBuff(base_address), srcdata - vertex_attribute_sources[i], input[i].v[comp]); } @@ -1245,38 +1186,38 @@ void writeGPUID(u16 ID, u8 mask, u32 size, u32* buffer) } // Load shader program code - case VSLoadProgramData: - case VSLoadProgramData + 1: - case VSLoadProgramData + 2: - case VSLoadProgramData + 3: - case VSLoadProgramData + 4: - case VSLoadProgramData + 5: - case VSLoadProgramData + 6: - case VSLoadProgramData + 7: + case VS_LoadProgramData: + case VS_LoadProgramData + 1: + case VS_LoadProgramData + 2: + case VS_LoadProgramData + 3: + case VS_LoadProgramData + 4: + case VS_LoadProgramData + 5: + case VS_LoadProgramData + 6: + case VS_LoadProgramData + 7: if (mask != 0xF) { - GPUDEBUG("abnormal VSLoadProgramData %0x1 %0x3\n", mask, size); + GPUDEBUG("abnormal VS_LoadProgramData %01x %03x\n", mask, size); } for (i = 0; i < size; i++) - GPUshadercodebuffer[GPU_Regs[VSBeginLoadProgramData]++] = *(buffer + i); + GPU_ShaderCodeBuffer[GPU_Regs[VS_BeginLoadProgramData]++] = *(buffer + i); break; - case VSLoadSwizzleData: + case VS_LoadSwizzleData: - case VSLoadSwizzleData + 1: - case VSLoadSwizzleData + 2: - case VSLoadSwizzleData + 3: - case VSLoadSwizzleData + 4: - case VSLoadSwizzleData + 5: - case VSLoadSwizzleData + 6: - case VSLoadSwizzleData + 7: + case VS_LoadSwizzleData + 1: + case VS_LoadSwizzleData + 2: + case VS_LoadSwizzleData + 3: + case VS_LoadSwizzleData + 4: + case VS_LoadSwizzleData + 5: + case VS_LoadSwizzleData + 6: + case VS_LoadSwizzleData + 7: if (mask != 0xF) { 
GPUDEBUG("abnormal VSLoadSwizzleData %0x1 %0x3\n", mask, size); } for (i = 0; i < size; i++) - swizzle_data[GPU_Regs[VSBeginLoadSwizzleData]++] = *(buffer + i); + swizzle_data[GPU_Regs[VS_BeginLoadSwizzleData]++] = *(buffer + i); break; - case VSresttriangel: + case VS_resttriangel: if (*buffer & 0x1) //todo more checks { buffer_index = 0; @@ -1284,40 +1225,40 @@ void writeGPUID(u16 ID, u8 mask, u32 size, u32* buffer) } updateGPUintreg(*buffer, ID, mask); break; - case VSFloatUniformSetup: + case VS_FloatUniformSetup: updateGPUintreg(*buffer, ID, mask); buffer++; size--; - case VSFloatUniformSetup + 1: - case VSFloatUniformSetup + 2: - case VSFloatUniformSetup + 3: - case VSFloatUniformSetup + 4: - case VSFloatUniformSetup + 5: - case VSFloatUniformSetup + 6: - case VSFloatUniformSetup + 7: - case VSFloatUniformSetup + 8: + case VS_FloatUniformSetup + 1: + case VS_FloatUniformSetup + 2: + case VS_FloatUniformSetup + 3: + case VS_FloatUniformSetup + 4: + case VS_FloatUniformSetup + 5: + case VS_FloatUniformSetup + 6: + case VS_FloatUniformSetup + 7: + case VS_FloatUniformSetup + 8: for (i = 0; i < size; i++) { - VSFloatUniformSetuptembuffer[VSFloatUniformSetuptembuffercurrent++] = *(buffer + i); - bool isfloat32 = (GPU_Regs[VSFloatUniformSetup] >> 31) == 1; + VS_FloatUniformSetUpTempBuffer[VS_FloatUniformSetUpTempBufferCurrent++] = *(buffer + i); + bool isfloat32 = (GPU_Regs[VS_FloatUniformSetup] >> 31) == 1; - if (VSFloatUniformSetuptembuffercurrent == (isfloat32 ? 4 : 3)) { - VSFloatUniformSetuptembuffercurrent = 0; - u8 index = GPU_Regs[VSFloatUniformSetup] & 0x7F; + if (VS_FloatUniformSetUpTempBufferCurrent == (isfloat32 ? 
4 : 3)) { + VS_FloatUniformSetUpTempBufferCurrent = 0; + u8 index = GPU_Regs[VS_FloatUniformSetup] & 0x7F; if (index > 95) { GPUDEBUG("Invalid VS uniform index %02x\n", index); break; } // NOTE: The destination component order indeed is "backwards" if (isfloat32) { - const_vectors[index].v[3] = *(float*)(&VSFloatUniformSetuptembuffer[0]); - const_vectors[index].v[2] = *(float*)(&VSFloatUniformSetuptembuffer[1]); - const_vectors[index].v[1] = *(float*)(&VSFloatUniformSetuptembuffer[2]); - const_vectors[index].v[0] = *(float*)(&VSFloatUniformSetuptembuffer[3]); + const_vectors[index].v[3] = *(float*)(&VS_FloatUniformSetUpTempBuffer[0]); + const_vectors[index].v[2] = *(float*)(&VS_FloatUniformSetUpTempBuffer[1]); + const_vectors[index].v[1] = *(float*)(&VS_FloatUniformSetUpTempBuffer[2]); + const_vectors[index].v[0] = *(float*)(&VS_FloatUniformSetUpTempBuffer[3]); } else { - f24to32(VSFloatUniformSetuptembuffer[0] >> 8, &const_vectors[index].v[3]); - f24to32(((VSFloatUniformSetuptembuffer[0] & 0xFF) << 16) | ((VSFloatUniformSetuptembuffer[1] >> 16) & 0xFFFF), &const_vectors[index].v[2]); - f24to32(((VSFloatUniformSetuptembuffer[1] & 0xFFFF) << 8) | ((VSFloatUniformSetuptembuffer[2] >> 24) & 0xFF), &const_vectors[index].v[1]); - f24to32(VSFloatUniformSetuptembuffer[2] & 0xFFFFFF, &const_vectors[index].v[0]); + f24to32(VS_FloatUniformSetUpTempBuffer[0] >> 8, &const_vectors[index].v[3]); + f24to32(((VS_FloatUniformSetUpTempBuffer[0] & 0xFF) << 16) | ((VS_FloatUniformSetUpTempBuffer[1] >> 16) & 0xFFFF), &const_vectors[index].v[2]); + f24to32(((VS_FloatUniformSetUpTempBuffer[1] & 0xFFFF) << 8) | ((VS_FloatUniformSetUpTempBuffer[2] >> 24) & 0xFF), &const_vectors[index].v[1]); + f24to32(VS_FloatUniformSetUpTempBuffer[2] & 0xFFFFFF, &const_vectors[index].v[0]); } @@ -1325,7 +1266,7 @@ void writeGPUID(u16 ID, u8 mask, u32 size, u32* buffer) const_vectors[index].v[0], const_vectors[index].v[1], const_vectors[index].v[2], const_vectors[index].v[3]); // TODO: Verify that this 
actually modifies the register! - GPU_Regs[VSFloatUniformSetup]++; + GPU_Regs[VS_FloatUniformSetup]++; } } break; @@ -1341,11 +1282,9 @@ void writeGPUID(u16 ID, u8 mask, u32 size, u32* buffer) } } - - -void updateFramebufferaddr(u32 addr,bool bot) +void gpu_UpdateFramebufferAddr(u32 addr, bool bottom) { - u32 active_framebuf = mem_Read32(addr); //"0=first, 1=second" + u32 active_framebuffer = mem_Read32(addr); //"0=first, 1=second" u32 framebuf0_vaddr = mem_Read32(addr + 4); //"Framebuffer virtual address, for the main screen this is the 3D left framebuffer" u32 framebuf1_vaddr = mem_Read32(addr + 8); //"For the main screen: 3D right framebuffer address" u32 framebuf_widthbytesize = mem_Read32(addr + 12); //"Value for 0x1EF00X90, controls framebuffer width" @@ -1353,25 +1292,25 @@ void updateFramebufferaddr(u32 addr,bool bot) u32 framebuf_dispselect = mem_Read32(addr + 20); //"Value for 0x1EF00X78, controls which framebuffer is displayed" u32 unk = mem_Read32(addr + 24); //"?" - if (!bot) { - if(active_framebuf == 0) - gpu_WriteReg32(RGBuponeleft, convertvirtualtopys(framebuf0_vaddr)); + if (!bottom) { + if(active_framebuffer == 0) + gpu_WriteReg32(RGBuponeleft, gpu_ConvertVirtualToPhysical(framebuf0_vaddr)); else - gpu_WriteReg32(RGBuptwoleft, convertvirtualtopys(framebuf0_vaddr)); + gpu_WriteReg32(RGBuptwoleft, gpu_ConvertVirtualToPhysical(framebuf0_vaddr)); if(framebuf1_vaddr == 0) { - if(active_framebuf == 0) - gpu_WriteReg32(RGBuponeright, convertvirtualtopys(framebuf0_vaddr)); + if(active_framebuffer == 0) + gpu_WriteReg32(RGBuponeright, gpu_ConvertVirtualToPhysical(framebuf0_vaddr)); else - gpu_WriteReg32(RGBuptworight, convertvirtualtopys(framebuf0_vaddr)); + gpu_WriteReg32(RGBuptworight, gpu_ConvertVirtualToPhysical(framebuf0_vaddr)); } else { - if(active_framebuf == 0) - gpu_WriteReg32(RGBuponeright, convertvirtualtopys(framebuf1_vaddr)); + if(active_framebuffer == 0) + gpu_WriteReg32(RGBuponeright, gpu_ConvertVirtualToPhysical(framebuf1_vaddr)); else - 
gpu_WriteReg32(RGBuptworight, convertvirtualtopys(framebuf1_vaddr)); + gpu_WriteReg32(RGBuptworight, gpu_ConvertVirtualToPhysical(framebuf1_vaddr)); } gpu_WriteReg32(framestridetop, framebuf_widthbytesize); @@ -1379,10 +1318,10 @@ void updateFramebufferaddr(u32 addr,bool bot) gpu_WriteReg32(frameformattop, format); gpu_WriteReg32(frameselecttop, framebuf_dispselect); } else { - if(active_framebuf == 0) - gpu_WriteReg32(RGBdownoneleft, convertvirtualtopys(framebuf0_vaddr)); + if(active_framebuffer == 0) + gpu_WriteReg32(RGBdownoneleft, gpu_ConvertVirtualToPhysical(framebuf0_vaddr)); else - gpu_WriteReg32(RGBdowntwoleft, convertvirtualtopys(framebuf0_vaddr)); + gpu_WriteReg32(RGBdowntwoleft, gpu_ConvertVirtualToPhysical(framebuf0_vaddr)); gpu_WriteReg32(framestridebot, framebuf_widthbytesize); @@ -1392,45 +1331,45 @@ void updateFramebufferaddr(u32 addr,bool bot) return; } -void updateFramebuffer() +void gpu_UpdateFramebuffer() { //we use the last in buffer with flag set int i; for (i = 0; i < 4; i++) { - u8 *baseaddrtop = (u8*)(GSPsharedbuff + 0x200 + i * 0x80); //top - if (*(u8*)(baseaddrtop + 1)) { - *(u8*)(baseaddrtop + 1) = 0; - if (*(u8*)(baseaddrtop)) - baseaddrtop += 0x20; //get the other + u8 *base_addr_top = (u8*)(GSP_SharedBuff + 0x200 + i * 0x80); //top + if (*(u8*)(base_addr_top + 1)) { + *(u8*)(base_addr_top + 1) = 0; + if (*(u8*)(base_addr_top)) + base_addr_top += 0x20; //get the other else - baseaddrtop += 0x4; - - u32 active_framebuf = *(u32*)(baseaddrtop); //"0=first, 1=second" - u32 framebuf0_vaddr = *(u32*)(baseaddrtop + 4); //"Framebuffer virtual address, for the main screen this is the 3D left framebuffer" - u32 framebuf1_vaddr = *(u32*)(baseaddrtop + 8); //"For the main screen: 3D right framebuffer address" - u32 framebuf_widthbytesize = *(u32*)(baseaddrtop + 12); //"Value for 0x1EF00X90, controls framebuffer width" - u32 format = *(u32*)(baseaddrtop + 16); //"Framebuffer format, this u16 is written to the low u16 for LCD register 0x1EF00X70." 
- u32 framebuf_dispselect = *(u32*)(baseaddrtop + 20); //"Value for 0x1EF00X78, controls which framebuffer is displayed" - u32 unk = *(u32*)(baseaddrtop + 24); //"?" - - if(active_framebuf == 0) - gpu_WriteReg32(RGBuponeleft, convertvirtualtopys(framebuf0_vaddr)); + base_addr_top += 0x4; + + u32 active_framebuffer = *(u32*)(base_addr_top); //"0=first, 1=second" + u32 framebuf0_vaddr = *(u32*)(base_addr_top + 4); //"Framebuffer virtual address, for the main screen this is the 3D left framebuffer" + u32 framebuf1_vaddr = *(u32*)(base_addr_top + 8); //"For the main screen: 3D right framebuffer address" + u32 framebuf_widthbytesize = *(u32*)(base_addr_top + 12); //"Value for 0x1EF00X90, controls framebuffer width" + u32 format = *(u32*)(base_addr_top + 16); //"Framebuffer format, this u16 is written to the low u16 for LCD register 0x1EF00X70." + u32 framebuf_dispselect = *(u32*)(base_addr_top + 20); //"Value for 0x1EF00X78, controls which framebuffer is displayed" + u32 unk = *(u32*)(base_addr_top + 24); //"?" 
+ + if(active_framebuffer == 0) + gpu_WriteReg32(RGBuponeleft, gpu_ConvertVirtualToPhysical(framebuf0_vaddr)); else - gpu_WriteReg32(RGBuptwoleft, convertvirtualtopys(framebuf0_vaddr)); + gpu_WriteReg32(RGBuptwoleft, gpu_ConvertVirtualToPhysical(framebuf0_vaddr)); if(framebuf1_vaddr == 0) { - if(active_framebuf == 0) - gpu_WriteReg32(RGBuponeright, convertvirtualtopys(framebuf0_vaddr)); + if(active_framebuffer == 0) + gpu_WriteReg32(RGBuponeright, gpu_ConvertVirtualToPhysical(framebuf0_vaddr)); else - gpu_WriteReg32(RGBuptworight, convertvirtualtopys(framebuf0_vaddr)); + gpu_WriteReg32(RGBuptworight, gpu_ConvertVirtualToPhysical(framebuf0_vaddr)); } else { - if(active_framebuf == 0) - gpu_WriteReg32(RGBuponeright, convertvirtualtopys(framebuf1_vaddr)); + if(active_framebuffer == 0) + gpu_WriteReg32(RGBuponeright, gpu_ConvertVirtualToPhysical(framebuf1_vaddr)); else - gpu_WriteReg32(RGBuptworight, convertvirtualtopys(framebuf1_vaddr)); + gpu_WriteReg32(RGBuptworight, gpu_ConvertVirtualToPhysical(framebuf1_vaddr)); } gpu_WriteReg32(framestridetop, framebuf_widthbytesize); @@ -1438,26 +1377,26 @@ void updateFramebuffer() gpu_WriteReg32(frameformattop, format); gpu_WriteReg32(frameselecttop, framebuf_dispselect); } - u8 *baseaddrbot = (u8*)(GSPsharedbuff + 0x240 + i * 0x80); //bot - if (*(u8*)(baseaddrbot + 1)) { - *(u8*)(baseaddrbot + 1) = 0; - if (*(u8*)(baseaddrbot)) - baseaddrbot += 0x20; //get the other + u8 *base_addr_bottom = (u8*)(GSP_SharedBuff + 0x240 + i * 0x80); //bottom + if (*(u8*)(base_addr_bottom + 1)) { + *(u8*)(base_addr_bottom + 1) = 0; + if (*(u8*)(base_addr_bottom)) + base_addr_bottom += 0x20; //get the other else - baseaddrbot += 0x4; - - u32 active_framebuf = *(u32*)(baseaddrbot); //"0=first, 1=second" - u32 framebuf0_vaddr = *(u32*)(baseaddrbot + 4); //"Framebuffer virtual address, for the main screen this is the 3D left framebuffer" - u32 framebuf1_vaddr = *(u32*)(baseaddrbot + 8); //"For the main screen: 3D right framebuffer address" - u32 
framebuf_widthbytesize = *(u32*)(baseaddrbot + 12); //"Value for 0x1EF00X90, controls framebuffer width" - u32 format = *(u32*)(baseaddrbot + 16); //"Framebuffer format, this u16 is written to the low u16 for LCD register 0x1EF00X70." - u32 framebuf_dispselect = *(u32*)(baseaddrbot + 20); //"Value for 0x1EF00X78, controls which framebuffer is displayed" - u32 unk = *(u32*)(baseaddrbot + 24); //"?" - - if(active_framebuf == 0) - gpu_WriteReg32(RGBdownoneleft, convertvirtualtopys(framebuf0_vaddr)); + base_addr_bottom += 0x4; + + u32 active_framebuffer = *(u32*)(base_addr_bottom); //"0=first, 1=second" + u32 framebuf0_vaddr = *(u32*)(base_addr_bottom + 4); //"Framebuffer virtual address, for the main screen this is the 3D left framebuffer" + u32 framebuf1_vaddr = *(u32*)(base_addr_bottom + 8); //"For the main screen: 3D right framebuffer address" + u32 framebuf_widthbytesize = *(u32*)(base_addr_bottom + 12); //"Value for 0x1EF00X90, controls framebuffer width" + u32 format = *(u32*)(base_addr_bottom + 16); //"Framebuffer format, this u16 is written to the low u16 for LCD register 0x1EF00X70." + u32 framebuf_dispselect = *(u32*)(base_addr_bottom + 20); //"Value for 0x1EF00X78, controls which framebuffer is displayed" + u32 unk = *(u32*)(base_addr_bottom + 24); //"?" 
+ + if(active_framebuffer == 0) + gpu_WriteReg32(RGBdownoneleft, gpu_ConvertVirtualToPhysical(framebuf0_vaddr)); else - gpu_WriteReg32(RGBdowntwoleft, convertvirtualtopys(framebuf0_vaddr)); + gpu_WriteReg32(RGBdowntwoleft, gpu_ConvertVirtualToPhysical(framebuf0_vaddr)); gpu_WriteReg32(framestridebot, framebuf_widthbytesize); @@ -1469,13 +1408,13 @@ void updateFramebuffer() return; } -u8* get_pymembuffer(u32 addr) +u8* gpu_GetPhysicalMemoryBuff(u32 addr) { - if (addr >= 0x18000000 && addr < 0x18600000)return VRAMbuff + (addr - 0x18000000); - if (addr >= 0x20000000 && addr < 0x28000000)return LINEmembuffer + (addr - 0x20000000); + if (addr >= 0x18000000 && addr < 0x18600000)return VRAM_MemoryBuff + (addr - 0x18000000); + if (addr >= 0x20000000 && addr < 0x28000000)return LINEAR_MemoryBuff + (addr - 0x20000000); return NULL; } -u32 get_py_memrestsize(u32 addr) +u32 gpu_GetPhysicalMemoryRestSize(u32 addr) { if (addr >= 0x18000000 && addr < 0x18600000)return addr - 0x18000000; if (addr >= 0x20000000 && addr < 0x28000000)return addr - 0x20000000; diff --git a/src/gpu/rasterizer.c b/src/gpu/rasterizer.c index e805f2d..8314d28 100644 --- a/src/gpu/rasterizer.c +++ b/src/gpu/rasterizer.c @@ -85,7 +85,7 @@ static u16 GetDepth(int x, int y) u32 outx = ((inputdim >> 12) & 0x3FF); y = (outx - y); - u16* depth_buffer = (u16*)get_pymembuffer(GPU_Regs[DEPTHBUFFER_ADDRESS] << 3); + u16* depth_buffer = (u16*)gpu_GetPhysicalMemoryBuff(GPU_Regs[DEPTHBUFFER_ADDRESS] << 3); return *(depth_buffer + x + y * (GPU_Regs[Framebuffer_FORMAT11E] & 0xFFF) / 2); } @@ -97,7 +97,7 @@ static void SetDepth(int x, int y, u16 value) u32 outx = ((inputdim >> 12) & 0x3FF); y = (outx - y); - u16* depth_buffer = (u16*)get_pymembuffer(GPU_Regs[DEPTHBUFFER_ADDRESS] << 3); + u16* depth_buffer = (u16*)gpu_GetPhysicalMemoryBuff(GPU_Regs[DEPTHBUFFER_ADDRESS] << 3); // Assuming 16-bit depth buffer format until actual format handling is implemented if (depth_buffer) //there is no depth_buffer @@ -111,7 +111,7 @@ 
static void DrawPixel(int x, int y, struct clov4* color) static void DrawPixel(int x, int y, const struct clov4* color) { #endif - u8* color_buffer = (u8*)get_pymembuffer(GPU_Regs[COLORBUFFER_ADDRESS] << 3); + u8* color_buffer = (u8*)gpu_GetPhysicalMemoryBuff(GPU_Regs[COLORBUFFER_ADDRESS] << 3); #ifdef testtriang color->v[0] = (numb&0xF) << 0x4; @@ -128,7 +128,7 @@ static void DrawPixel(int x, int y, const struct clov4* color) //TODO: workout why this seems required for ctrulib gpu demo (outy=480) if(outy > 240) outy = 240; - //DEBUG("x=%d,y=%d,outx=%d,outy=%d,format=%d,inputdim=%08X,bufferformat=%08X\n", x, y, outx, outy, (GPU_Regs[BUFFERFORMAT] & 0x7000) >> 12, inputdim, GPU_Regs[BUFFERFORMAT]); + //DEBUG("x=%d,y=%d,outx=%d,outy=%d,format=%d,inputdim=%08X,bufferformat=%08X\n", x, y, outx, outy, (GPU_Regs[BUFFER_FORMAT] & 0x7000) >> 12, inputdim, GPU_Regs[BUFFER_FORMAT]); Color ncolor; ncolor.r = color->v[0]; @@ -138,7 +138,7 @@ static void DrawPixel(int x, int y, const struct clov4* color) u8* outaddr; // Assuming RGB8 format until actual framebuffer format handling is implemented - switch (GPU_Regs[BUFFERFORMAT] & 0x7000) { //input format + switch (GPU_Regs[BUFFER_FORMAT] & 0x7000) { //input format case 0: //RGBA8 outaddr = color_buffer + x * 4 + y * (outy)* 4; //check if that is correct @@ -161,7 +161,7 @@ static void DrawPixel(int x, int y, const struct clov4* color) color_encode(&ncolor, RGBA4, outaddr); break; default: - DEBUG("error unknown output format %04X\n", GPU_Regs[BUFFERFORMAT] & 0x7000); + DEBUG("error unknown output format %04X\n", GPU_Regs[BUFFER_FORMAT] & 0x7000); break; } @@ -171,7 +171,7 @@ static void DrawPixel(int x, int y, const struct clov4* color) static void RetrievePixel(int x, int y, struct clov4 *output) { - u8* color_buffer = (u8*)get_pymembuffer(GPU_Regs[COLORBUFFER_ADDRESS] << 3); + u8* color_buffer = (u8*)gpu_GetPhysicalMemoryBuff(GPU_Regs[COLORBUFFER_ADDRESS] << 3); u32 inputdim = GPU_Regs[Framebuffer_FORMAT11E]; u32 outy = 
(inputdim & 0x7FF); @@ -188,7 +188,7 @@ static void RetrievePixel(int x, int y, struct clov4 *output) u8* addr; // Assuming RGB8 format until actual framebuffer format handling is implemented - switch(GPU_Regs[BUFFERFORMAT] & 0x7000) { //input format + switch(GPU_Regs[BUFFER_FORMAT] & 0x7000) { //input format case 0: //RGBA8 addr = color_buffer + x * 4 + y * (outy)* 4; //check if that is correct @@ -211,7 +211,7 @@ static void RetrievePixel(int x, int y, struct clov4 *output) color_decode(addr, RGBA4, &ncolor); break; default: - DEBUG("error unknown output format %04X\n", GPU_Regs[BUFFERFORMAT] & 0x7000); + DEBUG("error unknown output format %04X\n", GPU_Regs[BUFFER_FORMAT] & 0x7000); break; } @@ -221,12 +221,14 @@ static void RetrievePixel(int x, int y, struct clov4 *output) output->v[3] = ncolor.a; } -static float GetInterpolatedAttribute(float attr0, float attr1, float attr2, const struct OutputVertex *v0, const struct OutputVertex * v1, const struct OutputVertex * v2,float w0,float w1, float w2) +static float GetInterpolatedAttribute(float attr0, float attr1, float attr2, const struct OutputVertex *v0, const struct OutputVertex * v1, + const struct OutputVertex * v2,float w0,float w1, float w2) { - float interpolated_attr_over_w = (attr0 / v0->pos.v[3])*w0 + (attr1 / v1->pos.v[3])*w1 + (attr2 / v2->pos.v[3])*w2; - float interpolated_w_inverse = ((1.f) / v0->pos.v[3])*w0 + ((1.f) / v1->pos.v[3])*w1 + ((1.f) / v2->pos.v[3])*w2; + float interpolated_attr_over_w = (attr0 / v0->position.v[3])*w0 + (attr1 / v1->position.v[3])*w1 + (attr2 / v2->position.v[3])*w2; + float interpolated_w_inverse = ((1.f) / v0->position.v[3])*w0 + ((1.f) / v1->position.v[3])*w1 + ((1.f) / v2->position.v[3])*w2; return interpolated_attr_over_w / interpolated_w_inverse; } + static void GetColorModifier(u32 factor, struct clov4/*3*/ * values) { switch (factor) { @@ -284,6 +286,7 @@ static void GetColorModifier(u32 factor, struct clov4/*3*/ * values) return; } } + static u8 AlphaCombine(u32 
op, struct clov3* input) { switch (op) { @@ -382,6 +385,7 @@ typedef enum{ ConstantAlpha = 12, OneMinusConstantAlpha = 13, } BlendFactor; + static void LookupFactorRGB(BlendFactor factor, struct clov4 *source, struct clov4 *output) { switch(factor) @@ -452,6 +456,7 @@ typedef enum{ Repeat = 2, MirrorRepeat = 3 } WrapMode; + static int GetWrappedTexCoord(WrapMode wrap, int val, int size) { if(size == 0) return val; @@ -966,25 +971,25 @@ void rasterizer_ProcessTriangle(struct OutputVertex * v0, struct clov4 texture_color[4]; float u[3],v[3]; - u[0] = GetInterpolatedAttribute(v0->tc0.v[0], v1->tc0.v[0], v2->tc0.v[0], v0, v1, v2, (float)w0, (float)w1, (float)w2); - v[0] = GetInterpolatedAttribute(v0->tc0.v[1], v1->tc0.v[1], v2->tc0.v[1], v0, v1, v2, (float)w0, (float)w1, (float)w2); - u[1] = GetInterpolatedAttribute(v0->tc1.v[0], v1->tc1.v[0], v2->tc1.v[0], v0, v1, v2, (float)w0, (float)w1, (float)w2); - v[1] = GetInterpolatedAttribute(v0->tc1.v[1], v1->tc1.v[1], v2->tc1.v[1], v0, v1, v2, (float)w0, (float)w1, (float)w2); - u[2] = GetInterpolatedAttribute(v0->tc2.v[0], v1->tc2.v[0], v2->tc2.v[0], v0, v1, v2, (float)w0, (float)w1, (float)w2); - v[2] = GetInterpolatedAttribute(v0->tc2.v[1], v1->tc2.v[1], v2->tc2.v[1], v0, v1, v2, (float)w0, (float)w1, (float)w2); + u[0] = GetInterpolatedAttribute(v0->texcoord0.v[0], v1->texcoord0.v[0], v2->texcoord0.v[0], v0, v1, v2, (float)w0, (float)w1, (float)w2); + v[0] = GetInterpolatedAttribute(v0->texcoord0.v[1], v1->texcoord0.v[1], v2->texcoord0.v[1], v0, v1, v2, (float)w0, (float)w1, (float)w2); + u[1] = GetInterpolatedAttribute(v0->texcoord1.v[0], v1->texcoord1.v[0], v2->texcoord1.v[0], v0, v1, v2, (float)w0, (float)w1, (float)w2); + v[1] = GetInterpolatedAttribute(v0->texcoord1.v[1], v1->texcoord1.v[1], v2->texcoord1.v[1], v0, v1, v2, (float)w0, (float)w1, (float)w2); + u[2] = GetInterpolatedAttribute(v0->texcoord2.v[0], v1->texcoord2.v[0], v2->texcoord2.v[0], v0, v1, v2, (float)w0, (float)w1, (float)w2); + v[2] = 
GetInterpolatedAttribute(v0->texcoord2.v[1], v1->texcoord2.v[1], v2->texcoord2.v[1], v0, v1, v2, (float)w0, (float)w1, (float)w2); for (int i = 0; i < 3; ++i) { - if (GPU_Regs[TEXTURINGSETINGS80] & (0x1<> 16) & 0xFFFF; - wrap_s = (GPU_Regs[TEXTURCONFIG0WRAP] >> 8) & 3; - wrap_t = (GPU_Regs[TEXTURCONFIG0WRAP] >> 12) & 3; - format = GPU_Regs[TEXTURCONFIG0TYPE] & 0xF; + height = (GPU_Regs[TEXTURE_CONFIG0_SIZE] & 0xFFFF); + width = (GPU_Regs[TEXTURE_CONFIG0_SIZE] >> 16) & 0xFFFF; + wrap_s = (GPU_Regs[TEXTURE_CONFIG0_WRAP] >> 8) & 3; + wrap_t = (GPU_Regs[TEXTURE_CONFIG0_WRAP] >> 12) & 3; + format = GPU_Regs[TEXTURE_CONFIG0_TYPE] & 0xF; break; case 1: - height = (GPU_Regs[TEXTURCONFIG1SIZE] & 0xFFFF); - width = (GPU_Regs[TEXTURCONFIG1SIZE] >> 16) & 0xFFFF; - wrap_s = (GPU_Regs[TEXTURCONFIG1WRAP] >> 8) & 3; - wrap_t = (GPU_Regs[TEXTURCONFIG1WRAP] >> 12) & 3; - format = GPU_Regs[TEXTURCONFIG1TYPE] & 0xF; + height = (GPU_Regs[TEXTURE_CONFIG1_SIZE] & 0xFFFF); + width = (GPU_Regs[TEXTURE_CONFIG1_SIZE] >> 16) & 0xFFFF; + wrap_s = (GPU_Regs[TEXTURE_CONFIG1_WRAP] >> 8) & 3; + wrap_t = (GPU_Regs[TEXTURE_CONFIG1_WRAP] >> 12) & 3; + format = GPU_Regs[TEXTURE_CONFIG1_TYPE] & 0xF; break; case 2: - height = (GPU_Regs[TEXTURCONFIG2SIZE] & 0xFFFF); - width = (GPU_Regs[TEXTURCONFIG2SIZE] >> 16) & 0xFFFF; - wrap_s = (GPU_Regs[TEXTURCONFIG2WRAP] >> 8) & 3; - wrap_t = (GPU_Regs[TEXTURCONFIG2WRAP] >> 12) & 3; - format = GPU_Regs[TEXTURCONFIG2TYPE] & 0xF; + height = (GPU_Regs[TEXTURE_CONFIG2_SIZE] & 0xFFFF); + width = (GPU_Regs[TEXTURE_CONFIG2_SIZE] >> 16) & 0xFFFF; + wrap_s = (GPU_Regs[TEXTURE_CONFIG2_WRAP] >> 8) & 3; + wrap_t = (GPU_Regs[TEXTURE_CONFIG2_WRAP] >> 12) & 3; + format = GPU_Regs[TEXTURE_CONFIG2_TYPE] & 0xF; break; } @@ -1218,7 +1223,7 @@ void rasterizer_ProcessTriangle(struct OutputVertex * v0, } //Alpha blending - if((GPU_Regs[COLOROUTPUT_CONFIG] >> 8) & 1) //Alpha blending enabled? + if((GPU_Regs[COLOR_OUTPUT_CONFIG] >> 8) & 1) //Alpha blending enabled? 
{ struct clov4 dest, srcfactor, dstfactor, result; GetPixel(x >> 4, y >> 4, &dest); @@ -1278,7 +1283,7 @@ void rasterizer_ProcessTriangle(struct OutputVertex * v0, } else { - DEBUG("logic op: %x", GPU_Regs[COLORLOGICOP_CONFIG] & 0xF); + DEBUG("logic op: %x", GPU_Regs[COLOR_LOGICOP_CONFIG] & 0xF); } /*struct clov4 combiner_output; diff --git a/src/mem.c b/src/mem.c index 6ecc825..4afb6fc 100644 --- a/src/mem.c +++ b/src/mem.c @@ -58,8 +58,6 @@ static size_t num_mappings; #define EXIT_ON_ILLEGAL 1 //#define PRINT_MISALIGNED 1 - - void mem_Dbugdump() { size_t i; @@ -71,12 +69,12 @@ void mem_Dbugdump() fwrite(mappings[i].phys, 1, schei, data); fclose(data); } - FILE* data = fopen("VRAMdump.bin", "wb"); - fwrite(VRAMbuff, 1, 0x600000, data); + FILE* data = fopen("VRAM_Memorydump.bin", "wb"); + fwrite(VRAM_MemoryBuff, 1, 0x600000, data); fclose(data); - FILE* membuf = fopen("LINEmembuffer.bin", "wb"); - fwrite(LINEmembuffer, 1, 0x8000000, membuf); + FILE* membuf = fopen("LINEAR_Memorydump.bin", "wb"); + fwrite(LINEAR_MemoryBuff, 1, 0x8000000, membuf); fclose(membuf); } diff --git a/src/screen.c b/src/screen.c index 0e14ba7..a003f94 100644 --- a/src/screen.c +++ b/src/screen.c @@ -67,7 +67,7 @@ void screen_RenderGPUaddr(u32 addr) int updateSurface = 0; //Top Screen - u8* buffer = get_pymembuffer(addr); + u8* buffer = gpu_GetPhysicalMemoryBuff(addr); if (buffer != NULL) { SDL_LockSurface(bitmapSurface); @@ -199,7 +199,7 @@ void screen_RenderGPU() } else { u32 addr = ((gpu_ReadReg32(frameselecttop) & 0x1) == 0) ? gpu_ReadReg32(RGBuponeleft) : gpu_ReadReg32(RGBuptwoleft); - u8* buffer = get_pymembuffer(addr); + u8* buffer = gpu_GetPhysicalMemoryBuff(addr); if (buffer != NULL) { SDL_LockSurface(bitmapSurface); @@ -228,7 +228,7 @@ void screen_RenderGPU() SDL_FillRect(bitmapSurface, &rect, SDL_MapRGB(bitmapSurface->format, r, g, b)); } else { u32 addr = ((gpu_ReadReg32(frameselectbot) & 0x1) == 0) ? 
gpu_ReadReg32(RGBdownoneleft) : gpu_ReadReg32(RGBdowntwoleft); - buffer = get_pymembuffer(addr); + buffer = gpu_GetPhysicalMemoryBuff(addr); if (buffer != NULL) { if (!updateSurface) { SDL_LockSurface(bitmapSurface); diff --git a/src/services/apt_s.c b/src/services/apt_s.c index 2132a38..fdc03dc 100644 --- a/src/services/apt_s.c +++ b/src/services/apt_s.c @@ -124,7 +124,7 @@ SERVICE_CMD(0x440000) RESP(1, -1); return 0; } - memcpy(LINEmembuffer + (0x18000000 - 0x14000000), APTs_sharedfont, APTs_sharedfontsize); + memcpy(LINEAR_MemoryBuff + (0x18000000 - 0x14000000), APTs_sharedfont, APTs_sharedfontsize); /*APTs_sharedfont[3] = 0x2; APTs_sharedfont[2] = 0x0; diff --git a/src/services/apt_u.c b/src/services/apt_u.c index ee34a0e..fb58da0 100644 --- a/src/services/apt_u.c +++ b/src/services/apt_u.c @@ -189,7 +189,7 @@ SERVICE_CMD(0x440000) return 0; } - memcpy(LINEmembuffer + (0x18000000 - 0x14000000), APTsharedfont, APTsharedfontsize); + memcpy(LINEAR_MemoryBuff + (0x18000000 - 0x14000000), APTsharedfont, APTsharedfontsize); /*APTsharedfont[3] = 0x0; APTsharedfont[2] = 0x0; diff --git a/src/services/gsp_gpu.c b/src/services/gsp_gpu.c index d14f585..e9a34cf 100644 --- a/src/services/gsp_gpu.c +++ b/src/services/gsp_gpu.c @@ -39,22 +39,22 @@ void gsp_ExecuteCommandFromSharedMem() // For all threads for (i = 0; i < 0x4; i++) { - u8* baseaddr = (u8*)(GSPsharedbuff + 0x800 + i * 0x200); - u32 header = *(u32*)baseaddr; + u8* base_addr = (u8*)(GSP_SharedBuff + 0x800 + i * 0x200); + u32 header = *(u32*)base_addr; u32 toprocess = (header >> 8) & 0xFF; //mem_Dbugdump(); - *(u32*)baseaddr = 0; + *(u32*)base_addr = 0; for (u32 j = 0; j < toprocess; j++) { - u32 cmd_id = *(u32*)(baseaddr + (j + 1) * 0x20); + u32 cmd_id = *(u32*)(base_addr + (j + 1) * 0x20); switch (cmd_id & 0xFF) { case GSP_ID_REQUEST_DMA: { /* GX::RequestDma */ - u32 src = *(u32*)(baseaddr + (j + 1) * 0x20 + 0x4); - u32 dest = *(u32*)(baseaddr + (j + 1) * 0x20 + 0x8); - u32 size = *(u32*)(baseaddr + (j + 1) * 
0x20 + 0xC); + u32 src = *(u32*)(base_addr + (j + 1) * 0x20 + 0x4); + u32 dest = *(u32*)(base_addr + (j + 1) * 0x20 + 0x8); + u32 size = *(u32*)(base_addr + (j + 1) * 0x20 + 0xC); GPUDEBUG("GX RequestDma 0x%08x 0x%08x 0x%08x\n", src, dest, size); @@ -65,12 +65,12 @@ void gsp_ExecuteCommandFromSharedMem() if((src - 0x1f000000 > 0x600000 || src + size - 0x1f000000 > 0x600000)) { - mem_Read(&VRAMbuff[dest - 0x1F000000], src, size); + mem_Read(&VRAM_MemoryBuff[dest - 0x1F000000], src, size); } else { //Can safely assume this is a copy from VRAM to VRAM - memcpy(&VRAMbuff[dest - 0x1F000000], &VRAMbuff[src - 0x1F000000], size); + memcpy(&VRAM_MemoryBuff[dest - 0x1F000000], &VRAM_MemoryBuff[src - 0x1F000000], size); } gpu_SendInterruptToAll(6); @@ -78,9 +78,9 @@ void gsp_ExecuteCommandFromSharedMem() } case GSP_ID_SET_CMDLIST: { /* GX::SetCmdList Last */ - u32 addr = *(u32*)(baseaddr + (j + 1) * 0x20 + 0x4); - u32 size = *(u32*)(baseaddr + (j + 1) * 0x20 + 0x8); - u32 flags = *(u32*)(baseaddr + (j + 1) * 0x20 + 0xC); + u32 addr = *(u32*)(base_addr + (j + 1) * 0x20 + 0x4); + u32 size = *(u32*)(base_addr + (j + 1) * 0x20 + 0x8); + u32 flags = *(u32*)(base_addr + (j + 1) * 0x20 + 0xC); GPUDEBUG("GX SetCommandList Last 0x%08x 0x%08x 0x%08x\n", addr, size, flags); @@ -106,25 +106,25 @@ void gsp_ExecuteCommandFromSharedMem() case GSP_ID_SET_MEMFILL: { //speedup todo u32 addr1, val1, addrend1, addr2, val2, addrend2, width; - addr1 = *(u32*)(baseaddr + (j + 1) * 0x20 + 0x4); - val1 = *(u32*)(baseaddr + (j + 1) * 0x20 + 0x8); - addrend1 = *(u32*)(baseaddr + (j + 1) * 0x20 + 0xC); - addr2 = *(u32*)(baseaddr + (j + 1) * 0x20 + 0x10); - val2 = *(u32*)(baseaddr + (j + 1) * 0x20 + 0x14); - addrend2 = *(u32*)(baseaddr + (j + 1) * 0x20 + 0x18); - width = *(u32*)(baseaddr + (j + 1) * 0x20 + 0x1C); + addr1 = *(u32*)(base_addr + (j + 1) * 0x20 + 0x4); + val1 = *(u32*)(base_addr + (j + 1) * 0x20 + 0x8); + addrend1 = *(u32*)(base_addr + (j + 1) * 0x20 + 0xC); + addr2 = *(u32*)(base_addr + (j 
+ 1) * 0x20 + 0x10); + val2 = *(u32*)(base_addr + (j + 1) * 0x20 + 0x14); + addrend2 = *(u32*)(base_addr + (j + 1) * 0x20 + 0x18); + width = *(u32*)(base_addr + (j + 1) * 0x20 + 0x1C); GPUDEBUG("GX SetMemoryFill 0x%08X 0x%08X 0x%08X 0x%08X 0x%08X 0x%08X 0x%08X\r\n", addr1, val1, addrend1, addr2, val2, addrend2, width); if (addr1 - 0x1f000000 > 0x600000 || addrend1 - 0x1f000000 > 0x600000) { GPUDEBUG("SetMemoryFill into non VRAM not suported\r\n"); } else { - u32 size = getsizeofwight(width & 0xFFFF); + u32 size = gpu_GetSizeOfWidth(width & 0xFFFF); u32 k; for(k = addr1; k < addrend1; k+=size) { s32 m; for(m = size - 1; m >= 0; m--) { - VRAMbuff[m + (k - 0x1F000000)] = (u8)(val1 >> (m * 8)); + VRAM_MemoryBuff[m + (k - 0x1F000000)] = (u8)(val1 >> (m * 8)); } } } @@ -132,12 +132,12 @@ void gsp_ExecuteCommandFromSharedMem() if (addr2 && addrend2) GPUDEBUG("SetMemoryFill into non VRAM not suported\r\n"); } else { - u32 size = getsizeofwight((width >> 16) & 0xFFFF); + u32 size = gpu_GetSizeOfWidth((width >> 16) & 0xFFFF); u32 k; for(k = addr2; k < addrend2; k += size) { s32 m; for (m = size - 1; m >= 0; m--) - VRAMbuff[m + (k - 0x1F000000)] = (u8)(val2 >> (m * 8)); + VRAM_MemoryBuff[m + (k - 0x1F000000)] = (u8)(val2 >> (m * 8)); } } gpu_SendInterruptToAll(0); @@ -149,16 +149,16 @@ void gsp_ExecuteCommandFromSharedMem() u32 inpaddr, outputaddr, inputdim, outputdim, flags, unk; - inpaddr = *(u32*)(baseaddr + (j + 1) * 0x20 + 0x4); - outputaddr = *(u32*)(baseaddr + (j + 1) * 0x20 + 0x8); - inputdim = *(u32*)(baseaddr + (j + 1) * 0x20 + 0xC); - outputdim = *(u32*)(baseaddr + (j + 1) * 0x20 + 0x10); - flags = *(u32*)(baseaddr + (j + 1) * 0x20 + 0x14); - unk = *(u32*)(baseaddr + (j + 1) * 0x20 + 0x18); + inpaddr = *(u32*)(base_addr + (j + 1) * 0x20 + 0x4); + outputaddr = *(u32*)(base_addr + (j + 1) * 0x20 + 0x8); + inputdim = *(u32*)(base_addr + (j + 1) * 0x20 + 0xC); + outputdim = *(u32*)(base_addr + (j + 1) * 0x20 + 0x10); + flags = *(u32*)(base_addr + (j + 1) * 0x20 + 
0x14); + unk = *(u32*)(base_addr + (j + 1) * 0x20 + 0x18); GPUDEBUG("GX SetDisplayTransfer 0x%08X 0x%08X 0x%08X 0x%08X 0x%08X 0x%08X\r\n", inpaddr, outputaddr, inputdim, outputdim, flags, unk); - u8 * inaddr = get_pymembuffer(convertvirtualtopys(inpaddr)); - u8 * outaddr = get_pymembuffer(convertvirtualtopys(outputaddr)); + u8 * inaddr = gpu_GetPhysicalMemoryBuff(gpu_ConvertVirtualToPhysical(inpaddr)); + u8 * outaddr = gpu_GetPhysicalMemoryBuff(gpu_ConvertVirtualToPhysical(outputaddr)); u32 rely = (inputdim & 0xFFFF); u32 relx = ((inputdim >> 0x10) & 0xFFFF); @@ -292,44 +292,44 @@ void gsp_ExecuteCommandFromSharedMem() } } } - updateFramebuffer(); + gpu_UpdateFramebuffer(); break; } case GSP_ID_SET_TEXTURE_COPY: { gpu_SendInterruptToAll(1); u32 inpaddr, outputaddr /*,size*/, inputdim, outputdim, flags; - inpaddr = *(u32*)(baseaddr + (j + 1) * 0x20 + 0x4); - outputaddr = *(u32*)(baseaddr + (j + 1) * 0x20 + 0x8); - u32 size = *(u32*)(baseaddr + (j + 1) * 0x20 + 0xC); - inputdim = *(u32*)(baseaddr + (j + 1) * 0x20 + 0x10); - outputdim = *(u32*)(baseaddr + (j + 1) * 0x20 + 0x14); - flags = *(u32*)(baseaddr + (j + 1) * 0x20 + 0x18); + inpaddr = *(u32*)(base_addr + (j + 1) * 0x20 + 0x4); + outputaddr = *(u32*)(base_addr + (j + 1) * 0x20 + 0x8); + u32 size = *(u32*)(base_addr + (j + 1) * 0x20 + 0xC); + inputdim = *(u32*)(base_addr + (j + 1) * 0x20 + 0x10); + outputdim = *(u32*)(base_addr + (j + 1) * 0x20 + 0x14); + flags = *(u32*)(base_addr + (j + 1) * 0x20 + 0x18); GPUDEBUG("GX SetTextureCopy 0x%08X 0x%08X 0x%08X 0x%08X 0x%08X 0x%08X --todo--\r\n", inpaddr, outputaddr, size, inputdim, outputdim, flags); - updateFramebuffer(); + gpu_UpdateFramebuffer(); //goto theother; //untill I know what is the differnece break; } case GSP_ID_FLUSH_CMDLIST: { u32 addr1, size1, addr2, size2, addr3, size3; - addr1 = *(u32*)(baseaddr + (j + 1) * 0x20 + 0x4); - size1 = *(u32*)(baseaddr + (j + 1) * 0x20 + 0x8); - addr2 = *(u32*)(baseaddr + (j + 1) * 0x20 + 0xC); - size2 = *(u32*)(baseaddr + 
(j + 1) * 0x20 + 0x10); - addr3 = *(u32*)(baseaddr + (j + 1) * 0x20 + 0x14); - size3 = *(u32*)(baseaddr + (j + 1) * 0x20 + 0x18); + addr1 = *(u32*)(base_addr + (j + 1) * 0x20 + 0x4); + size1 = *(u32*)(base_addr + (j + 1) * 0x20 + 0x8); + addr2 = *(u32*)(base_addr + (j + 1) * 0x20 + 0xC); + size2 = *(u32*)(base_addr + (j + 1) * 0x20 + 0x10); + addr3 = *(u32*)(base_addr + (j + 1) * 0x20 + 0x14); + size3 = *(u32*)(base_addr + (j + 1) * 0x20 + 0x18); GPUDEBUG("GX SetCommandList First 0x%08X 0x%08X 0x%08X 0x%08X 0x%08X 0x%08X\r\n", addr1, size1, addr2, size2, addr3, size3); break; } default: - GPUDEBUG("GX cmd 0x%08X 0x%08X 0x%08X 0x%08X 0x%08X 0x%08X 0x%08X 0x%08X\r\n", *(u32*)(baseaddr + (j + 1) * 0x20), *(u32*)((baseaddr + (j + 1) * 0x20) + 0x4), *(u32*)((baseaddr + (j + 1) * 0x20) + 0x8), *(u32*)((baseaddr + (j + 1) * 0x20) + 0xC), *(u32*)((baseaddr + (j + 1) * 0x20) + 0x10), *(u32*)((baseaddr + (j + 1) * 0x20) + 0x14), *(u32*)((baseaddr + (j + 1) * 0x20) + 0x18), *(u32*)((baseaddr + (j + 1) * 0x20)) + 0x1C); + GPUDEBUG("GX cmd 0x%08X 0x%08X 0x%08X 0x%08X 0x%08X 0x%08X 0x%08X 0x%08X\r\n", *(u32*)(base_addr + (j + 1) * 0x20), *(u32*)((base_addr + (j + 1) * 0x20) + 0x4), *(u32*)((base_addr + (j + 1) * 0x20) + 0x8), *(u32*)((base_addr + (j + 1) * 0x20) + 0xC), *(u32*)((base_addr + (j + 1) * 0x20) + 0x10), *(u32*)((base_addr + (j + 1) * 0x20) + 0x14), *(u32*)((base_addr + (j + 1) * 0x20) + 0x18), *(u32*)((base_addr + (j + 1) * 0x20)) + 0x1C); break; } } } } -u32 GPURegisterInterruptRelayQueue(u32 flags, u32 Kevent, u32*threadID, u32*outMemHandle) +u32 gpu_RegisterInterruptRelayQueue(u32 flags, u32 Kevent, u32*threadID, u32*outMemHandle) { *threadID = numReqQueue++; *outMemHandle = handle_New(HANDLE_TYPE_SHAREDMEM, MEM_TYPE_GSP_0); @@ -342,9 +342,9 @@ u32 GPURegisterInterruptRelayQueue(u32 flags, u32 Kevent, u32*threadID, u32*outM } h->locked = false; //unlock we are fast - *(u32*)(GSPsharedbuff + *threadID * 0x40) = 0x0; //dump from save GSP v0 flags 0 - 
*(u32*)(GSPsharedbuff + *threadID * 0x44) = 0x0; //dump from save GSP v0 flags 0 - *(u32*)(GSPsharedbuff + *threadID * 0x48) = 0x0; //dump from save GSP v0 flags 0 + *(u32*)(GSP_SharedBuff + *threadID * 0x40) = 0x0; //dump from save GSP v0 flags 0 + *(u32*)(GSP_SharedBuff + *threadID * 0x44) = 0x0; //dump from save GSP v0 flags 0 + *(u32*)(GSP_SharedBuff + *threadID * 0x48) = 0x0; //dump from save GSP v0 flags 0 return 0x2A07; //dump from save GSP v0 flags 0 } @@ -528,7 +528,7 @@ SERVICE_CMD(0x50200) // SetBufferSwap } // TODO: Get rid of this: - updateFramebufferaddr(arm11_ServiceBufferAddress() + 0x88, //don't use CMD(2) here it is not working! + gpu_UpdateFramebufferAddr(arm11_ServiceBufferAddress() + 0x88, //don't use CMD(2) here it is not working! screen & 0x1); screen_RenderGPU(); //display new stuff @@ -589,7 +589,7 @@ SERVICE_CMD(0x130042) // RegisterInterruptRelayQueue u32 outMemHandle = 0; mem_Write32(arm11_ServiceBufferAddress() + 0x84, - GPURegisterInterruptRelayQueue(mem_Read32(arm11_ServiceBufferAddress() + 0x84), + gpu_RegisterInterruptRelayQueue(mem_Read32(arm11_ServiceBufferAddress() + 0x84), mem_Read32(arm11_ServiceBufferAddress() + 0x8C), &threadID, &outMemHandle)); mem_Write32(arm11_ServiceBufferAddress() + 0x88, threadID); @@ -636,16 +636,16 @@ void gpu_SendInterruptToAll(u32 ID) } h->locked = false; //unlock we are fast for (i = 0; i < 4; i++) { - u8 next = *(u8*)(GSPsharedbuff + i * 0x40); //0x33 next is 00 - u8 inuse = *(u8*)(GSPsharedbuff + i * 0x40 + 1); + u8 next = *(u8*)(GSP_SharedBuff + i * 0x40); //0x33 next is 00 + u8 inuse = *(u8*)(GSP_SharedBuff + i * 0x40 + 1); next += inuse; if (inuse > 0x20 && ((ID == 2) || (ID == 3))) continue; //todo - *(u8*)(GSPsharedbuff + i * 0x40 + 1) = inuse + 1; - *(u8*)(GSPsharedbuff + i * 0x40 + 2) = 0x0; //no error + *(u8*)(GSP_SharedBuff + i * 0x40 + 1) = inuse + 1; + *(u8*)(GSP_SharedBuff + i * 0x40 + 2) = 0x0; //no error next = next % 0x34; - *(u8*)(GSPsharedbuff + i * 0x40 + 0xC + next) = ID; + 
*(u8*)(GSP_SharedBuff + i * 0x40 + 0xC + next) = ID; } if (ID == 4) diff --git a/src/syscalls/memory.c b/src/syscalls/memory.c index 26354bf..8f0e7a2 100644 --- a/src/syscalls/memory.c +++ b/src/syscalls/memory.c @@ -212,7 +212,7 @@ u32 svcControlMemory_wrap(u32 op, u32 addr0, u32 addr1, u32 size, u32 perm, u32* } else { if ((op & 0x10000) == 0x10000) { //LINEAR addr0 = 0x14000000 + linearalloced; - u8* outLINEmembuffer = LINEmembuffer + linearalloced; + u8* outLINEmembuffer = LINEAR_MemoryBuff + linearalloced; linearalloced += size; *newaddr = addr0; // outaddr is in R1 return mem_AddMappingShared(addr0, size, outLINEmembuffer); @@ -311,7 +311,7 @@ u32 svcMapMemoryBlock() if(h->type == HANDLE_TYPE_SHAREDMEM) { switch (h->subtype) { case MEM_TYPE_GSP_0: - mem_AddMappingShared(addr, GSPsharebuffsize, GSPsharedbuff); + mem_AddMappingShared(addr, GSP_Shared_Buff_Size, GSP_SharedBuff); break; case MEM_TYPE_HID_0: mem_AddMappingShared(addr, 0x2000, HIDsharedbuff); @@ -329,7 +329,7 @@ u32 svcMapMemoryBlock() ERROR("No shared font supplied\n"); return -1; } - mem_AddMappingShared(0x18000000, APTsharedfontsize, LINEmembuffer + (0x18000000 - 0x14000000)); + mem_AddMappingShared(0x18000000, APTsharedfontsize, LINEAR_MemoryBuff + (0x18000000 - 0x14000000)); //mem_AddMappingShared(0x18000000, APTsharedfontsize, APTsharedfont); //todo ichfly break; @@ -340,7 +340,7 @@ u32 svcMapMemoryBlock() ERROR("No shared font supplied\n"); return -1; } - mem_AddMappingShared(0x18000000, APTs_sharedfontsize, LINEmembuffer + (0x18000000 - 0x14000000)); + mem_AddMappingShared(0x18000000, APTs_sharedfontsize, LINEAR_MemoryBuff + (0x18000000 - 0x14000000)); //mem_AddMappingShared(0x18000000, APTs_sharedfontsize, APTs_sharedfont); //todo ichfly break; @@ -348,9 +348,9 @@ u32 svcMapMemoryBlock() if (h->misc_ptr[0] == NULL) { if (addr == 0) //map to the lin addr - mem_AddMappingShared(h->misc[0], h->misc[1], LINEmembuffer + (h->misc[0] - 0x14000000)); //shared mem gets only mapped when not already 
mapped + mem_AddMappingShared(h->misc[0], h->misc[1], LINEAR_MemoryBuff + (h->misc[0] - 0x14000000)); //shared mem gets only mapped when not already mapped else //map to the given addr - mem_AddMappingShared(addr, h->misc[1], LINEmembuffer + (h->misc[0] - 0x14000000)); + mem_AddMappingShared(addr, h->misc[1], LINEAR_MemoryBuff + (h->misc[0] - 0x14000000)); } else {