Skip to content

Commit

Permalink
Improved the compilation of the LV.Q and SV.Q VFPU instructions: sequ…
Browse files Browse the repository at this point in the history
…ences of such instructions are detected and merged when possible into one single operation (using System.arraycopy for moving data between memory and VPR registers). Small performance improvement in compilerPerf.pbp.
  • Loading branch information
gid15 committed Feb 7, 2013
1 parent d87b5f8 commit b40c65d
Show file tree
Hide file tree
Showing 3 changed files with 215 additions and 39 deletions.
134 changes: 95 additions & 39 deletions src/jpcsp/Allegrex/Instructions.java
Original file line number Diff line number Diff line change
Expand Up @@ -12,10 +12,8 @@
You should have received a copy of the GNU General Public License
along with Jpcsp. If not, see <http://www.gnu.org/licenses/>.
*/

package jpcsp.Allegrex;


import static jpcsp.Allegrex.Common.Instruction.FLAGS_BRANCH_INSTRUCTION;
import static jpcsp.Allegrex.Common.Instruction.FLAGS_LINK_INSTRUCTION;
import static jpcsp.Allegrex.Common.Instruction.FLAG_CANNOT_BE_SPLIT;
Expand All @@ -33,6 +31,8 @@
import jpcsp.Allegrex.Common.Instruction;
import jpcsp.Allegrex.VfpuState.Vcr.PfxDst;
import jpcsp.Allegrex.VfpuState.Vcr.PfxSrc;
import jpcsp.Allegrex.compiler.CodeInstruction;
import jpcsp.Allegrex.compiler.Compiler;
import jpcsp.Allegrex.compiler.ICompilerContext;
import jpcsp.Allegrex.compiler.RuntimeContext;
import jpcsp.HLE.SyscallHandler;
Expand Down Expand Up @@ -4352,11 +4352,49 @@ public void compile(ICompilerContext context, int insn) {
int rs = context.getRsRegisterIndex();
final int vsize = 4;

for (int n = 0; n < vsize; n++) {
context.prepareVtForStoreInt(vsize, vt, n);
context.memRead32(rs, simm14 + n * 4);
context.storeVtInt(vsize, vt, n);
int countSequence = 1;
int address = context.getCodeInstruction().getAddress();

// Compare LV.Q opcode and vt1 flag
final int opcodeMask = 0xFFE00003;
for (int i = 1; true; i++) {
CodeInstruction nextCodeInstruction = context.getCodeInstruction(address + i * 4);
boolean isSequence = false;
if (nextCodeInstruction != null) {
int nextInsn = nextCodeInstruction.getOpcode();
if (nextCodeInstruction != null && (nextInsn & opcodeMask) == (insn & opcodeMask)) {
int nextSimm14 = nextCodeInstruction.getImm14(true);
if (nextSimm14 == simm14 + i * 16) {
int nextVt5 = (nextInsn >> 16) & 31;
if (nextVt5 == vt5 + i) {
isSequence = true;
}
}
}
}

if (!isSequence) {
break;
}
countSequence++;
}

if (context.compileVFPULoad(context.getRsRegisterIndex(), simm14, vt, countSequence * 4)) {
if (countSequence > 1) {
if (Compiler.log.isDebugEnabled()) {
Compiler.log.debug(String.format("lv.q sequence 0x%08X-0x%08X", address, address + countSequence * 4 - 4));
}

// Skip the next lv.q instructions
context.skipInstructions(countSequence - 1, false);
}
} else {
for (int n = 0; n < vsize; n++) {
context.prepareVtForStoreInt(vsize, vt, n);
context.memRead32(rs, simm14 + n * 4);
context.storeVtInt(vsize, vt, n);
}
}
}
@Override
public String disasm(int address, int insn) {
Expand Down Expand Up @@ -4577,11 +4615,49 @@ public void compile(ICompilerContext context, int insn) {
int rs = context.getRsRegisterIndex();
int vsize = 4;

for (int n = 0; n < vsize; n++) {
context.prepareMemWrite32(rs, simm14 + n * 4);
context.loadVtInt(vsize, vt, n);
context.memWrite32(rs, simm14 + n * 4);
int countSequence = 1;
int address = context.getCodeInstruction().getAddress();

// Compare SV.Q opcode and vt1 flag
final int opcodeMask = 0xFFE00001;
for (int i = 1; i < 4; i++) {
CodeInstruction nextCodeInstruction = context.getCodeInstruction(address + i * 4);
boolean isSequence = false;
if (nextCodeInstruction != null) {
int nextInsn = nextCodeInstruction.getOpcode();
if (nextCodeInstruction != null && (nextInsn & opcodeMask) == (insn & opcodeMask)) {
int nextSimm14 = nextCodeInstruction.getImm14(true);
if (nextSimm14 == simm14 + i * 16) {
int nextVt5 = (nextInsn >> 16) & 31;
if (nextVt5 == vt5 + i) {
isSequence = true;
}
}
}
}

if (!isSequence) {
break;
}
countSequence++;
}

if (context.compileVFPUStore(context.getRsRegisterIndex(), simm14, vt, countSequence * 4)) {
if (countSequence > 1) {
if (Compiler.log.isDebugEnabled()) {
Compiler.log.debug(String.format("sv.q sequence 0x%08X-0x%08X", address, address + countSequence * 4 - 4));
}

// Skip the next sv.q instructions
context.skipInstructions(countSequence - 1, false);
}
} else {
for (int n = 0; n < vsize; n++) {
context.prepareMemWrite32(rs, simm14 + n * 4);
context.loadVtInt(vsize, vt, n);
context.memWrite32(rs, simm14 + n * 4);
}
}
}
@Override
public String disasm(int address, int insn) {
Expand All @@ -4603,30 +4679,12 @@ public String disasm(int address, int insn) {

@Override
public void interpret(Processor processor, int insn) {
int vt1 = (insn>>0)&1;
int imm14 = (insn>>2)&16383;
int vt5 = (insn>>16)&31;
int rs = (insn>>21)&31;


// Checked using VfpuTest: VWB.Q is equivalent to SV.Q
processor.cpu.doSVQ((vt5|(vt1<<5)), rs, (int)(short)(imm14 << 2));

// Checked using VfpuTest: VWB.Q is equivalent to SV.Q
SVQ.interpret(processor, insn);
}
@Override
public void compile(ICompilerContext context, int insn) {
int vt1 = (insn>>0)&1;
int vt5 = (insn>>16)&31;
int vt = vt5 | (vt1<<5);
int simm14 = context.getImm14(true);
int rs = context.getRsRegisterIndex();
int vsize = 4;

for (int n = 0; n < vsize; n++) {
context.prepareMemWrite32(rs, simm14 + n * 4);
context.loadVtInt(vsize, vt, n);
context.memWrite32(rs, simm14 + n * 4);
}
SVQ.compile(context, insn);
}
@Override
public String disasm(int address, int insn) {
Expand Down Expand Up @@ -6540,8 +6598,7 @@ public void compile(ICompilerContext context, int insn) {
int id = context.getVdRegisterIndex() & 3;
for (int n = 0; n < vsize; n++) {
context.prepareVdForStore(n);
float value = (id == n ? 1.0f : 0.0f);
context.getMethodVisitor().visitLdcInsn(value);
context.getMethodVisitor().visitInsn(id == n ? Opcodes.FCONST_1 : Opcodes.FCONST_0);
context.storeVd(n);
}
context.endPfxCompiled();
Expand Down Expand Up @@ -6709,7 +6766,7 @@ public void compile(ICompilerContext context, int insn) {
int vsize = context.getVsize();
for (int n = 0; n < vsize; n++) {
context.prepareVdForStore(n);
context.getMethodVisitor().visitLdcInsn(0.0f);
context.getMethodVisitor().visitInsn(Opcodes.FCONST_0);
context.storeVd(n);
}
context.endPfxCompiled();
Expand Down Expand Up @@ -6747,7 +6804,7 @@ public void compile(ICompilerContext context, int insn) {
int vsize = context.getVsize();
for (int n = 0; n < vsize; n++) {
context.prepareVdForStore(n);
context.getMethodVisitor().visitLdcInsn(1.0f);
context.getMethodVisitor().visitInsn(Opcodes.FCONST_1);
context.storeVd(n);
}
context.endPfxCompiled();
Expand Down Expand Up @@ -9665,8 +9722,7 @@ public void compile(ICompilerContext context, int insn) {
int id = (vd + i) & 3;
for (int n = 0; n < vsize; n++) {
context.prepareVdForStore(vsize, vd + i, n);
float value = (id == n ? 1.0f : 0.0f);
context.getMethodVisitor().visitLdcInsn(value);
context.getMethodVisitor().visitInsn(id == n ? Opcodes.FCONST_1 : Opcodes.FCONST_0);
context.storeVd(vsize, vd + i, n);
}
context.flushPfxCompiled(vsize, vd + i, true);
Expand Down Expand Up @@ -9708,7 +9764,7 @@ public void compile(ICompilerContext context, int insn) {
for (int i = 0; i < vsize; i++) {
for (int n = 0; n < vsize; n++) {
context.prepareVdForStore(vsize, vd + i, n);
context.getMethodVisitor().visitLdcInsn(0.0f);
context.getMethodVisitor().visitInsn(Opcodes.FCONST_0);
context.storeVd(vsize, vd + i, n);
}
context.flushPfxCompiled(vsize, vd + i, true);
Expand Down Expand Up @@ -9750,7 +9806,7 @@ public void compile(ICompilerContext context, int insn) {
for (int i = 0; i < vsize; i++) {
for (int n = 0; n < vsize; n++) {
context.prepareVdForStore(vsize, vd + i, n);
context.getMethodVisitor().visitLdcInsn(1.0f);
context.getMethodVisitor().visitInsn(Opcodes.FCONST_1);
context.storeVd(vsize, vd + i, n);
}
context.flushPfxCompiled(vsize, vd + i, true);
Expand Down
115 changes: 115 additions & 0 deletions src/jpcsp/Allegrex/compiler/CompilerContext.java
Original file line number Diff line number Diff line change
Expand Up @@ -135,6 +135,7 @@ public class CompilerContext implements ICompilerContext {
private static final String profilerInternalName = Type.getInternalName(Profiler.class);
public static final String executableDescriptor = Type.getDescriptor(IExecutable.class);
public static final String executableInternalName = Type.getInternalName(IExecutable.class);
private static final String arraycopyDescriptor = "(" + Type.getDescriptor(Object.class) + "I" + Type.getDescriptor(Object.class) + "II)V";
private static Set<Integer> fastSyscalls;
private int instanceIndex;
private NativeCodeSequence preparedCallNativeCodeBlock = null;
Expand Down Expand Up @@ -1847,10 +1848,16 @@ public void setMethodVisitor(MethodVisitor mv) {
this.mv = mv;
}

/**
 * Returns the code instruction currently held by this context,
 * i.e. the value last stored in the {@code codeInstruction} field.
 *
 * @return the current code instruction
 */
@Override
public CodeInstruction getCodeInstruction() {
return codeInstruction;
}

/**
 * Looks up a code instruction by address, delegating to the
 * current code block.
 *
 * @param address the address of the instruction to look up
 * @return whatever {@code getCodeBlock().getCodeInstruction(address)} returns;
 *         NOTE(review): callers in this change test the result against
 *         {@code null}, so a missing instruction is presumably reported
 *         as {@code null} - confirm against CodeBlock.
 */
@Override
public CodeInstruction getCodeInstruction(int address) {
return getCodeBlock().getCodeInstruction(address);
}

/**
 * Stores the given code instruction as the current one of this context.
 *
 * @param codeInstruction the instruction returned by subsequent calls
 *                        to {@link #getCodeInstruction()}
 */
public void setCodeInstruction(CodeInstruction codeInstruction) {
this.codeInstruction = codeInstruction;
}
Expand Down Expand Up @@ -2622,6 +2629,7 @@ public boolean isSkipDelaySlot() {
return skipDelaySlot;
}

@Override
public void skipInstructions(int numberInstructionsToBeSkipped, boolean skipDelaySlot) {
this.numberInstructionsToBeSkipped = numberInstructionsToBeSkipped;
this.skipDelaySlot = skipDelaySlot;
Expand Down Expand Up @@ -3325,4 +3333,111 @@ public void compileVFPUInstr(Object cstBefore, int opcode, String mathFunction)
/**
 * Emits a static call to the method described by the given native code
 * sequence. The call is generated with the descriptor {@code ()V}.
 *
 * @param nativeCodeSequence supplies the owner class and the method name
 *                           of the call to be emitted
 */
public void visitHook(NativeCodeSequence nativeCodeSequence) {
    final String ownerInternalName = Type.getInternalName(nativeCodeSequence.getNativeCodeSequenceClass());
    final String hookMethodName = nativeCodeSequence.getMethodName();

    mv.visitMethodInsn(Opcodes.INVOKESTATIC, ownerInternalName, hookMethodName, "()V");
}

/**
 * Tries to emit an optimized load of {@code count} consecutive 32-bit
 * values from memory into the VPR registers, using a single
 * {@code System.arraycopy} call followed by an update of the float view
 * of each copied register.
 *
 * The generated code is equivalent to
 * <pre>
 *   System.arraycopy(RuntimeContext.memoryInt,
 *                    wordIndex(register + offset),
 *                    RuntimeContext.vprInt,
 *                    vprIndex,
 *                    count);
 * </pre>
 *
 * @param registerIndex index of the register holding the base address
 * @param offset        immediate offset added to the base address
 * @param vt            VFPU register specifier; bit 5 must be clear
 * @param count         number of 32-bit values to copy
 * @return {@code true} if the optimized code has been emitted,
 *         {@code false} if nothing was emitted and the caller has to
 *         generate the load itself
 */
@Override
public boolean compileVFPULoad(int registerIndex, int offset, int vt, int count) {
    // The optimized sequence requires the memoryInt array and is only
    // possible for column access (bit 5 of vt cleared).
    if (RuntimeContext.memoryInt == null || (vt & 32) != 0) {
        return false;
    }

    // arraycopy "src"
    loadMemoryInt();

    // arraycopy "srcPos": start with the byte address (register + offset)
    loadRegister(registerIndex);
    if (offset != 0) {
        loadImm(offset);
        mv.visitInsn(Opcodes.IADD);
    }

    // Turn the byte address into an index of 32-bit words. Both variants
    // end with an unsigned right shift; only the shift amount differs.
    final int wordIndexShift;
    if (checkMemoryAccess()) {
        // checkMemoryRead32(address, address of the current instruction) >>> 2
        loadImm(getCodeInstruction().getAddress());
        mv.visitMethodInsn(Opcodes.INVOKESTATIC, Type.getInternalName(RuntimeContext.class), "checkMemoryRead32", "(II)I");
        wordIndexShift = 2;
    } else {
        // (address << 2) >>> 4: discards the 2 highest address bits
        // and divides by 4 in one go.
        loadImm(2);
        mv.visitInsn(Opcodes.ISHL);
        wordIndexShift = 4;
    }
    loadImm(wordIndexShift);
    mv.visitInsn(Opcodes.IUSHR);

    // arraycopy "dest", "destPos" and "length"
    loadVprInt();
    final int vprIndex = VfpuState.getVprIndex((vt >> 2) & 7, vt & 3, (vt & 64) >> 6);
    loadImm(vprIndex);
    loadImm(count);
    mv.visitMethodInsn(Opcodes.INVOKESTATIC, Type.getInternalName(System.class), "arraycopy", arraycopyDescriptor);

    // Refresh the VPR float values from the int values just copied
    int copied = 0;
    while (copied < count) {
        final int slot = vprIndex + copied;
        loadVprFloat();
        loadImm(slot);
        loadVprInt();
        loadImm(slot);
        mv.visitInsn(Opcodes.IALOAD);
        convertVIntToFloat();
        mv.visitInsn(Opcodes.FASTORE);
        copied++;
    }

    return true;
}

/**
 * Tries to emit an optimized store of {@code count} consecutive 32-bit
 * values from the VPR registers to memory, using a single
 * {@code System.arraycopy} call.
 *
 * The generated code is equivalent to
 * <pre>
 *   System.arraycopy(RuntimeContext.vprInt,
 *                    vprIndex,
 *                    RuntimeContext.memoryInt,
 *                    wordIndex(register + offset),
 *                    count);
 * </pre>
 *
 * @param registerIndex index of the register holding the base address
 * @param offset        immediate offset added to the base address
 * @param vt            VFPU register specifier; bit 5 must be clear
 * @param count         number of 32-bit values to copy
 * @return {@code true} if the optimized code has been emitted,
 *         {@code false} if nothing was emitted and the caller has to
 *         generate the store itself
 */
@Override
public boolean compileVFPUStore(int registerIndex, int offset, int vt, int count) {
    // The optimized sequence requires the memoryInt array and is only
    // possible for column access (bit 5 of vt cleared).
    if (RuntimeContext.memoryInt == null || (vt & 32) != 0) {
        return false;
    }

    // arraycopy "src" and "srcPos"
    loadVprInt();
    loadImm(VfpuState.getVprIndex((vt >> 2) & 7, vt & 3, (vt & 64) >> 6));

    // arraycopy "dest"
    loadMemoryInt();

    // arraycopy "destPos": start with the byte address (register + offset)
    loadRegister(registerIndex);
    if (offset != 0) {
        loadImm(offset);
        mv.visitInsn(Opcodes.IADD);
    }

    // Turn the byte address into an index of 32-bit words. Both variants
    // end with an unsigned right shift; only the shift amount differs.
    final int wordIndexShift;
    if (checkMemoryAccess()) {
        // checkMemoryWrite32(address, address of the current instruction) >>> 2
        loadImm(getCodeInstruction().getAddress());
        mv.visitMethodInsn(Opcodes.INVOKESTATIC, Type.getInternalName(RuntimeContext.class), "checkMemoryWrite32", "(II)I");
        wordIndexShift = 2;
    } else {
        // (address << 2) >>> 4: discards the 2 highest address bits
        // and divides by 4 in one go.
        loadImm(2);
        mv.visitInsn(Opcodes.ISHL);
        wordIndexShift = 4;
    }
    loadImm(wordIndexShift);
    mv.visitInsn(Opcodes.IUSHR);

    // arraycopy "length"
    loadImm(count);
    mv.visitMethodInsn(Opcodes.INVOKESTATIC, Type.getInternalName(System.class), "arraycopy", arraycopyDescriptor);

    return true;
}
}
5 changes: 5 additions & 0 deletions src/jpcsp/Allegrex/compiler/ICompilerContext.java
Original file line number Diff line number Diff line change
Expand Up @@ -164,4 +164,9 @@ public interface ICompilerContext {
public boolean isVsVdOverlap();
public boolean isVtVdOverlap();
public void compileVFPUInstr(Object cstBefore, int opcode, String mathFunction);
/**
 * Tries to generate an optimized load of {@code count} 32-bit values
 * from memory (base register {@code registerIndex} plus {@code offset})
 * into the VFPU registers starting at {@code vt}.
 *
 * @return {@code true} if the optimized code has been generated;
 *         {@code false} if the caller has to generate the load itself
 *         (CompilerContext returns false when RuntimeContext.memoryInt
 *         is null or when bit 5 of vt is set)
 */
public boolean compileVFPULoad(int registerIndex, int offset, int vt, int count);
/**
 * Tries to generate an optimized store of {@code count} 32-bit values
 * from the VFPU registers starting at {@code vt} to memory (base
 * register {@code registerIndex} plus {@code offset}).
 *
 * @return {@code true} if the optimized code has been generated;
 *         {@code false} if the caller has to generate the store itself
 *         (CompilerContext returns false when RuntimeContext.memoryInt
 *         is null or when bit 5 of vt is set)
 */
public boolean compileVFPUStore(int registerIndex, int offset, int vt, int count);
/** Returns the code instruction currently held by the context. */
public CodeInstruction getCodeInstruction();
/**
 * Returns the code instruction located at the given address.
 * NOTE(review): callers test the result against null, so a missing
 * instruction is presumably reported as null - confirm.
 */
public CodeInstruction getCodeInstruction(int address);
/**
 * Requests that the given number of following instructions be skipped.
 * NOTE(review): the exact meaning of skipDelaySlot is not visible here;
 * CompilerContext only stores both values - confirm against the compiler loop.
 */
public void skipInstructions(int numberInstructionsToBeSkipped, boolean skipDelaySlot);
}

1 comment on commit b40c65d

@sum2012
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Please sign in to comment.