Skip to content

Commit

Permalink
Delphi update for FMA instruction sets
Browse files Browse the repository at this point in the history
-> updated the opcode-to-db-statement conversion project (AVXPortToDelphi.dpr) so that it also handles the FMA units
  • Loading branch information
mikerabat committed Mar 1, 2018
1 parent 6e346d7 commit 26611de
Show file tree
Hide file tree
Showing 14 changed files with 278 additions and 245 deletions.
2 changes: 1 addition & 1 deletion AVXMatrixAbsOperationsx64.pas
Expand Up @@ -120,7 +120,7 @@ procedure AVXMatrixAbsAligned(Dest : PDouble; const LineWidth, Width, Height : T
jz @nextLine;

{$IFDEF FPC}vmovsd xmm1, [rcx + rax];{$ELSE}db $C5,$FB,$10,$0C,$01;{$ENDIF}
{$IFDEF FPC}vandpd xmm1, xmm1, xmm0;{$ELSE}db $C5,$F1,$54,$C8;{$ENDIF}
{$IFDEF FPC}vandpd xmm1, xmm1, xmm0;{$ELSE}db $C5,$F1,$54,$C8;{$ENDIF}
{$IFDEF FPC}vmovsd [rcx + rax], xmm1;{$ELSE}db $C5,$FB,$11,$0C,$01;{$ENDIF}

@nextLine:
Expand Down
14 changes: 7 additions & 7 deletions AVXMatrixMultOperationsx64.pas
Expand Up @@ -567,7 +567,7 @@ procedure AVXMtxMultTria2Store1(mt1 : PDouble; LineWidth1 : TASMNativeInt; mt2 :
// for y := 0 to height1 - 1
@@foryloop:
// tmp := 0;
vxorpd xmm0, xmm0, xmm0;
{$IFDEF FPC}vxorpd xmm0, xmm0, xmm0;{$ELSE}db $C5,$F9,$57,$C0;{$ENDIF}

// r8, mt2
mov rbx, r8;
Expand All @@ -580,13 +580,13 @@ procedure AVXMtxMultTria2Store1(mt1 : PDouble; LineWidth1 : TASMNativeInt; mt2 :
jz @@foridxloopend;

@@foridxloop:
vmovsd xmm1, [rbx];
vmovsd xmm2, [rax + rsi];
{$IFDEF FPC}vmovsd xmm1, [rbx];{$ELSE}db $C5,$FB,$10,$0B;{$ENDIF}
{$IFDEF FPC}vmovsd xmm2, [rax + rsi];{$ELSE}db $C5,$FB,$10,$14,$30;{$ENDIF}

add rbx, r9; // + linewidth2

vmulsd xmm1, xmm1, xmm2;
vaddsd xmm0, xmm0, xmm1;
{$IFDEF FPC}vmulsd xmm1, xmm1, xmm2;{$ELSE}db $C5,$F3,$59,$CA;{$ENDIF}
{$IFDEF FPC}vaddsd xmm0, xmm0, xmm1;{$ELSE}db $C5,$FB,$58,$C1;{$ENDIF}

add rsi, 8;
jnz @@foridxloop;
Expand All @@ -598,7 +598,7 @@ procedure AVXMtxMultTria2Store1(mt1 : PDouble; LineWidth1 : TASMNativeInt; mt2 :
add rbx, r10;
mov rsi, r12; // r12 = width2
dec rsi;
vmovsd [rbx + 8*rsi], xmm0;
{$IFDEF FPC}vmovsd [rbx + 8*rsi], xmm0;{$ELSE}db $C5,$FB,$11,$04,$F3;{$ENDIF}

// inc(PByte(pmT1), LineWidth1);
add rax, rdx;
Expand All @@ -616,7 +616,7 @@ procedure AVXMtxMultTria2Store1(mt1 : PDouble; LineWidth1 : TASMNativeInt; mt2 :
jnz @@forxloop;

// cleanup stack
vzeroupper;
{$IFDEF FPC}vzeroupper;{$ELSE}db $C5,$F8,$77;{$ENDIF}

mov rbx, iRBX;
mov rdi, iRDI;
Expand Down
6 changes: 3 additions & 3 deletions AVXPrecompiled/AVXPortToDelphi.dpr
Expand Up @@ -83,7 +83,7 @@ begin
for i := 0 to slIn.Count - 1 do
begin
// 'AVX' or 'FMA' at the very start of the line hints at a procedure/function name
if Pos('AVX', slIn[i] ) = 1 then
if (Pos('AVX', slIn[i] ) = 1) or (Pos('FMA', slIn[i]) = 1) then
begin
// search for the function name in the output file
s := slIn[i];
Expand Down Expand Up @@ -139,11 +139,11 @@ begin
idx := Pos('//', outLine);
cmt := '';
if idx > 0 then
begin
begin
cmt := Copy(outLine, idx, Length(outLine));
outLine := Copy(outLine, 1, idx - 1);
end;


idx := Pos('{$IFDEF FPC}', outLine);
if idx > 0 then
Expand Down
2 changes: 2 additions & 0 deletions AVXPrecompiled/processPasFiles.bat
Expand Up @@ -6,7 +6,9 @@ mkdir oFiles
echo Copy all object files

copy ..\Test\lib\i386-win32\AVX*.o .\oFiles\
copy ..\Test\lib\i386-win32\FMA*.o .\oFiles\
copy ..\Test\lib\x86_64-win64\AVX*x64.o .\oFiles\
copy ..\Test\lib\x86_64-win64\FMA*x64.o .\oFiles\


echo
Expand Down
26 changes: 13 additions & 13 deletions FMAMatrixMultOperations.pas
Expand Up @@ -144,8 +144,8 @@ procedure FMAMatrixMultAligned(dest : PDouble; const destLineWidth : TASMNativeI
{$IFDEF FPC}vinsertf128 ymm3, ymm3, xmm5, 1;{$ELSE}db $C4,$E3,$65,$18,$DD,$01;{$ENDIF}

// now multiply and add
vfmadd231pd ymm0, ymm2, ymm7;
vfmadd231pd ymm1, ymm3, ymm7;
{$IFDEF FPC}vfmadd231pd ymm0, ymm2, ymm7;{$ELSE}db $C4,$E2,$ED,$B8,$C7;{$ENDIF}
{$IFDEF FPC}vfmadd231pd ymm1, ymm3, ymm7;{$ELSE}db $C4,$E2,$E5,$B8,$CF;{$ENDIF}
add eax, 32;
jl @@InnerLoop1;

Expand Down Expand Up @@ -329,8 +329,8 @@ procedure FMAMatrixMultUnAligned(dest : PDouble; const destLineWidth : TASMNativ
{$IFDEF FPC}vinsertf128 ymm3, ymm3, xmm5, 1;{$ELSE}db $C4,$E3,$65,$18,$DD,$01;{$ENDIF}

// now multiply and add
vfmadd231pd ymm0, ymm2, ymm7;
vfmadd231pd ymm1, ymm3, ymm7;
{$IFDEF FPC}vfmadd231pd ymm0, ymm2, ymm7;{$ELSE}db $C4,$E2,$ED,$B8,$C7;{$ENDIF}
{$IFDEF FPC}vfmadd231pd ymm1, ymm3, ymm7;{$ELSE}db $C4,$E2,$E5,$B8,$CF;{$ENDIF}

add eax, 32;
jl @@InnerLoop1;
Expand Down Expand Up @@ -495,8 +495,8 @@ procedure FMAMtxMultTria2T1StoreT1(mt1 : PDouble; LineWidth1 : TASMNativeInt; mt
jg @@foriloopFMAend;

{$IFDEF FPC}vmovupd ymm1, [ecx + eax - 32];{$ELSE}db $C5,$FD,$10,$4C,$01,$E0;{$ENDIF}
{$IFDEF FPC}vmovupd ymm2, [ebx + eax - 32];{$ELSE}db $C5,$FD,$10,$54,$03,$E0;{$ENDIF}
vfmadd231pd ymm0, ymm1, ymm2;
{$IFDEF FPC}vmovupd ymm2, [ebx + eax - 32];{$ELSE}db $C5,$FD,$10,$54,$03,$E0;{$ENDIF}
{$IFDEF FPC}vfmadd231pd ymm0, ymm1, ymm2;{$ELSE}db $C4,$E2,$F5,$B8,$C2;{$ENDIF}

jmp @@foriloopFMA;

Expand All @@ -513,7 +513,7 @@ procedure FMAMtxMultTria2T1StoreT1(mt1 : PDouble; LineWidth1 : TASMNativeInt; mt
// two elements at a time:
{$IFDEF FPC}vmovupd xmm1, [ecx + eax];{$ELSE}db $C5,$F9,$10,$0C,$01;{$ENDIF}
{$IFDEF FPC}vmovupd xmm2, [ebx + eax];{$ELSE}db $C5,$F9,$10,$14,$03;{$ENDIF}
vfmadd231pd xmm0, xmm1, xmm2;
{$IFDEF FPC}vfmadd231pd xmm0, xmm1, xmm2;{$ELSE}db $C4,$E2,$F1,$B8,$C2;{$ENDIF}

add eax, 16;
jnz @@foriloop;
Expand Down Expand Up @@ -629,7 +629,7 @@ procedure FMAMtxMultTria2TUpperUnit(dest : PDouble; LineWidthDest : TASMNativeIn
// 4 elements at a time:
{$IFDEF FPC}vmovupd ymm1, [edx + eax - 32];{$ELSE}db $C5,$FD,$10,$4C,$02,$E0;{$ENDIF}
{$IFDEF FPC}vmovupd ymm2, [ebx + eax - 32];{$ELSE}db $C5,$FD,$10,$54,$03,$E0;{$ENDIF}
vfmadd231pd ymm0, ymm1, ymm2;
{$IFDEF FPC}vfmadd231pd ymm0, ymm1, ymm2;{$ELSE}db $C4,$E2,$F5,$B8,$C2;{$ENDIF}
jmp @@foriloop;

@@foriloopend:
Expand All @@ -643,7 +643,7 @@ procedure FMAMtxMultTria2TUpperUnit(dest : PDouble; LineWidthDest : TASMNativeIn
// need to process two more elements:
{$IFDEF FPC}vmovupd xmm1, [edx + eax];{$ELSE}db $C5,$F9,$10,$0C,$02;{$ENDIF}
{$IFDEF FPC}vmovupd xmm2, [ebx + eax];{$ELSE}db $C5,$F9,$10,$14,$03;{$ENDIF}
vfmadd231pd xmm0, xmm1, xmm2;
{$IFDEF FPC}vfmadd231pd xmm0, xmm1, xmm2;{$ELSE}db $C4,$E2,$F1,$B8,$C2;{$ENDIF}

@@finalizeloop:

Expand Down Expand Up @@ -774,7 +774,7 @@ procedure FMAMtxMultTria2T1(dest : PDouble; LineWidthDest : TASMNativeInt; mt1 :
{$IFDEF FPC}vmovddup xmm1, [esi];{$ELSE}db $C5,$FB,$12,$0E;{$ENDIF}
{$IFDEF FPC}vmovupd xmm2, [edi];{$ELSE}db $C5,$F9,$10,$17;{$ENDIF}

vfmadd231pd xmm0, xmm1, xmm2;
{$IFDEF FPC}vfmadd231pd xmm0, xmm1, xmm2;{$ELSE}db $C4,$E2,$F1,$B8,$C2;{$ENDIF}

//inc(PByte(valCounter1), LineWidth1);
//inc(PByte(valCounter2), LineWidth2);
Expand Down Expand Up @@ -896,11 +896,11 @@ procedure FMAMtxMultLowTria2T2Store1(mt1 : PDouble; LineWidth1 : TASMNativeInt;
@@foridxlongloop:
{$IFDEF FPC}vmovupd ymm1, [eax + edi - 64];{$ELSE}db $C5,$FD,$10,$4C,$38,$C0;{$ENDIF}
{$IFDEF FPC}vmovupd ymm2, [edx + edi - 64];{$ELSE}db $C5,$FD,$10,$54,$3A,$C0;{$ENDIF}
vfmadd231pd ymm0, ymm1, ymm2;
{$IFDEF FPC}vfmadd231pd ymm0, ymm1, ymm2;{$ELSE}db $C4,$E2,$F5,$B8,$C2;{$ENDIF}

{$IFDEF FPC}vmovupd ymm1, [eax + edi - 32];{$ELSE}db $C5,$FD,$10,$4C,$38,$E0;{$ENDIF}
{$IFDEF FPC}vmovupd ymm2, [edx + edi - 32];{$ELSE}db $C5,$FD,$10,$54,$3A,$E0;{$ENDIF}
vfmadd231pd ymm0, ymm1, ymm2;
{$IFDEF FPC}vfmadd231pd ymm0, ymm1, ymm2;{$ELSE}db $C4,$E2,$F5,$B8,$C2;{$ENDIF}
add edi, 64;
jl @@foridxlongloop;

Expand All @@ -915,7 +915,7 @@ procedure FMAMtxMultLowTria2T2Store1(mt1 : PDouble; LineWidth1 : TASMNativeInt;
@@foridxSSEloop:
{$IFDEF FPC}vmovupd xmm1, [eax + edi - 16];{$ELSE}db $C5,$F9,$10,$4C,$38,$F0;{$ENDIF}
{$IFDEF FPC}vmovupd xmm2, [edx + edi - 16];{$ELSE}db $C5,$F9,$10,$54,$3A,$F0;{$ENDIF}
vfmadd231pd xmm0, xmm1, xmm2;
{$IFDEF FPC}vfmadd231pd xmm0, xmm1, xmm2;{$ELSE}db $C4,$E2,$F1,$B8,$C2;{$ENDIF}
add edi, 16;
jl @@foridxSSEloop;

Expand Down
24 changes: 12 additions & 12 deletions FMAMatrixMultOperationsx64.pas
Expand Up @@ -164,8 +164,8 @@ procedure FMAMatrixMultAligned(dest : PDouble; const destLineWidth : TASMNativeI
{$IFDEF FPC}vinsertf128 ymm3, ymm3, xmm5, 1;{$ELSE}db $C4,$E3,$65,$18,$DD,$01;{$ENDIF}

// now multiply and add
vfmadd231pd ymm0, ymm7, ymm2;
vfmadd231pd ymm1, ymm7, ymm3;
{$IFDEF FPC}vfmadd231pd ymm0, ymm7, ymm2;{$ELSE}db $C4,$E2,$C5,$B8,$C2;{$ENDIF}
{$IFDEF FPC}vfmadd231pd ymm1, ymm7, ymm3;{$ELSE}db $C4,$E2,$C5,$B8,$CB;{$ENDIF}
add rax, 32;
jl @@InnerLoop1;

Expand Down Expand Up @@ -380,8 +380,8 @@ procedure FMAMatrixMultUnAligned(dest : PDouble; const destLineWidth : TASMNativ
{$IFDEF FPC}vinsertf128 ymm3, ymm3, xmm5, 1;{$ELSE}db $C4,$E3,$65,$18,$DD,$01;{$ENDIF}

// now multiply and add
vfmadd231pd ymm0, ymm7, ymm2;
vfmadd231pd ymm1, ymm7, ymm3;
{$IFDEF FPC}vfmadd231pd ymm0, ymm7, ymm2;{$ELSE}db $C4,$E2,$C5,$B8,$C2;{$ENDIF}
{$IFDEF FPC}vfmadd231pd ymm1, ymm7, ymm3;{$ELSE}db $C4,$E2,$C5,$B8,$CB;{$ENDIF}
add rax, 32;
jl @@InnerLoop1;

Expand Down Expand Up @@ -563,7 +563,7 @@ procedure FMAMtxMultTria2T1StoreT1(mt1 : PDouble; LineWidth1 : TASMNativeInt; mt

{$IFDEF FPC}vmovupd ymm1, [rcx + rax - 32];{$ELSE}db $C5,$FD,$10,$4C,$01,$E0;{$ENDIF}
{$IFDEF FPC}vmovupd ymm2, [rbx + rax - 32];{$ELSE}db $C5,$FD,$10,$54,$03,$E0;{$ENDIF}
vfmadd231pd ymm0, ymm1, ymm2;
{$IFDEF FPC}vfmadd231pd ymm0, ymm1, ymm2;{$ELSE}db $C4,$E2,$F5,$B8,$C2;{$ENDIF}
jmp @@foriloopFMA;

@@foriloopFMAend:
Expand All @@ -579,7 +579,7 @@ procedure FMAMtxMultTria2T1StoreT1(mt1 : PDouble; LineWidth1 : TASMNativeInt; mt
// two elements at a time:
{$IFDEF FPC}vmovupd xmm1, [rcx + rax];{$ELSE}db $C5,$F9,$10,$0C,$01;{$ENDIF}
{$IFDEF FPC}vmovupd xmm2, [rbx + rax];{$ELSE}db $C5,$F9,$10,$14,$03;{$ENDIF}
vfmadd231pd xmm0, xmm1, xmm2;
{$IFDEF FPC}vfmadd231pd xmm0, xmm1, xmm2;{$ELSE}db $C4,$E2,$F1,$B8,$C2;{$ENDIF}

add rax, 16;
jnz @@foriloop;
Expand Down Expand Up @@ -712,7 +712,7 @@ procedure FMAMtxMultTria2TUpperUnit(dest : PDouble; LineWidthDest : TASMNativeIn
// 4 elements at a time:
{$IFDEF FPC}vmovupd ymm1, [r8 + rax - 32];{$ELSE}db $C4,$C1,$7D,$10,$4C,$00,$E0;{$ENDIF}
{$IFDEF FPC}vmovupd ymm2, [rbx + rax - 32];{$ELSE}db $C5,$FD,$10,$54,$03,$E0;{$ENDIF}
vfmadd231pd ymm0, ymm1, ymm2;
{$IFDEF FPC}vfmadd231pd ymm0, ymm1, ymm2;{$ELSE}db $C4,$E2,$F5,$B8,$C2;{$ENDIF}
jmp @@foriloop;

@@foriloopend:
Expand All @@ -726,7 +726,7 @@ procedure FMAMtxMultTria2TUpperUnit(dest : PDouble; LineWidthDest : TASMNativeIn
// need to process two more elements:
{$IFDEF FPC}vmovupd xmm1, [r8 + rax];{$ELSE}db $C4,$C1,$79,$10,$0C,$00;{$ENDIF}
{$IFDEF FPC}vmovupd xmm2, [rbx + rax];{$ELSE}db $C5,$F9,$10,$14,$03;{$ENDIF}
vfmadd231pd xmm0, xmm1, xmm2;
{$IFDEF FPC}vfmadd231pd xmm0, xmm1, xmm2;{$ELSE}db $C4,$E2,$F1,$B8,$C2;{$ENDIF}

@@finalizeloop:

Expand Down Expand Up @@ -884,7 +884,7 @@ procedure FMAMtxMultTria2T1(dest : PDouble; LineWidthDest : TASMNativeInt; mt1 :
{$IFDEF FPC}vmovddup xmm1, [rsi];{$ELSE}db $C5,$FB,$12,$0E;{$ENDIF}
{$IFDEF FPC}vmovupd xmm2, [rdi];{$ELSE}db $C5,$F9,$10,$17;{$ENDIF}

vfmadd231pd xmm0, xmm1, xmm2;
{$IFDEF FPC}vfmadd231pd xmm0, xmm1, xmm2;{$ELSE}db $C4,$E2,$F1,$B8,$C2;{$ENDIF}

//inc(PByte(valCounter1), LineWidth1);
//inc(PByte(valCounter2), LineWidth2);
Expand Down Expand Up @@ -1024,11 +1024,11 @@ procedure FMAMtxMultLowTria2T2Store1(mt1 : PDouble; LineWidth1 : TASMNativeInt;
@@foridxlongloop:
{$IFDEF FPC}vmovupd ymm1, [rax + rdi - 64];{$ELSE}db $C5,$FD,$10,$4C,$38,$C0;{$ENDIF}
{$IFDEF FPC}vmovupd ymm2, [r8 + rdi - 64];{$ELSE}db $C4,$C1,$7D,$10,$54,$38,$C0;{$ENDIF}
vfmadd231pd ymm0, ymm1, ymm2;
{$IFDEF FPC}vfmadd231pd ymm0, ymm1, ymm2;{$ELSE}db $C4,$E2,$F5,$B8,$C2;{$ENDIF}

{$IFDEF FPC}vmovupd ymm1, [rax + rdi - 32];{$ELSE}db $C5,$FD,$10,$4C,$38,$E0;{$ENDIF}
{$IFDEF FPC}vmovupd ymm2, [r8 + rdi - 32];{$ELSE}db $C4,$C1,$7D,$10,$54,$38,$E0;{$ENDIF}
vfmadd231pd ymm0, ymm1, ymm2;
{$IFDEF FPC}vfmadd231pd ymm0, ymm1, ymm2;{$ELSE}db $C4,$E2,$F5,$B8,$C2;{$ENDIF}
add rdi, 64;
jl @@foridxlongloop;

Expand All @@ -1042,7 +1042,7 @@ procedure FMAMtxMultLowTria2T2Store1(mt1 : PDouble; LineWidth1 : TASMNativeInt;
@@foridxSSEloop:
{$IFDEF FPC}vmovupd xmm1, [rax + rdi - 16];{$ELSE}db $C5,$F9,$10,$4C,$38,$F0;{$ENDIF}
{$IFDEF FPC}vmovupd xmm2, [r8 + rdi - 16];{$ELSE}db $C4,$C1,$79,$10,$54,$38,$F0;{$ENDIF}
vfmadd231pd xmm0, xmm1, xmm2;
{$IFDEF FPC}vfmadd231pd xmm0, xmm1, xmm2;{$ELSE}db $C4,$E2,$F1,$B8,$C2;{$ENDIF}
add rdi, 16;
jl @@foridxSSEloop;

Expand Down

0 comments on commit 26611de

Please sign in to comment.