Skip to content

HTTPS clone URL

Subversion checkout URL

You can clone with HTTPS or Subversion.
Download ZIP
Browse files

Found kkrunchy_k7 023.a4/asm07 (most recent version) source code

  • Loading branch information...
commit 5ddf84f8c5c511b59b284911fda17aa6509b7bf4 1 parent fd16b03
@rygorous rygorous authored
View
2  kkrunchy_k7/_config.hpp
@@ -14,4 +14,4 @@
#define sINTRO 0 // compile for small size
#define sDEBUG 1 // include debug code even in release build
#define sAPPNAME "kkrunchy" // application name for window title
-#define sVERSION "0.23 alpha 3" // version number string
+#define sVERSION "0.23a4/asm07" // version number string
View
40 kkrunchy_k7/depack2.asm
@@ -4,7 +4,7 @@
bits 32
-%define NBUFFERS 22
+%define NBUFFERS 20
%define BUFFER dataArea.buffer
struc dataArea
@@ -243,9 +243,9 @@ DisUnfilter:
cmp al, 0x04
jne .nosib
- xchg esi, [ebp+BUFFER+21*4]
+ xchg esi, [ebp+BUFFER+19*4]
movsb
- xchg esi, [ebp+BUFFER+21*4]
+ xchg esi, [ebp+BUFFER+19*4]
.nosib mov dl, ch
and dl, 0xc0
@@ -276,13 +276,8 @@ DisUnfilter:
inc ebx
.nomr5 xchg esi, [ebp+BUFFER+13*4+ebx*4]
lodsd
- dec esi
xchg esi, [ebp+BUFFER+13*4+ebx*4]
- xchg esi, [ebp+BUFFER+18*4]
- shl eax, 8
- lodsb
- xchg esi, [ebp+BUFFER+18*4]
- ror eax, 8
+ bswap eax
stosd
cmp word [ebp+dataArea.codebuf], 0x24ff
@@ -301,15 +296,8 @@ DisUnfilter:
jnz .noad
xchg esi, [ebp+BUFFER+15*4]
- lodsd
- dec esi
+ movsd
xchg esi, [ebp+BUFFER+15*4]
- xchg esi, [ebp+BUFFER+19*4]
- shl eax, 8
- lodsb
- xchg esi, [ebp+BUFFER+19*4]
- ror eax, 8
- stosd
jmp .main
.noad dec cl
@@ -324,12 +312,8 @@ DisUnfilter:
cmp byte [edi-1], 0xe8
je .dwcal
- xchg esi, [ebp+BUFFER+20*4]
- lodsw
- xchg esi, [ebp+BUFFER+20*4]
xchg esi, [ebp+BUFFER+17*4]
- shl eax, 16
- lodsw
+ lodsd
xchg esi, [ebp+BUFFER+17*4]
shr eax, 1
@@ -350,14 +334,10 @@ DisUnfilter:
mov eax, [ebp+dataArea.funcTable+eax*4]
jmp short .storad
-.dcesc xchg esi, [ebp+BUFFER+20*4]
- lodsw
- xchg esi, [ebp+BUFFER+20*4]
- xchg esi, [ebp+BUFFER+16*4]
- shl eax, 16
- lodsw
- xchg esi, [ebp+BUFFER+16*4]
-
+.dcesc xchg esi, [ebp+BUFFER+18*4]
+ lodsd
+ xchg esi, [ebp+BUFFER+18*4]
+
mov ebx, [ebp+dataArea.funcTable]
mov [ebp+dataArea.funcTable+ebx*4], eax
.dcinc inc byte [ebp+dataArea.funcTable]
View
99 kkrunchy_k7/depacker.asm
@@ -8,21 +8,20 @@ bits 32
section .bss
-%define MEMSHIFT 18
+%define MEMSHIFT 23
%define MEM (1<<MEMSHIFT)
-%define NMODEL 9
-%define NINPUT 40
-%define NWEIGHT (256+256+16)
+%define NMODEL 11
+%define NINPUT 48
+%define NWEIGHT (256+256+16+128)
%define MAXLEN 2047
%define USEAPM 1
-%define APMSIZE 1024
+%define APMSIZE 8192
struc ContextModel
.cpr resd 1
.cps resd 1
.ctx resd 1
.st resd 1
- .t resb MEM*2
.size:
endstruc
@@ -53,7 +52,8 @@ struc Work
.APMi resd 1
%endif
.cm resb ContextModel.size * NMODEL
- .match resd MEM
+ .match resd MEM/16
+ .modelMem resb MEM
.tx resw NINPUT
.wx resw NINPUT*NWEIGHT
.tx2 resw 4
@@ -127,9 +127,9 @@ squash:
contextHash:
mov edx, eax
- shr edx, MEMSHIFT-1
- and eax, ((MEM / 2) - 1) & ~1
- lea eax, [esi+ContextModel.t+eax*4]
+ shr edx, 24
+ and eax, ((MEM / 4) - 1) & ~1
+ lea eax, [ebp+Work.modelMem+eax*4]
cmp dl, [eax]
jne .notequal1
@@ -159,7 +159,7 @@ contextHash:
; in: eax=err
; esi=t
; edi=w
-; ecx=n/8
+; ecx=n/4
; out: esi,edi advanced.
train:
movd mm0, eax
@@ -168,8 +168,6 @@ train:
pcmpeqb mm1, mm1
psrlw mm1, 15
-%if 1
- add ecx, ecx
.lp:
movq mm3, [esi]
movq mm2, [edi]
@@ -182,28 +180,6 @@ train:
add esi, byte 8
add edi, byte 8
loop .lp
-%else
-.lp:
- movq mm3, [esi]
- movq mm5, [esi+8]
- movq mm2, [edi]
- movq mm4, [edi+8]
- paddsw mm3, mm3
- paddsw mm5, mm5
- pmulhw mm3, mm0
- pmulhw mm5, mm0
- paddsw mm3, mm1
- paddsw mm5, mm1
- psraw mm3, 1
- psraw mm5, 1
- paddsw mm2, mm3
- paddsw mm4, mm5
- movq [edi], mm2
- movq [edi+8], mm4
- add esi, byte 16
- add edi, byte 16
- loop .lp
-%endif
ret
@@ -406,7 +382,7 @@ STAGE0ENTRY:
xor eax, eax
call contextHash
mov [esi+ContextModel.cps], eax
- add esi, ContextModel.size
+ add esi, byte ContextModel.size
loop .ctxModelLoop
%if USEAPM
@@ -448,7 +424,7 @@ STAGE0ENTRY:
mov eax, [ebp+Work.zeroprob]
neg edx
inc eax
- and edx, 4095
+ shr edx, 32-12 ; eqv. to and edx, 4095 here
add eax, edx
shr eax, 1
mov [ebp+Work.zeroprob], eax
@@ -570,7 +546,7 @@ STAGE0ENTRY:
imul edi, byte (NINPUT*2)
lea edi, [ebp+Work.wx+edi]
mov esi, WorkData+Work.tx
- push byte (NINPUT/8)
+ push byte (NINPUT/4)
pop ecx
call train
@@ -625,14 +601,20 @@ STAGE0ENTRY:
; fnv hash function
; assumes edi=_bufPtr
mov eax, 0x811c9dc5
+ inc ecx
+ imul eax, ecx
+ dec ecx
mov bl, [masks+ecx-1]
+ mov dh, [bitm+ecx-1]
.hashnext:
dec edi
shr bl, 1
jnc .hashnotset
- xor al, [edi]
+ mov dl, [edi]
+ and dl, dh
+ xor al, dl
imul eax, 0x01000193
jmp short .hashnext
@@ -643,7 +625,7 @@ STAGE0ENTRY:
; match processing
; assumes esi points to "match"
- and eax, MEM-1
+ and eax, (MEM/16)-1
mov edi, [ebp+Work.dst]
xchg edi, [esi+eax*4] ; now edi=Match[h]
mov esi, [ebp+Work.dst]
@@ -735,7 +717,7 @@ STAGE0ENTRY:
cmp eax, 400
jbe .contextmodels
- mov dword [ebp+Work.ctx], 526
+ mov dword [ebp+Work.ctx], 512+14
jmp .mix
; context models
@@ -821,7 +803,7 @@ STAGE0ENTRY:
cmp ebx, byte 1
sbb dword [ebp+Work.ctx+8], byte -1
- add esi, ContextModel.size
+ add esi, byte ContextModel.size
dec ecx
jnz near .contextloop
@@ -833,6 +815,17 @@ STAGE0ENTRY:
inc ah
mov [ebp+Work.ctx+4], eax
+ xor ebx, ebx
+ mov eax, [ebp+Work.matchl]
+ dec eax
+ cmovs eax, ebx
+ not bl
+ cmp eax, ebx
+ cmova eax, ebx
+ mov eax, [ebp+Work.runTable+eax*4]
+ shr eax, 3
+ add [ebp+Work.ctx+8], eax
+
.mix:
; perform mixing
push byte 2
@@ -845,7 +838,6 @@ STAGE0ENTRY:
mov esi, WorkData+Work.tx
pxor mm0, mm0
-%if 1
push byte (NINPUT/4)
pop ecx
@@ -857,23 +849,6 @@ STAGE0ENTRY:
add esi, byte 8
add edi, byte 8
loop .mixdploop
-%else
- push byte (NINPUT/8)
- pop ecx
-
-.mixdploop:
- movq mm1, [esi]
- movq mm2, [esi+8]
- pmaddwd mm1, [edi]
- pmaddwd mm2, [edi+8]
- psrad mm1, 8
- psrad mm2, 8
- paddd mm0, mm1
- paddd mm0, mm2
- add esi, byte 16
- add edi, byte 16
- loop .mixdploop
-%endif
movq mm1, mm0
psrlq mm1, 32
@@ -971,4 +946,6 @@ section .data
squashTab dw 1, 2, 4, 6, 10, 17, 27, 45, 74, 120, 194
dw 311, 488, 747,1102,1546,2048,2550,2994,3349,3608,3785
dw 3902,3976,4022,4051,4069,4079,4086,4090,4092,4094,4095
-masks db 0x1f, 0x07, 0x0a, 0x09, 0x05, 0x03, 0x04, 0x02, 0x01
+masks db 0x1f, 0x27, 0x88, 0x07, 0x0a, 0x09, 0x05, 0x03, 0x04, 0x02, 0x01
+bitm db 0xff, 0xff, 0xff, 0xe0, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff
+
View
57 kkrunchy_k7/dis.cpp
@@ -94,6 +94,19 @@ static sU8 Tablefx[32] =
// caaaaareful with this table (match disdeck!)
/****************************************************************************/
+
+static sU32 bswap(sU32 x)
+{
+ __asm
+ {
+ mov eax, [x];
+ bswap eax;
+ mov [x], eax;
+ }
+
+ return x;
+}
+
/****************************************************************************/
DataBuffer::DataBuffer()
@@ -289,7 +302,7 @@ sInt DisFilter::ProcessInstr(sU8 *instr,sU32 memory,sU32 VA)
if((modrm & 0x07) == 4 && (modrm & 0xc0) != 0xc0)
{
sib = *instr++;
- Buffer[21].PutValue(sib,1);
+ Buffer[19].PutValue(sib,1);
}
if((modrm & 0xc0) == 0x40)
@@ -298,8 +311,7 @@ sInt DisFilter::ProcessInstr(sU8 *instr,sU32 memory,sU32 VA)
if((modrm & 0xc0) == 0x80 || (modrm & 0xc7) == 0x05 || ((modrm & 0xc0) == 0 && (sib & 0x07) == 5))
{
val = *(sU32 *) instr; instr += 4;
- Buffer[(modrm & 0xc7) == 5 ? 14 : 13].PutValue(val & 0xffffff,3);
- Buffer[18].PutValue(val >> 24,1);
+ Buffer[(modrm & 0xc7) == 5 ? 14 : 13].PutValue(bswap(val),4);
if(code == 0xff && modrm == 0x24)
JumpTable = sMin(JumpTable,val);
@@ -312,8 +324,7 @@ sInt DisFilter::ProcessInstr(sU8 *instr,sU32 memory,sU32 VA)
{
case fAD:
val = *(sU32 *) instr; instr += 4;
- Buffer[15].PutValue(val & 0xffffff,3);
- Buffer[19].PutValue(val >> 24,1);
+ Buffer[15].PutValue(val,4);
break;
case fBR:
@@ -323,12 +334,12 @@ sInt DisFilter::ProcessInstr(sU8 *instr,sU32 memory,sU32 VA)
case fDR:
val = *(sU32 *) instr; instr += 4;
val += (instr - start) + memory;
+
if(code != 0xe8)
{
i = val - LastJump;
tmp = (i < 0) ? -i * 2 - 1 : i * 2;
- Buffer[17].PutValue(tmp & 0xffff,2);
- Buffer[20].PutValue(tmp >> 16,2);
+ Buffer[17].PutValue(tmp,4);
LastJump = val;
}
else
@@ -340,8 +351,7 @@ sInt DisFilter::ProcessInstr(sU8 *instr,sU32 memory,sU32 VA)
Buffer[16].PutValue(i+1,1);
if(i == 255)
{
- Buffer[16].PutValue(val & 0xffff,2);
- Buffer[20].PutValue(val >> 16,2);
+ Buffer[18].PutValue(val,4);
FuncTable[FuncTablePos] = val;
if(++FuncTablePos == 255)
@@ -573,8 +583,8 @@ void DisUnFilter(sU8 *packed,sU8 *dest,sU32 oldAddr,sU32 newAddr,ReorderBuffer &
if((modrm & 0x07) == 4 && (modrm & 0xc0) != 0xc0)
{
- reord.Add(newAddr + buffer[21] - opacked,1,dest - oldDest + oldAddr,1);
- sib = *buffer[21]++;
+ reord.Add(newAddr + buffer[19] - opacked,1,dest - oldDest + oldAddr,1);
+ sib = *buffer[19]++;
*dest++ = sib;
}
@@ -588,11 +598,9 @@ void DisUnFilter(sU8 *packed,sU8 *dest,sU32 oldAddr,sU32 newAddr,ReorderBuffer &
{
i = (modrm & 0xc7) == 5 ? 14 : 13;
- reord.Add(newAddr + buffer[18] - opacked,1,dest - oldDest + oldAddr + 3,1);
- reord.Add(newAddr + buffer[i] - opacked,3,dest - oldDest + oldAddr,3);
+ reord.Add(newAddr + buffer[i] - opacked,4,dest - oldDest + oldAddr,4);
- val = (*buffer[18]++) << 24;
- val |= (*(sU32 *) buffer[i]) & 0xffffff; buffer[i] += 3;
+ val = bswap(*(sU32 *) buffer[i]); buffer[i] += 4;
*(sU32 *) dest = val; dest += 4;
if(code == 0xff && modrm == 0x24)
@@ -610,10 +618,8 @@ void DisUnFilter(sU8 *packed,sU8 *dest,sU32 oldAddr,sU32 newAddr,ReorderBuffer &
switch(flags & fTYPE)
{
case fAD:
- reord.Add(newAddr + buffer[19] - opacked,1,dest - oldDest + oldAddr + 3,1);
- reord.Add(newAddr + buffer[15] - opacked,3,dest - oldDest + oldAddr,3);
- val = (*buffer[19]++) << 24;
- val |= (*(sU32 *) buffer[15]) & 0xffffff; buffer[15] += 3;
+ reord.Add(newAddr + buffer[15] - opacked,4,dest - oldDest + oldAddr,4);
+ val = *(sU32 *) buffer[15]; buffer[15] += 4;
*(sU32 *) dest = val; dest += 4;
break;
@@ -633,10 +639,8 @@ void DisUnFilter(sU8 *packed,sU8 *dest,sU32 oldAddr,sU32 newAddr,ReorderBuffer &
}
else
{
- reord.Add(newAddr + buffer[16] - 1 - opacked,3,dest - oldDest + oldAddr,2);
- reord.Add(newAddr + buffer[20] - opacked,2,dest - oldDest + oldAddr + 2,2);
- val = *(sU16 *) buffer[16]; buffer[16] += 2;
- val |= (*(sU16 *) buffer[20]) << 16; buffer[20] += 2;
+ reord.Add(newAddr + buffer[18] - opacked,4,dest - oldDest + oldAddr,4);
+ val = *(sU32 *) buffer[18]; buffer[18] += 4;
funcTable[funcTablePos] = val;
if(++funcTablePos == 256)
@@ -645,10 +649,8 @@ void DisUnFilter(sU8 *packed,sU8 *dest,sU32 oldAddr,sU32 newAddr,ReorderBuffer &
}
else
{
- reord.Add(newAddr + buffer[17] - opacked,2,dest - oldDest + oldAddr,2);
- reord.Add(newAddr + buffer[20] - opacked,2,dest - oldDest + oldAddr + 2,2);
- val = *(sU16 *) buffer[17]; buffer[17] += 2;
- val |= (*(sU16 *) buffer[20]) << 16; buffer[20] += 2;
+ reord.Add(newAddr + buffer[17] - opacked,4,dest - oldDest + oldAddr,4);
+ val = *(sU32 *) buffer[17]; buffer[17] += 4;
if(val & 1)
val = ~(val >> 1);
else
@@ -700,3 +702,4 @@ void DisUnFilter(sU8 *packed,sU8 *dest,sU32 oldAddr,sU32 newAddr,ReorderBuffer &
memory += dest - start;
}
}
+
View
2  kkrunchy_k7/dis.hpp
@@ -8,7 +8,7 @@
/****************************************************************************/
-#define NBUFFERS 22
+#define NBUFFERS 20
struct DataBuffer
{
View
11 kkrunchy_k7/exepacker.cpp
@@ -8,6 +8,8 @@
#include "debuginfo.hpp"
#include "dis.hpp"
+#define NODEPACK2 0
+
/****************************************************************************/
extern "C" sU8 depacker[],depacker2[];
@@ -816,15 +818,19 @@ sU8 *EXEPacker::Pack(sU8 *source,sInt &outSize,DebugInfo *info,PackerCallback cb
sSetMem(UnImage,0,dataSize+8);
UnImage += 8;
+#if !NODEPACK2
sCopyMem(UnImage,depacker2,depacker2Size);
depack2.Init(UnImage,depacker2Size);
+#endif
*((sU32 *) (UnImage + depacker2Size)) = ImportSize;
sCopyMem(UnImage + depacker2Size + 4,Imports,ImportSize);
ImportSize += 4;
delete[] Imports;
+#if !NODEPACK2
sCopyMem(UnImage + depacker2Size + ImportSize,filter.Output.Data,filter.Output.Size);
+#endif
UnImageVA = UnImage + depacker2Size + ImportSize + filter.Output.Size - Section[0].VirtAddr;
@@ -848,7 +854,9 @@ sU8 *EXEPacker::Pack(sU8 *source,sInt &outSize,DebugInfo *info,PackerCallback cb
// clean up
CleanupUnImage();
+#if !NODEPACK2
sSetMem(UnImageVA + PH->CodeStart,0,codeSize);
+#endif
// copy blobs (and write blob ptrs to ptrtable)
if(blobs)
@@ -864,11 +872,13 @@ sU8 *EXEPacker::Pack(sU8 *source,sInt &outSize,DebugInfo *info,PackerCallback cb
}
// set up (stage2) depacker params
+#if !NODEPACK2
depack2Base = PH->ImageStart + Section[0].VirtAddr - depacker2Size - ImportSize - filter.Output.Size;
depack2.PatchOffsets(depack2Base);
depack2.Patch("DEPACKTABLE",filter.Table,sizeof(filter.Table));
depack2.PatchDWord("DCOD",PH->ImageStart + PH->CodeStart);
depack2.PatchJump("ETRY",PH->ImageStart + PH->Entry,depack2Base);
+#endif
skip = 0;
while(skip<dataSize && !UnImage[dataSize-skip-1])
@@ -1056,3 +1066,4 @@ sU32 EXEPacker::GetActualSize()
{
return ActualSize;
}
+
View
100 kkrunchy_k7/model_asm.asm
@@ -8,21 +8,20 @@ bits 32
section .bss
-%define MEMSHIFT 18
+%define MEMSHIFT 23
%define MEM (1<<MEMSHIFT)
-%define NMODEL 9
-%define NINPUT 40
-%define NWEIGHT (256+256+16)
+%define NMODEL 11
+%define NINPUT 48
+%define NWEIGHT (256+256+16+128)
%define MAXLEN 2047
%define USEAPM 1
-%define APMSIZE 1024
+%define APMSIZE 8192
struc ContextModel
.cpr resd 1
.cps resd 1
.ctx resd 1
.st resd 1
- .t resb MEM*2
.size:
endstruc
@@ -42,7 +41,8 @@ struc Work
.APMi resd 1
%endif
.cm resb ContextModel.size * NMODEL
- .match resd MEM
+ .match resd MEM/16
+ .modelMem resb MEM
.tx resw NINPUT
.wx resw NINPUT*NWEIGHT
.tx2 resw 4
@@ -103,9 +103,9 @@ squash:
contextHash:
mov edx, eax
- shr edx, MEMSHIFT-1
- and eax, ((MEM / 2) - 1) & ~1
- lea eax, [esi+ContextModel.t+eax*4]
+ shr edx, 24
+ and eax, ((MEM / 4) - 1) & ~1
+ lea eax, [ebp+Work.modelMem+eax*4]
cmp dl, [eax]
jne .notequal1
@@ -135,7 +135,7 @@ contextHash:
; in: eax=err
; esi=t
; edi=w
-; ecx=n/8
+; ecx=n/4
; out: esi,edi advanced.
train:
movd mm0, eax
@@ -144,8 +144,6 @@ train:
pcmpeqb mm1, mm1
psrlw mm1, 15
-%if 1
- add ecx, ecx
.lp:
movq mm3, [esi]
movq mm2, [edi]
@@ -158,28 +156,6 @@ train:
add esi, byte 8
add edi, byte 8
loop .lp
-%else
-.lp:
- movq mm3, [esi]
- movq mm5, [esi+8]
- movq mm2, [edi]
- movq mm4, [edi+8]
- paddsw mm3, mm3
- paddsw mm5, mm5
- pmulhw mm3, mm0
- pmulhw mm5, mm0
- paddsw mm3, mm1
- paddsw mm5, mm1
- psraw mm3, 1
- psraw mm5, 1
- paddsw mm2, mm3
- paddsw mm4, mm5
- movq [edi], mm2
- movq [edi+8], mm4
- add esi, byte 16
- add edi, byte 16
- loop .lp
-%endif
ret
@@ -343,7 +319,7 @@ global @modelASM@4
xor eax, eax
call contextHash
mov [esi+ContextModel.cps], eax
- add esi, ContextModel.size
+ add esi, byte ContextModel.size
loop .ctxModelLoop
%if USEAPM
@@ -404,7 +380,7 @@ global @modelASM@4
imul edi, byte (NINPUT*2)
lea edi, [ebp+Work.wx+edi]
mov esi, WorkData+Work.tx
- push byte (NINPUT/8)
+ push byte (NINPUT/4)
pop ecx
call train
@@ -459,25 +435,32 @@ global @modelASM@4
; fnv hash function
; assumes edi=_bufPtr
mov eax, 0x811c9dc5
+ inc ecx
+ imul eax, ecx
+ dec ecx
mov bl, [masks+ecx-1]
+ mov dh, [bitm+ecx-1]
.hashnext:
dec edi
shr bl, 1
jnc .hashnotset
- xor al, [edi]
+ mov dl, [edi]
+ and dl, dh
+ xor al, dl
+
imul eax, 0x01000193
jmp short .hashnext
.hashnotset:
jnz .hashnext
- add esi, ContextModel.size
+ add esi, byte ContextModel.size
loop .updcontext
; match processing
; assumes esi points to "match"
- and eax, MEM-1
+ and eax, (MEM/16)-1
mov edi, [_bufPtr]
xchg edi, [esi+eax*4] ; now edi=Match[h]
mov esi, [_bufPtr]
@@ -569,7 +552,7 @@ global @modelASM@4
cmp eax, 400
jbe .contextmodels
- mov dword [ebp+Work.ctx], 526
+ mov dword [ebp+Work.ctx], 512+14
jmp .mix
; context models
@@ -655,7 +638,7 @@ global @modelASM@4
cmp ebx, byte 1
sbb dword [ebp+Work.ctx+8], byte -1
- add esi, ContextModel.size
+ add esi, byte ContextModel.size
dec ecx
jnz near .contextloop
@@ -667,6 +650,17 @@ global @modelASM@4
inc ah
mov [ebp+Work.ctx+4], eax
+ xor ebx, ebx
+ mov eax, [ebp+Work.matchl]
+ dec eax
+ cmovs eax, ebx
+ not bl
+ cmp eax, ebx
+ cmova eax, ebx
+ mov eax, [ebp+Work.runTable+eax*4]
+ shr eax, 3
+ add [ebp+Work.ctx+8], eax
+
.mix:
; perform mixing
push byte 2
@@ -679,7 +673,6 @@ global @modelASM@4
mov esi, WorkData+Work.tx
pxor mm0, mm0
-%if 1
push byte (NINPUT/4)
pop ecx
@@ -691,23 +684,6 @@ global @modelASM@4
add esi, byte 8
add edi, byte 8
loop .mixdploop
-%else
- push byte (NINPUT/8)
- pop ecx
-
-.mixdploop:
- movq mm1, [esi]
- movq mm2, [esi+8]
- pmaddwd mm1, [edi]
- pmaddwd mm2, [edi+8]
- psrad mm1, 8
- psrad mm2, 8
- paddd mm0, mm1
- paddd mm0, mm2
- add esi, byte 16
- add edi, byte 16
- loop .mixdploop
-%endif
movq mm1, mm0
psrlq mm1, 32
@@ -819,4 +795,6 @@ extern _startBufPtr
squashTab dw 1, 2, 4, 6, 10, 17, 27, 45, 74, 120, 194
dw 311, 488, 747,1102,1546,2048,2550,2994,3349,3608,3785
dw 3902,3976,4022,4051,4069,4079,4086,4090,4092,4094,4095
-masks db 0x1f, 0x07, 0x0a, 0x09, 0x05, 0x03, 0x04, 0x02, 0x01
+masks db 0x1f, 0x27, 0x88, 0x07, 0x0a, 0x09, 0x05, 0x03, 0x04, 0x02, 0x01
+bitm db 0xff, 0xff, 0xff, 0xe0, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff
+
Please sign in to comment.
Something went wrong with that request. Please try again.