Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

LoongArch64 assembly pack: Fix ChaCha20 ABI breakage #22817

Closed
wants to merge 1 commit into from
Closed
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
46 changes: 36 additions & 10 deletions crypto/chacha/asm/chacha-loongarch64.pl
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,14 @@
my ($t0,$t1,$t2,$t3,$t4,$t5,$t6,$t7,$t8,$x)=map("\$r$_",(12..21));
my ($s0,$s1,$s2,$s3,$s4,$s5,$s6,$s7,$s8)=map("\$r$_",(23..31));

# The saved floating-point registers in the LP64D ABI. In LoongArch
# with vector extension, the low 64 bits of a vector register alias with
# the corresponding FPR. So we must save and restore the corresponding
# FPR if we'll write into a vector register. The ABI only requires
# saving and restoring the FPR (i.e. 64 bits of the corresponding vector
# register), not the entire vector register.
my ($fs0,$fs1,$fs2,$fs3,$fs4,$fs5,$fs6,$fs7)=map("\$f$_",(24..31));

# Here is the 128-bit vector register layout for LSX extension.
my ($vr0,$vr1,$vr2,$vr3,$vr4,$vr5,$vr6,$vr7,$vr8,$vr9,$vr10,
$vr11,$vr12,$vr13,$vr14,$vr15,$vr16,$vr17,$vr18,$vr19,
Expand Down Expand Up @@ -66,13 +74,25 @@
la.pcrel $t0,OPENSSL_loongarch_hwcap_P
ld.w $t0,$t0,0

bleu $len,$t3,.LChaCha20_1x # goto 1x when len <= 64

andi $t0,$t0,LOONGARCH_HWCAP_LASX | LOONGARCH_HWCAP_LSX
beqz $t0,.LChaCha20_1x

addi.d $sp,$sp,-64
fst.d $fs0,$sp,0
fst.d $fs1,$sp,8
fst.d $fs2,$sp,16
fst.d $fs3,$sp,24
fst.d $fs4,$sp,32
fst.d $fs5,$sp,40
fst.d $fs6,$sp,48
fst.d $fs7,$sp,56

andi $t1,$t0,LOONGARCH_HWCAP_LASX
bnez $t1,.LChaCha20_8x

andi $t2,$t0,LOONGARCH_HWCAP_LSX
bnez $t2,.LChaCha20_4x

b .LChaCha20_1x
b .LChaCha20_4x

EOF

Expand Down Expand Up @@ -442,8 +462,6 @@ sub ROUND_4x {
.align 6
.LChaCha20_4x:
ori $t3,$zero,64
bleu $len,$t3,.LChaCha20_1x # goto 1x when len <= 64

addi.d $sp,$sp,-128

# Save the initial block counter in $t4
Expand Down Expand Up @@ -783,7 +801,7 @@ sub ROUND_4x {

.Ldone_4x:
addi.d $sp,$sp,128
b .Lend
b .Lrestore_saved_fpr

EOF
}
Expand Down Expand Up @@ -869,8 +887,6 @@ sub ROUND_8x {
.align 6
.LChaCha20_8x:
ori $t3,$zero,64
bleu $len,$t3,.LChaCha20_1x # goto 1x when len <= 64

addi.d $sp,$sp,-128

# Save the initial block counter in $t4
Expand Down Expand Up @@ -1394,12 +1410,22 @@ sub ROUND_8x {

.Ldone_8x:
addi.d $sp,$sp,128
b .Lend
b .Lrestore_saved_fpr

EOF
}

$code .= <<EOF;
.Lrestore_saved_fpr:
fld.d $fs0,$sp,0
fld.d $fs1,$sp,8
fld.d $fs2,$sp,16
fld.d $fs3,$sp,24
fld.d $fs4,$sp,32
fld.d $fs5,$sp,40
fld.d $fs6,$sp,48
fld.d $fs7,$sp,56
addi.d $sp,$sp,64
.Lno_data:
.Lend:
jr $ra
Expand Down