Skip to content

Commit

Permalink
Browse files Browse the repository at this point in the history
target/arm: Use tcg_gen_qemu_{st, ld}_i128 for do_fp_{st, ld}
While we don't require 16-byte atomicity here, using a single larger
operation simplifies the code.  Introduce finalize_memop_asimd for this.

Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
Message-id: 20230530191438.411344-6-richard.henderson@linaro.org
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
  • Loading branch information
rth7680 authored and pm215 committed Jun 6, 2023
1 parent c74cc08 commit d450bd0
Show file tree
Hide file tree
Showing 2 changed files with 35 additions and 24 deletions.
35 changes: 11 additions & 24 deletions target/arm/tcg/translate-a64.c
Expand Up @@ -911,26 +911,20 @@ static void do_fp_st(DisasContext *s, int srcidx, TCGv_i64 tcg_addr, int size)
{
/* This writes the bottom N bits of a 128 bit wide vector to memory */
TCGv_i64 tmplo = tcg_temp_new_i64();
MemOp mop;
MemOp mop = finalize_memop_asimd(s, size);

tcg_gen_ld_i64(tmplo, cpu_env, fp_reg_offset(s, srcidx, MO_64));

if (size < 4) {
mop = finalize_memop(s, size);
if (size < MO_128) {
tcg_gen_qemu_st_i64(tmplo, tcg_addr, get_mem_index(s), mop);
} else {
bool be = s->be_data == MO_BE;
TCGv_i64 tcg_hiaddr = tcg_temp_new_i64();
TCGv_i64 tmphi = tcg_temp_new_i64();
TCGv_i128 t16 = tcg_temp_new_i128();

tcg_gen_ld_i64(tmphi, cpu_env, fp_reg_hi_offset(s, srcidx));
tcg_gen_concat_i64_i128(t16, tmplo, tmphi);

mop = s->be_data | MO_UQ;
tcg_gen_qemu_st_i64(be ? tmphi : tmplo, tcg_addr, get_mem_index(s),
mop | (s->align_mem ? MO_ALIGN_16 : 0));
tcg_gen_addi_i64(tcg_hiaddr, tcg_addr, 8);
tcg_gen_qemu_st_i64(be ? tmplo : tmphi, tcg_hiaddr,
get_mem_index(s), mop);
tcg_gen_qemu_st_i128(t16, tcg_addr, get_mem_index(s), mop);
}
}

Expand All @@ -942,24 +936,17 @@ static void do_fp_ld(DisasContext *s, int destidx, TCGv_i64 tcg_addr, int size)
/* This always zero-extends and writes to a full 128 bit wide vector */
TCGv_i64 tmplo = tcg_temp_new_i64();
TCGv_i64 tmphi = NULL;
MemOp mop;
MemOp mop = finalize_memop_asimd(s, size);

if (size < 4) {
mop = finalize_memop(s, size);
if (size < MO_128) {
tcg_gen_qemu_ld_i64(tmplo, tcg_addr, get_mem_index(s), mop);
} else {
bool be = s->be_data == MO_BE;
TCGv_i64 tcg_hiaddr;
TCGv_i128 t16 = tcg_temp_new_i128();

tcg_gen_qemu_ld_i128(t16, tcg_addr, get_mem_index(s), mop);

tmphi = tcg_temp_new_i64();
tcg_hiaddr = tcg_temp_new_i64();

mop = s->be_data | MO_UQ;
tcg_gen_qemu_ld_i64(be ? tmphi : tmplo, tcg_addr, get_mem_index(s),
mop | (s->align_mem ? MO_ALIGN_16 : 0));
tcg_gen_addi_i64(tcg_hiaddr, tcg_addr, 8);
tcg_gen_qemu_ld_i64(be ? tmplo : tmphi, tcg_hiaddr,
get_mem_index(s), mop);
tcg_gen_extr_i128_i64(tmplo, tmphi, t16);
}

tcg_gen_st_i64(tmplo, cpu_env, fp_reg_offset(s, destidx, MO_64));
Expand Down
24 changes: 24 additions & 0 deletions target/arm/tcg/translate.h
Expand Up @@ -609,6 +609,30 @@ static inline MemOp finalize_memop_pair(DisasContext *s, MemOp opc)
return finalize_memop_atom(s, opc, atom);
}

/**
* finalize_memop_asimd:
* @s: DisasContext
* @opc: size+sign+align of the memory operation
*
* Like finalize_memop_atom, but with atomicity of AccessType_ASIMD.
*/
static inline MemOp finalize_memop_asimd(DisasContext *s, MemOp opc)
{
/*
* In the pseudocode for Mem[], with AccessType_ASIMD, size == 16,
* if IsAligned(8), the first case provides separate atomicity for
* the pair of 64-bit accesses. If !IsAligned(8), the middle cases
* do not apply, and we're left with the final case of no atomicity.
* Thus MO_ATOM_IFALIGN_PAIR.
*
* For other sizes, normal LSE2 rules apply.
*/
if ((opc & MO_SIZE) == MO_128) {
return finalize_memop_atom(s, opc, MO_ATOM_IFALIGN_PAIR);
}
return finalize_memop(s, opc);
}

/**
* asimd_imm_const: Expand an encoded SIMD constant value
*
Expand Down

0 comments on commit d450bd0

Please sign in to comment.