diff --git a/target-arm/translate-a64.c b/target-arm/translate-a64.c
index 24a5afc32ec3..6b9af2163ae7 100644
--- a/target-arm/translate-a64.c
+++ b/target-arm/translate-a64.c
@@ -3282,10 +3282,20 @@ static void handle_simd_shifti(DisasContext *s, uint32_t insn)
     case 0x14: /* SSHLL / USHLL */
         accumulate = round = false;
         shift = shift - (8 << size);
+        if (size >= 3) {
+            unallocated_encoding(s);
+            return;
+        }
         /* Do as if datasize is 64 always. */
         if (is_q)
             freg_offs_n += sizeof(float64);
         is_q = false;
+        /* For the LL variants the store is larger than the load,
+         * so if rd == rn we would overwrite parts of our input.
+         * So load everything right now and use shifts in the main
+         * loop. */
+        tmp2 = tcg_temp_new_i64();
+        simd_ld(tmp2, freg_offs_n, 3, false);
         break;
     default:
         /* So we don't implement any of the Narrow or saturating shifts,
@@ -3299,7 +3309,25 @@ static void handle_simd_shifti(DisasContext *s, uint32_t insn)
-    tmp2 = tcg_temp_new_i64();
+    if (opcode != 0x14) {
+        /* For SSHLL/USHLL tmp2 already holds the preloaded source;
+         * allocating a fresh temp here would lose that value. */
+        tmp2 = tcg_temp_new_i64();
+    }
     for (i = 0; i < (is_q ? 16 : 8); i += ebytes) {
-        simd_ld(tcg_tmp, freg_offs_n + i, size, !is_u);
+        if (opcode != 0x14) {
+            simd_ld(tcg_tmp, freg_offs_n + i, size, !is_u);
+        } else {
+            /* Pull the next element out of the preloaded source and
+             * sign- or zero-extend it to 64 bits. */
+            tcg_gen_shri_i64(tcg_tmp, tmp2, i * 8);
+            switch (size << 1 | is_u) {
+            case 0: tcg_gen_ext8s_i64(tcg_tmp, tcg_tmp); break;
+            case 1: tcg_gen_ext8u_i64(tcg_tmp, tcg_tmp); break;
+            case 2: tcg_gen_ext16s_i64(tcg_tmp, tcg_tmp); break;
+            case 3: tcg_gen_ext16u_i64(tcg_tmp, tcg_tmp); break;
+            case 4: tcg_gen_ext32s_i64(tcg_tmp, tcg_tmp); break;
+            case 5: tcg_gen_ext32u_i64(tcg_tmp, tcg_tmp); break;
+            }
+        }
         if (round)
             tcg_gen_addi_i64(tcg_tmp, tcg_tmp, 1 << (shift - 1));
         switch (opcode) {
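
For reference, below is a minimal host-side sketch of the SSHLL/USHLL semantics the patch implements: each source element is widened to twice its size and shifted left, so the destination overlaps a source half that the instruction may clobber when rd == rn, which is exactly the hazard the preload into tmp2 avoids. The names shll_ref and vec_t are made up for illustration (this is not QEMU code), and the byte-wise memcpy handling assumes a little-endian host.

#include <stdint.h>
#include <stdio.h>
#include <string.h>

typedef struct { uint8_t b[16]; } vec_t;

/* Reference model of SSHLL/USHLL (shift left long).  "size" is the
 * *source* element size (0 = byte, 1 = halfword, 2 = word), "shift"
 * is the left shift amount (0 .. esize-1), is_u selects zero- over
 * sign-extension and is_q selects the high source half (SSHLL2). */
static void shll_ref(vec_t *rd, const vec_t *rn, int size, int shift,
                     int is_u, int is_q)
{
    /* Copy the source half out first so rd == rn is safe; this mirrors
     * the patch preloading the whole source register into tmp2. */
    uint8_t src[8];
    memcpy(src, &rn->b[is_q ? 8 : 0], 8);

    int ebytes = 1 << size;
    for (int i = 0; i < 8; i += ebytes) {
        uint64_t elt = 0;
        memcpy(&elt, &src[i], ebytes);
        if (!is_u) {
            /* Sign-extend the narrow element, the job done in the
             * patch by the (size << 1 | is_u) switch via
             * tcg_gen_ext*s_i64; zero-extension is already implicit. */
            int bits = 8 << size;
            elt = (uint64_t)((int64_t)(elt << (64 - bits)) >> (64 - bits));
        }
        elt <<= shift;
        memcpy(&rd->b[2 * i], &elt, 2 * ebytes);  /* double-width store */
    }
}

int main(void)
{
    vec_t n = { { 0x80, 0x01, 0xff, 0x7f } };
    vec_t d;
    shll_ref(&d, &n, 0, 4, 0, 0);       /* models SSHLL Vd.8H, Vn.8B, #4 */
    for (int i = 0; i < 8; i++) {
        uint16_t h;
        memcpy(&h, &d.b[2 * i], 2);
        printf("%04x ", h);             /* f800 0010 fff0 07f0 0000 ... */
    }
    printf("\n");
    return 0;
}

Passing is_q = 1 makes the sketch read the high source half, matching the patch's "freg_offs_n += sizeof(float64)" adjustment for the SSHLL2/USHLL2 forms.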