Skip to content

Commit

Permalink
Merge tag 'pull-tcg-20230915-2' of https://gitlab.com/rth7680/qemu in…
Browse files Browse the repository at this point in the history
…to staging

*: Delete checks for old host definitions
tcg/loongarch64: Generate LSX instructions
fpu: Add conversions between bfloat16 and [u]int8
fpu: Handle m68k extended precision denormals properly
accel/tcg: Improve cputlb i/o organization
accel/tcg: Simplify tlb_plugin_lookup
accel/tcg: Remove false-negative halted assertion
tcg: Add gvec compare with immediate and scalar operand
tcg/aarch64: Emit BTI insns at jump landing pads

[Resolved conflict between CPUINFO_PMULL and CPUINFO_BTI.
--Stefan]

* tag 'pull-tcg-20230915-2' of https://gitlab.com/rth7680/qemu: (39 commits)
  tcg: Map code_gen_buffer with PROT_BTI
  tcg/aarch64: Emit BTI insns at jump landing pads
  util/cpuinfo-aarch64: Add CPUINFO_BTI
  tcg: Add tcg_out_tb_start backend hook
  fpu: Handle m68k extended precision denormals properly
  fpu: Add conversions between bfloat16 and [u]int8
  accel/tcg: Introduce do_st16_mmio_leN
  accel/tcg: Introduce do_ld16_mmio_beN
  accel/tcg: Merge io_writex into do_st_mmio_leN
  accel/tcg: Merge io_readx into do_ld_mmio_beN
  accel/tcg: Replace direct use of io_readx/io_writex in do_{ld,st}_1
  accel/tcg: Merge cpu_transaction_failed into io_failed
  plugin: Simplify struct qemu_plugin_hwaddr
  accel/tcg: Use CPUTLBEntryFull.phys_addr in io_failed
  accel/tcg: Split out io_prepare and io_failed
  accel/tcg: Simplify tlb_plugin_lookup
  target/arm: Use tcg_gen_gvec_cmpi for compare vs 0
  tcg: Add gvec compare with immediate and scalar operand
  tcg/loongarch64: Implement 128-bit load & store
  tcg/loongarch64: Lower rotli_vec to vrotri
  ...

Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
  • Loading branch information
stefanhaRH committed Sep 19, 2023
2 parents 13d6b16 + a97a837 commit d775494
Show file tree
Hide file tree
Showing 39 changed files with 7,535 additions and 509 deletions.
435 changes: 210 additions & 225 deletions accel/tcg/cputlb.c

Large diffs are not rendered by default.

9 changes: 2 additions & 7 deletions accel/tcg/tcg-accel-ops-mttcg.c
Original file line number Diff line number Diff line change
Expand Up @@ -100,14 +100,9 @@ static void *mttcg_cpu_thread_fn(void *arg)
break;
case EXCP_HALTED:
/*
* during start-up the vCPU is reset and the thread is
* kicked several times. If we don't ensure we go back
* to sleep in the halted state we won't cleanly
* start-up when the vCPU is enabled.
*
* cpu->halted should ensure we sleep in wait_io_event
* Usually cpu->halted is set, but may have already been
* reset by another thread by the time we arrive here.
*/
g_assert(cpu->halted);
break;
case EXCP_ATOMIC:
qemu_mutex_unlock_iothread();
Expand Down
26 changes: 26 additions & 0 deletions accel/tcg/tcg-runtime-gvec.c
Original file line number Diff line number Diff line change
Expand Up @@ -1042,6 +1042,32 @@ DO_CMP2(64)
#undef DO_CMP1
#undef DO_CMP2

#define DO_CMP1(NAME, TYPE, OP) \
void HELPER(NAME)(void *d, void *a, uint64_t b64, uint32_t desc) \
{ \
intptr_t oprsz = simd_oprsz(desc); \
TYPE inv = simd_data(desc), b = b64; \
for (intptr_t i = 0; i < oprsz; i += sizeof(TYPE)) { \
*(TYPE *)(d + i) = -((*(TYPE *)(a + i) OP b) ^ inv); \
} \
clear_high(d, oprsz, desc); \
}

#define DO_CMP2(SZ) \
DO_CMP1(gvec_eqs##SZ, uint##SZ##_t, ==) \
DO_CMP1(gvec_lts##SZ, int##SZ##_t, <) \
DO_CMP1(gvec_les##SZ, int##SZ##_t, <=) \
DO_CMP1(gvec_ltus##SZ, uint##SZ##_t, <) \
DO_CMP1(gvec_leus##SZ, uint##SZ##_t, <=)

DO_CMP2(8)
DO_CMP2(16)
DO_CMP2(32)
DO_CMP2(64)

#undef DO_CMP1
#undef DO_CMP2

void HELPER(gvec_ssadd8)(void *d, void *a, void *b, uint32_t desc)
{
intptr_t oprsz = simd_oprsz(desc);
Expand Down
25 changes: 25 additions & 0 deletions accel/tcg/tcg-runtime.h
Original file line number Diff line number Diff line change
Expand Up @@ -297,4 +297,29 @@ DEF_HELPER_FLAGS_4(gvec_leu16, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
DEF_HELPER_FLAGS_4(gvec_leu32, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
DEF_HELPER_FLAGS_4(gvec_leu64, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)

DEF_HELPER_FLAGS_4(gvec_eqs8, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
DEF_HELPER_FLAGS_4(gvec_eqs16, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
DEF_HELPER_FLAGS_4(gvec_eqs32, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
DEF_HELPER_FLAGS_4(gvec_eqs64, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)

DEF_HELPER_FLAGS_4(gvec_lts8, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
DEF_HELPER_FLAGS_4(gvec_lts16, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
DEF_HELPER_FLAGS_4(gvec_lts32, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
DEF_HELPER_FLAGS_4(gvec_lts64, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)

DEF_HELPER_FLAGS_4(gvec_les8, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
DEF_HELPER_FLAGS_4(gvec_les16, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
DEF_HELPER_FLAGS_4(gvec_les32, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
DEF_HELPER_FLAGS_4(gvec_les64, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)

DEF_HELPER_FLAGS_4(gvec_ltus8, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
DEF_HELPER_FLAGS_4(gvec_ltus16, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
DEF_HELPER_FLAGS_4(gvec_ltus32, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
DEF_HELPER_FLAGS_4(gvec_ltus64, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)

DEF_HELPER_FLAGS_4(gvec_leus8, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
DEF_HELPER_FLAGS_4(gvec_leus16, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
DEF_HELPER_FLAGS_4(gvec_leus32, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
DEF_HELPER_FLAGS_4(gvec_leus64, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)

DEF_HELPER_FLAGS_5(gvec_bitsel, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)
7 changes: 4 additions & 3 deletions fpu/softfloat-parts.c.inc
Original file line number Diff line number Diff line change
Expand Up @@ -118,7 +118,8 @@ static void partsN(canonicalize)(FloatPartsN *p, float_status *status,
} else {
int shift = frac_normalize(p);
p->cls = float_class_normal;
p->exp = fmt->frac_shift - fmt->exp_bias - shift + 1;
p->exp = fmt->frac_shift - fmt->exp_bias
- shift + !fmt->m68k_denormal;
}
} else if (likely(p->exp < fmt->exp_max) || fmt->arm_althp) {
p->cls = float_class_normal;
Expand Down Expand Up @@ -256,7 +257,7 @@ static void partsN(uncanon_normal)(FloatPartsN *p, float_status *s,
is_tiny = !frac_addi(&discard, p, inc);
}

frac_shrjam(p, 1 - exp);
frac_shrjam(p, !fmt->m68k_denormal - exp);

if (p->frac_lo & round_mask) {
/* Need to recompute round-to-even/round-to-odd. */
Expand Down Expand Up @@ -287,7 +288,7 @@ static void partsN(uncanon_normal)(FloatPartsN *p, float_status *s,
p->frac_lo &= ~round_mask;
}

exp = (p->frac_hi & DECOMPOSED_IMPLICIT_BIT) != 0;
exp = (p->frac_hi & DECOMPOSED_IMPLICIT_BIT) && !fmt->m68k_denormal;
frac_shr(p, frac_shift);

if (is_tiny && (flags & float_flag_inexact)) {
Expand Down
67 changes: 66 additions & 1 deletion fpu/softfloat.c
Original file line number Diff line number Diff line change
Expand Up @@ -517,6 +517,7 @@ typedef struct {
* round_mask: bits below lsb which must be rounded
* The following optional modifiers are available:
* arm_althp: handle ARM Alternative Half Precision
* m68k_denormal: explicit integer bit for extended precision may be 1
*/
typedef struct {
int exp_size;
Expand All @@ -526,6 +527,7 @@ typedef struct {
int frac_size;
int frac_shift;
bool arm_althp;
bool m68k_denormal;
uint64_t round_mask;
} FloatFmt;

Expand Down Expand Up @@ -576,7 +578,12 @@ static const FloatFmt float128_params = {
static const FloatFmt floatx80_params[3] = {
[floatx80_precision_s] = { FLOATX80_PARAMS(23) },
[floatx80_precision_d] = { FLOATX80_PARAMS(52) },
[floatx80_precision_x] = { FLOATX80_PARAMS(64) },
[floatx80_precision_x] = {
FLOATX80_PARAMS(64),
#ifdef TARGET_M68K
.m68k_denormal = true,
#endif
},
};

/* Unpack a float to parts, but do not canonicalize. */
Expand Down Expand Up @@ -3126,6 +3133,15 @@ int64_t float64_to_int64_scalbn(float64 a, FloatRoundMode rmode, int scale,
return parts_float_to_sint(&p, rmode, scale, INT64_MIN, INT64_MAX, s);
}

int8_t bfloat16_to_int8_scalbn(bfloat16 a, FloatRoundMode rmode, int scale,
float_status *s)
{
FloatParts64 p;

bfloat16_unpack_canonical(&p, a, s);
return parts_float_to_sint(&p, rmode, scale, INT8_MIN, INT8_MAX, s);
}

int16_t bfloat16_to_int16_scalbn(bfloat16 a, FloatRoundMode rmode, int scale,
float_status *s)
{
Expand Down Expand Up @@ -3392,6 +3408,11 @@ int64_t floatx80_to_int64_round_to_zero(floatx80 a, float_status *s)
return floatx80_to_int64_scalbn(a, float_round_to_zero, 0, s);
}

int8_t bfloat16_to_int8(bfloat16 a, float_status *s)
{
return bfloat16_to_int8_scalbn(a, s->float_rounding_mode, 0, s);
}

int16_t bfloat16_to_int16(bfloat16 a, float_status *s)
{
return bfloat16_to_int16_scalbn(a, s->float_rounding_mode, 0, s);
Expand All @@ -3407,6 +3428,11 @@ int64_t bfloat16_to_int64(bfloat16 a, float_status *s)
return bfloat16_to_int64_scalbn(a, s->float_rounding_mode, 0, s);
}

int8_t bfloat16_to_int8_round_to_zero(bfloat16 a, float_status *s)
{
return bfloat16_to_int8_scalbn(a, float_round_to_zero, 0, s);
}

int16_t bfloat16_to_int16_round_to_zero(bfloat16 a, float_status *s)
{
return bfloat16_to_int16_scalbn(a, float_round_to_zero, 0, s);
Expand Down Expand Up @@ -3534,6 +3560,15 @@ uint64_t float64_to_uint64_scalbn(float64 a, FloatRoundMode rmode, int scale,
return parts_float_to_uint(&p, rmode, scale, UINT64_MAX, s);
}

uint8_t bfloat16_to_uint8_scalbn(bfloat16 a, FloatRoundMode rmode,
int scale, float_status *s)
{
FloatParts64 p;

bfloat16_unpack_canonical(&p, a, s);
return parts_float_to_uint(&p, rmode, scale, UINT8_MAX, s);
}

uint16_t bfloat16_to_uint16_scalbn(bfloat16 a, FloatRoundMode rmode,
int scale, float_status *s)
{
Expand Down Expand Up @@ -3759,6 +3794,11 @@ Int128 float128_to_uint128_round_to_zero(float128 a, float_status *s)
return float128_to_uint128_scalbn(a, float_round_to_zero, 0, s);
}

uint8_t bfloat16_to_uint8(bfloat16 a, float_status *s)
{
return bfloat16_to_uint8_scalbn(a, s->float_rounding_mode, 0, s);
}

uint16_t bfloat16_to_uint16(bfloat16 a, float_status *s)
{
return bfloat16_to_uint16_scalbn(a, s->float_rounding_mode, 0, s);
Expand All @@ -3774,6 +3814,11 @@ uint64_t bfloat16_to_uint64(bfloat16 a, float_status *s)
return bfloat16_to_uint64_scalbn(a, s->float_rounding_mode, 0, s);
}

uint8_t bfloat16_to_uint8_round_to_zero(bfloat16 a, float_status *s)
{
return bfloat16_to_uint8_scalbn(a, float_round_to_zero, 0, s);
}

uint16_t bfloat16_to_uint16_round_to_zero(bfloat16 a, float_status *s)
{
return bfloat16_to_uint16_scalbn(a, float_round_to_zero, 0, s);
Expand Down Expand Up @@ -3929,6 +3974,11 @@ bfloat16 int16_to_bfloat16_scalbn(int16_t a, int scale, float_status *status)
return int64_to_bfloat16_scalbn(a, scale, status);
}

bfloat16 int8_to_bfloat16_scalbn(int8_t a, int scale, float_status *status)
{
return int64_to_bfloat16_scalbn(a, scale, status);
}

bfloat16 int64_to_bfloat16(int64_t a, float_status *status)
{
return int64_to_bfloat16_scalbn(a, 0, status);
Expand All @@ -3944,6 +3994,11 @@ bfloat16 int16_to_bfloat16(int16_t a, float_status *status)
return int64_to_bfloat16_scalbn(a, 0, status);
}

bfloat16 int8_to_bfloat16(int8_t a, float_status *status)
{
return int64_to_bfloat16_scalbn(a, 0, status);
}

float128 int128_to_float128(Int128 a, float_status *status)
{
FloatParts128 p = { };
Expand Down Expand Up @@ -4139,6 +4194,11 @@ bfloat16 uint16_to_bfloat16_scalbn(uint16_t a, int scale, float_status *status)
return uint64_to_bfloat16_scalbn(a, scale, status);
}

bfloat16 uint8_to_bfloat16_scalbn(uint8_t a, int scale, float_status *status)
{
return uint64_to_bfloat16_scalbn(a, scale, status);
}

bfloat16 uint64_to_bfloat16(uint64_t a, float_status *status)
{
return uint64_to_bfloat16_scalbn(a, 0, status);
Expand All @@ -4154,6 +4214,11 @@ bfloat16 uint16_to_bfloat16(uint16_t a, float_status *status)
return uint64_to_bfloat16_scalbn(a, 0, status);
}

bfloat16 uint8_to_bfloat16(uint8_t a, float_status *status)
{
return uint64_to_bfloat16_scalbn(a, 0, status);
}

float128 uint64_to_float128(uint64_t a, float_status *status)
{
FloatParts128 p;
Expand Down
1 change: 1 addition & 0 deletions host/include/aarch64/host/cpuinfo.h
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
#define CPUINFO_LSE2 (1u << 2)
#define CPUINFO_AES (1u << 3)
#define CPUINFO_PMULL (1u << 4)
#define CPUINFO_BTI (1u << 5)

/* Initialized with a constructor. */
extern unsigned cpuinfo;
Expand Down
12 changes: 6 additions & 6 deletions include/exec/cpu-defs.h
Original file line number Diff line number Diff line change
Expand Up @@ -100,12 +100,12 @@
typedef struct CPUTLBEntryFull {
/*
* @xlat_section contains:
* - in the lower TARGET_PAGE_BITS, a physical section number
* - with the lower TARGET_PAGE_BITS masked off, an offset which
* must be added to the virtual address to obtain:
* + the ram_addr_t of the target RAM (if the physical section
* number is PHYS_SECTION_NOTDIRTY or PHYS_SECTION_ROM)
* + the offset within the target MemoryRegion (otherwise)
* - For ram, an offset which must be added to the virtual address
* to obtain the ram_addr_t of the target RAM
* - For other memory regions,
* + in the lower TARGET_PAGE_BITS, the physical section number
* + with the TARGET_PAGE_BITS masked off, the offset within
* the target MemoryRegion
*/
hwaddr xlat_section;

Expand Down
3 changes: 1 addition & 2 deletions include/exec/user/thunk.h
Original file line number Diff line number Diff line change
Expand Up @@ -111,8 +111,7 @@ static inline int thunk_type_size(const argtype *type_ptr, int is_host)
if (is_host) {
#if defined(HOST_X86_64)
return 8;
#elif defined(HOST_ALPHA) || defined(HOST_IA64) || defined(HOST_MIPS) || \
defined(HOST_PARISC) || defined(HOST_SPARC64)
#elif defined(HOST_MIPS) || defined(HOST_SPARC64)
return 4;
#elif defined(HOST_PPC)
return sizeof(void *);
Expand Down
12 changes: 12 additions & 0 deletions include/fpu/softfloat.h
Original file line number Diff line number Diff line change
Expand Up @@ -366,46 +366,58 @@ float32 bfloat16_to_float32(bfloat16, float_status *status);
bfloat16 float64_to_bfloat16(float64 a, float_status *status);
float64 bfloat16_to_float64(bfloat16 a, float_status *status);

int8_t bfloat16_to_int8_scalbn(bfloat16, FloatRoundMode,
int, float_status *status);
int16_t bfloat16_to_int16_scalbn(bfloat16, FloatRoundMode,
int, float_status *status);
int32_t bfloat16_to_int32_scalbn(bfloat16, FloatRoundMode,
int, float_status *status);
int64_t bfloat16_to_int64_scalbn(bfloat16, FloatRoundMode,
int, float_status *status);

int8_t bfloat16_to_int8(bfloat16, float_status *status);
int16_t bfloat16_to_int16(bfloat16, float_status *status);
int32_t bfloat16_to_int32(bfloat16, float_status *status);
int64_t bfloat16_to_int64(bfloat16, float_status *status);

int8_t bfloat16_to_int8_round_to_zero(bfloat16, float_status *status);
int16_t bfloat16_to_int16_round_to_zero(bfloat16, float_status *status);
int32_t bfloat16_to_int32_round_to_zero(bfloat16, float_status *status);
int64_t bfloat16_to_int64_round_to_zero(bfloat16, float_status *status);

uint8_t bfloat16_to_uint8_scalbn(bfloat16 a, FloatRoundMode,
int, float_status *status);
uint16_t bfloat16_to_uint16_scalbn(bfloat16 a, FloatRoundMode,
int, float_status *status);
uint32_t bfloat16_to_uint32_scalbn(bfloat16 a, FloatRoundMode,
int, float_status *status);
uint64_t bfloat16_to_uint64_scalbn(bfloat16 a, FloatRoundMode,
int, float_status *status);

uint8_t bfloat16_to_uint8(bfloat16 a, float_status *status);
uint16_t bfloat16_to_uint16(bfloat16 a, float_status *status);
uint32_t bfloat16_to_uint32(bfloat16 a, float_status *status);
uint64_t bfloat16_to_uint64(bfloat16 a, float_status *status);

uint8_t bfloat16_to_uint8_round_to_zero(bfloat16 a, float_status *status);
uint16_t bfloat16_to_uint16_round_to_zero(bfloat16 a, float_status *status);
uint32_t bfloat16_to_uint32_round_to_zero(bfloat16 a, float_status *status);
uint64_t bfloat16_to_uint64_round_to_zero(bfloat16 a, float_status *status);

bfloat16 int8_to_bfloat16_scalbn(int8_t a, int, float_status *status);
bfloat16 int16_to_bfloat16_scalbn(int16_t a, int, float_status *status);
bfloat16 int32_to_bfloat16_scalbn(int32_t a, int, float_status *status);
bfloat16 int64_to_bfloat16_scalbn(int64_t a, int, float_status *status);
bfloat16 uint8_to_bfloat16_scalbn(uint8_t a, int, float_status *status);
bfloat16 uint16_to_bfloat16_scalbn(uint16_t a, int, float_status *status);
bfloat16 uint32_to_bfloat16_scalbn(uint32_t a, int, float_status *status);
bfloat16 uint64_to_bfloat16_scalbn(uint64_t a, int, float_status *status);

bfloat16 int8_to_bfloat16(int8_t a, float_status *status);
bfloat16 int16_to_bfloat16(int16_t a, float_status *status);
bfloat16 int32_to_bfloat16(int32_t a, float_status *status);
bfloat16 int64_to_bfloat16(int64_t a, float_status *status);
bfloat16 uint8_to_bfloat16(uint8_t a, float_status *status);
bfloat16 uint16_to_bfloat16(uint16_t a, float_status *status);
bfloat16 uint32_to_bfloat16(uint32_t a, float_status *status);
bfloat16 uint64_to_bfloat16(uint64_t a, float_status *status);
Expand Down
13 changes: 0 additions & 13 deletions include/hw/core/cpu.h
Original file line number Diff line number Diff line change
Expand Up @@ -227,17 +227,6 @@ struct CPUWatchpoint {
QTAILQ_ENTRY(CPUWatchpoint) entry;
};

#ifdef CONFIG_PLUGIN
/*
* For plugins we sometime need to save the resolved iotlb data before
* the memory regions get moved around by io_writex.
*/
typedef struct SavedIOTLB {
MemoryRegionSection *section;
hwaddr mr_offset;
} SavedIOTLB;
#endif

struct KVMState;
struct kvm_run;

Expand Down Expand Up @@ -409,8 +398,6 @@ struct CPUState {

#ifdef CONFIG_PLUGIN
GArray *plugin_mem_cbs;
/* saved iotlb data from io_writex */
SavedIOTLB saved_iotlb;
#endif

/* TODO Move common fields from CPUArchState here. */
Expand Down

0 comments on commit d775494

Please sign in to comment.