58 changes: 43 additions & 15 deletions tcg/tcg.c
Expand Up @@ -94,6 +94,19 @@ typedef struct QEMU_PACKED {
DebugFrameFDEHeader fde;
} DebugFrameHeader;

typedef struct TCGLabelQemuLdst {
bool is_ld; /* qemu_ld: true, qemu_st: false */
MemOpIdx oi;
TCGType type; /* result type of a load */
TCGReg addrlo_reg; /* reg index for low word of guest virtual addr */
TCGReg addrhi_reg; /* reg index for high word of guest virtual addr */
TCGReg datalo_reg; /* reg index for low word to be loaded or stored */
TCGReg datahi_reg; /* reg index for high word to be loaded or stored */
const tcg_insn_unit *raddr; /* addr of the next IR of qemu_ld/st IR */
tcg_insn_unit *label_ptr[2]; /* label pointers to be updated */
QSIMPLEQ_ENTRY(TCGLabelQemuLdst) next;
} TCGLabelQemuLdst;

static void tcg_register_jit_int(const void *buf, size_t size,
const void *debug_frame,
size_t debug_frame_size)
Expand Down Expand Up @@ -793,6 +806,25 @@ static void init_ffi_layouts(void)
}
#endif /* CONFIG_TCG_INTERPRETER */

static inline bool arg_slot_reg_p(unsigned arg_slot)
{
/*
* Split the sizeof away from the comparison to avoid Werror from
* "unsigned < 0 is always false", when iarg_regs is empty.
*/
unsigned nreg = ARRAY_SIZE(tcg_target_call_iarg_regs);
return arg_slot < nreg;
}

static inline int arg_slot_stk_ofs(unsigned arg_slot)
{
unsigned max = TCG_STATIC_CALL_ARGS_SIZE / sizeof(tcg_target_long);
unsigned stk_slot = arg_slot - ARRAY_SIZE(tcg_target_call_iarg_regs);

tcg_debug_assert(stk_slot < max);
return TCG_TARGET_CALL_STACK_OFFSET + stk_slot * sizeof(tcg_target_long);
}

typedef struct TCGCumulativeArgs {
int arg_idx; /* tcg_gen_callN args[] */
int info_in_idx; /* TCGHelperInfo in[] */
Expand Down Expand Up @@ -1032,6 +1064,7 @@ static void init_call_layout(TCGHelperInfo *info)
}
}
assert(ref_base + cum.ref_slot <= max_stk_slots);
ref_base += max_reg_slots;

if (ref_base != 0) {
for (int i = cum.info_in_idx - 1; i >= 0; --i) {
Expand Down Expand Up @@ -3218,7 +3251,7 @@ liveness_pass_1(TCGContext *s)
case TCG_CALL_ARG_NORMAL:
case TCG_CALL_ARG_EXTEND_U:
case TCG_CALL_ARG_EXTEND_S:
if (REG_P(loc)) {
if (arg_slot_reg_p(loc->arg_slot)) {
*la_temp_pref(ts) = 0;
break;
}
Expand All @@ -3245,7 +3278,7 @@ liveness_pass_1(TCGContext *s)
case TCG_CALL_ARG_NORMAL:
case TCG_CALL_ARG_EXTEND_U:
case TCG_CALL_ARG_EXTEND_S:
if (REG_P(loc)) {
if (arg_slot_reg_p(loc->arg_slot)) {
tcg_regset_set_reg(*la_temp_pref(ts),
tcg_target_call_iarg_regs[loc->arg_slot]);
}
Expand Down Expand Up @@ -4803,7 +4836,7 @@ static void load_arg_reg(TCGContext *s, TCGReg reg, TCGTemp *ts,
}
}

static void load_arg_stk(TCGContext *s, int stk_slot, TCGTemp *ts,
static void load_arg_stk(TCGContext *s, unsigned arg_slot, TCGTemp *ts,
TCGRegSet allocated_regs)
{
/*
Expand All @@ -4813,30 +4846,27 @@ static void load_arg_stk(TCGContext *s, int stk_slot, TCGTemp *ts,
*/
temp_load(s, ts, tcg_target_available_regs[ts->type], allocated_regs, 0);
tcg_out_st(s, ts->type, ts->reg, TCG_REG_CALL_STACK,
TCG_TARGET_CALL_STACK_OFFSET +
stk_slot * sizeof(tcg_target_long));
arg_slot_stk_ofs(arg_slot));
}

static void load_arg_normal(TCGContext *s, const TCGCallArgumentLoc *l,
TCGTemp *ts, TCGRegSet *allocated_regs)
{
if (REG_P(l)) {
if (arg_slot_reg_p(l->arg_slot)) {
TCGReg reg = tcg_target_call_iarg_regs[l->arg_slot];
load_arg_reg(s, reg, ts, *allocated_regs);
tcg_regset_set_reg(*allocated_regs, reg);
} else {
load_arg_stk(s, l->arg_slot - ARRAY_SIZE(tcg_target_call_iarg_regs),
ts, *allocated_regs);
load_arg_stk(s, l->arg_slot, ts, *allocated_regs);
}
}

static void load_arg_ref(TCGContext *s, int arg_slot, TCGReg ref_base,
static void load_arg_ref(TCGContext *s, unsigned arg_slot, TCGReg ref_base,
intptr_t ref_off, TCGRegSet *allocated_regs)
{
TCGReg reg;
int stk_slot = arg_slot - ARRAY_SIZE(tcg_target_call_iarg_regs);

if (stk_slot < 0) {
if (arg_slot_reg_p(arg_slot)) {
reg = tcg_target_call_iarg_regs[arg_slot];
tcg_reg_free(s, reg, *allocated_regs);
tcg_out_addi_ptr(s, reg, ref_base, ref_off);
Expand All @@ -4846,8 +4876,7 @@ static void load_arg_ref(TCGContext *s, int arg_slot, TCGReg ref_base,
*allocated_regs, 0, false);
tcg_out_addi_ptr(s, reg, ref_base, ref_off);
tcg_out_st(s, TCG_TYPE_PTR, reg, TCG_REG_CALL_STACK,
TCG_TARGET_CALL_STACK_OFFSET
+ stk_slot * sizeof(tcg_target_long));
arg_slot_stk_ofs(arg_slot));
}
}

Expand Down Expand Up @@ -4877,8 +4906,7 @@ static void tcg_reg_alloc_call(TCGContext *s, TCGOp *op)
case TCG_CALL_ARG_BY_REF:
load_arg_stk(s, loc->ref_slot, ts, allocated_regs);
load_arg_ref(s, loc->arg_slot, TCG_REG_CALL_STACK,
TCG_TARGET_CALL_STACK_OFFSET
+ loc->ref_slot * sizeof(tcg_target_long),
arg_slot_stk_ofs(loc->ref_slot),
&allocated_regs);
break;
case TCG_CALL_ARG_BY_REF_N:
Expand Down
5 changes: 4 additions & 1 deletion tests/tcg/ppc64/Makefile.target
Expand Up @@ -20,7 +20,7 @@ PPC64_TESTS += mtfsf
PPC64_TESTS += mffsce

ifneq ($(CROSS_CC_HAS_POWER10),)
PPC64_TESTS += byte_reverse sha512-vector
PPC64_TESTS += byte_reverse sha512-vector vector
endif
byte_reverse: CFLAGS += -mcpu=power10
run-byte_reverse: QEMU_OPTS+=-cpu POWER10
Expand All @@ -31,6 +31,9 @@ sha512-vector: sha512.c

run-sha512-vector: QEMU_OPTS+=-cpu POWER10

vector: CFLAGS += -mcpu=power10 -I$(SRC_PATH)/include
run-vector: QEMU_OPTS += -cpu POWER10

PPC64_TESTS += signal_save_restore_xer
PPC64_TESTS += xxspltw

Expand Down
51 changes: 51 additions & 0 deletions tests/tcg/ppc64/vector.c
@@ -0,0 +1,51 @@
#include <assert.h>
#include <stdint.h>
#include "qemu/compiler.h"

int main(void)
{
unsigned int result_wi;
vector unsigned char vbc_bi_src = { 0xFF, 0xFF, 0, 0xFF, 0xFF, 0xFF,
0xFF, 0xFF, 0xFF, 0xFF, 0, 0, 0,
0, 0xFF, 0xFF};
vector unsigned short vbc_hi_src = { 0xFFFF, 0, 0, 0xFFFF,
0, 0, 0xFFFF, 0xFFFF};
vector unsigned int vbc_wi_src = {0, 0, 0xFFFFFFFF, 0xFFFFFFFF};
vector unsigned long long vbc_di_src = {0xFFFFFFFFFFFFFFFF, 0};
vector __uint128_t vbc_qi_src;

asm("vextractbm %0, %1" : "=r" (result_wi) : "v" (vbc_bi_src));
#if HOST_BIG_ENDIAN
assert(result_wi == 0b1101111111000011);
#else
assert(result_wi == 0b1100001111111011);
#endif

asm("vextracthm %0, %1" : "=r" (result_wi) : "v" (vbc_hi_src));
#if HOST_BIG_ENDIAN
assert(result_wi == 0b10010011);
#else
assert(result_wi == 0b11001001);
#endif

asm("vextractwm %0, %1" : "=r" (result_wi) : "v" (vbc_wi_src));
#if HOST_BIG_ENDIAN
assert(result_wi == 0b0011);
#else
assert(result_wi == 0b1100);
#endif

asm("vextractdm %0, %1" : "=r" (result_wi) : "v" (vbc_di_src));
#if HOST_BIG_ENDIAN
assert(result_wi == 0b10);
#else
assert(result_wi == 0b01);
#endif

vbc_qi_src[0] = 0x1;
vbc_qi_src[0] = vbc_qi_src[0] << 127;
asm("vextractqm %0, %1" : "=r" (result_wi) : "v" (vbc_qi_src));
assert(result_wi == 0b1);

return 0;
}