Skip to content

Commit

Permalink
s390x/tcg: Implement VECTOR STRING RANGE COMPARE
Browse files Browse the repository at this point in the history
Unfortunately, there is no easy way to avoid looping over all elements
in v2. Provide specialized variants for every combination of cc and rt
(!cc,!rt / !cc,rt / cc,!rt / cc,rt) and for all element types. Especially
for different values of rt, the compiler might be able to optimize the
code a lot.

Add s390_vec_write_element().

Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Signed-off-by: David Hildenbrand <david@redhat.com>
  • Loading branch information
davidhildenbrand committed Jun 7, 2019
1 parent be6324c commit 13b0228
Show file tree
Hide file tree
Showing 5 changed files with 237 additions and 0 deletions.
12 changes: 12 additions & 0 deletions target/s390x/helper.h
Expand Up @@ -236,6 +236,18 @@ DEF_HELPER_FLAGS_3(gvec_vistr32, TCG_CALL_NO_RWG, void, ptr, cptr, i32)
/* VECTOR ISOLATE STRING helpers that additionally receive env, per element size */
DEF_HELPER_4(gvec_vistr_cc8, void, ptr, cptr, env, i32)
DEF_HELPER_4(gvec_vistr_cc16, void, ptr, cptr, env, i32)
DEF_HELPER_4(gvec_vistr_cc32, void, ptr, cptr, env, i32)
/*
 * VECTOR STRING RANGE COMPARE helpers, one per element size (8/16/32 bit).
 * Arguments: pointer to the result vector, three read-only vector pointers
 * and the 32-bit descriptor. These variants do not receive env.
 */
DEF_HELPER_FLAGS_5(gvec_vstrc8, TCG_CALL_NO_RWG, void, ptr, cptr, cptr, cptr, i32)
DEF_HELPER_FLAGS_5(gvec_vstrc16, TCG_CALL_NO_RWG, void, ptr, cptr, cptr, cptr, i32)
DEF_HELPER_FLAGS_5(gvec_vstrc32, TCG_CALL_NO_RWG, void, ptr, cptr, cptr, cptr, i32)
/* As above, but the "rt" variants (result is a per-element bit vector) */
DEF_HELPER_FLAGS_5(gvec_vstrc_rt8, TCG_CALL_NO_RWG, void, ptr, cptr, cptr, cptr, i32)
DEF_HELPER_FLAGS_5(gvec_vstrc_rt16, TCG_CALL_NO_RWG, void, ptr, cptr, cptr, cptr, i32)
DEF_HELPER_FLAGS_5(gvec_vstrc_rt32, TCG_CALL_NO_RWG, void, ptr, cptr, cptr, cptr, i32)
/*
 * Condition-code variants: these take env as an extra argument because the
 * helper stores its result code in env->cc_op, and they are therefore
 * declared without the TCG_CALL_NO_RWG flag.
 */
DEF_HELPER_6(gvec_vstrc_cc8, void, ptr, cptr, cptr, cptr, env, i32)
DEF_HELPER_6(gvec_vstrc_cc16, void, ptr, cptr, cptr, cptr, env, i32)
DEF_HELPER_6(gvec_vstrc_cc32, void, ptr, cptr, cptr, cptr, env, i32)
/* Condition-code variants that also produce the "rt" bit vector */
DEF_HELPER_6(gvec_vstrc_cc_rt8, void, ptr, cptr, cptr, cptr, env, i32)
DEF_HELPER_6(gvec_vstrc_cc_rt16, void, ptr, cptr, cptr, cptr, env, i32)
DEF_HELPER_6(gvec_vstrc_cc_rt32, void, ptr, cptr, cptr, cptr, env, i32)

#ifndef CONFIG_USER_ONLY
DEF_HELPER_3(servc, i32, env, i64, i64)
Expand Down
2 changes: 2 additions & 0 deletions target/s390x/insn-data.def
Expand Up @@ -1201,6 +1201,8 @@
F(0xe781, VFENE, VRR_b, V, 0, 0, 0, 0, vfene, 0, IF_VEC)
/* VECTOR ISOLATE STRING */
F(0xe75c, VISTR, VRR_a, V, 0, 0, 0, 0, vistr, 0, IF_VEC)
/*
 * VECTOR STRING RANGE COMPARE
 * (VRR_d format; the "vstrc" entry presumably selects op_vstrc() - confirm
 * against the table macros)
 */
F(0xe78a, VSTRC, VRR_d, V, 0, 0, 0, 0, vstrc, 0, IF_VEC)

#ifndef CONFIG_USER_ONLY
/* COMPARE AND SWAP AND PURGE */
Expand Down
59 changes: 59 additions & 0 deletions target/s390x/translate_vx.inc.c
Expand Up @@ -217,6 +217,10 @@ static void get_vec_element_ptr_i64(TCGv_ptr ptr, uint8_t reg, TCGv_i64 enr,
tcg_gen_gvec_4_ool(vec_full_reg_offset(v1), vec_full_reg_offset(v2), \
vec_full_reg_offset(v3), vec_full_reg_offset(v4), \
16, 16, data, fn)
/*
 * Wrapper around tcg_gen_gvec_4_ptr() for four vector registers: v1..v4 are
 * register numbers that get translated into full-register offsets, ptr is
 * forwarded as the additional pointer argument, data and fn are passed
 * through unchanged. The two constants 16 are presumably the operation and
 * maximum size in bytes (one full vector register) - confirm against
 * tcg_gen_gvec_4_ptr().
 */
#define gen_gvec_4_ptr(v1, v2, v3, v4, ptr, data, fn) \
tcg_gen_gvec_4_ptr(vec_full_reg_offset(v1), vec_full_reg_offset(v2), \
vec_full_reg_offset(v3), vec_full_reg_offset(v4), \
ptr, 16, 16, data, fn)
/*
 * Wrapper around tcg_gen_gvec_dup_i64() that targets vector register number
 * v1 (translated into its full-register offset) with element size es and
 * value c. Presumably replicates c into every element of the register -
 * confirm against tcg_gen_gvec_dup_i64().
 */
#define gen_gvec_dup_i64(es, v1, c) \
tcg_gen_gvec_dup_i64(es, vec_full_reg_offset(v1), 16, 16, c)
#define gen_gvec_mov(v1, v2) \
Expand Down Expand Up @@ -2479,3 +2483,58 @@ static DisasJumpType op_vistr(DisasContext *s, DisasOps *o)
}
return DISAS_NEXT;
}

/*
 * Translate VECTOR STRING RANGE COMPARE.
 *
 * The m5 field is the element size; anything above ES_32 raises a
 * specification exception. Two flags taken from the m6 field select the
 * helper that is called out of line:
 *   - extract32(m6, 0, 1): use the variants that receive cpu_env, followed
 *     by set_cc_static()
 *   - extract32(m6, 2, 1): use the "rt" variants
 * The complete m6 value is handed to the helper as descriptor data.
 */
static DisasJumpType op_vstrc(DisasContext *s, DisasOps *o)
{
    /* both tables are indexed by [rt][es] */
    static gen_helper_gvec_4 * const fns[2][3] = {
        {
            gen_helper_gvec_vstrc8,
            gen_helper_gvec_vstrc16,
            gen_helper_gvec_vstrc32,
        }, {
            gen_helper_gvec_vstrc_rt8,
            gen_helper_gvec_vstrc_rt16,
            gen_helper_gvec_vstrc_rt32,
        },
    };
    static gen_helper_gvec_4_ptr * const fns_cc[2][3] = {
        {
            gen_helper_gvec_vstrc_cc8,
            gen_helper_gvec_vstrc_cc16,
            gen_helper_gvec_vstrc_cc32,
        }, {
            gen_helper_gvec_vstrc_cc_rt8,
            gen_helper_gvec_vstrc_cc_rt16,
            gen_helper_gvec_vstrc_cc_rt32,
        },
    };
    const uint8_t es = get_field(s->fields, m5);
    const uint8_t m6 = get_field(s->fields, m6);
    const bool want_cc = extract32(m6, 0, 1);
    const bool want_rt = extract32(m6, 2, 1);
    const int r1 = get_field(s->fields, v1);
    const int r2 = get_field(s->fields, v2);
    const int r3 = get_field(s->fields, v3);
    const int r4 = get_field(s->fields, v4);

    if (es > ES_32) {
        gen_program_exception(s, PGM_SPECIFICATION);
        return DISAS_NORETURN;
    }

    if (!want_cc) {
        /* no condition code requested: the helper does not need cpu_env */
        gen_gvec_4_ool(r1, r2, r3, r4, m6, fns[want_rt][es]);
        return DISAS_NEXT;
    }

    gen_gvec_4_ptr(r1, r2, r3, r4, cpu_env, m6, fns_cc[want_rt][es]);
    set_cc_static(s);
    return DISAS_NEXT;
}
21 changes: 21 additions & 0 deletions target/s390x/vec.h
Expand Up @@ -117,4 +117,25 @@ static inline void s390_vec_write_element64(S390Vector *v, uint8_t enr,
v->doubleword[enr] = data;
}

/*
 * Store @data into element number @enr of @v, dispatching on the element
 * size @es (MO_8, MO_16, MO_32 or MO_64) to the matching fixed-size writer.
 * Any other @es value is a programming error and trips
 * g_assert_not_reached().
 */
static inline void s390_vec_write_element(S390Vector *v, uint8_t enr,
                                          uint8_t es, uint64_t data)
{
    switch (es) {
    case MO_8:
        s390_vec_write_element8(v, enr, data);
        return;
    case MO_16:
        s390_vec_write_element16(v, enr, data);
        return;
    case MO_32:
        s390_vec_write_element32(v, enr, data);
        return;
    case MO_64:
        s390_vec_write_element64(v, enr, data);
        return;
    }
    /* only reached for an unsupported element size */
    g_assert_not_reached();
}

#endif /* S390X_VEC_H */
143 changes: 143 additions & 0 deletions target/s390x/vec_string_helper.c
Expand Up @@ -328,3 +328,146 @@ void HELPER(gvec_vistr_cc##BITS)(void *v1, const void *v2, CPUS390XState *env, \
/* Instantiate the gvec_vistr_cc<BITS> helpers for 8, 16 and 32 bit elements */
DEF_VISTR_CC_HELPER(8)
DEF_VISTR_CC_HELPER(16)
DEF_VISTR_CC_HELPER(32)

/*
 * Evaluate one range control byte @c for the pair (@data, @l).
 *
 * Exactly one bit of @c decides the result, depending on how @data relates
 * to @l: extract32() position 7 if they are equal, position 6 if @data is
 * lower and position 5 if @data is higher.
 *
 * Returns the value of the selected control bit.
 */
static bool element_compare(uint32_t data, uint32_t l, uint8_t c)
{
    int ctrl_bit;

    if (data == l) {
        ctrl_bit = 7;
    } else if (data < l) {
        ctrl_bit = 6;
    } else {
        ctrl_bit = 5;
    }
    return extract32(c, ctrl_bit, 1);
}

/*
 * Check @data against the ranges in @v3: the elements of @v3 are taken in
 * consecutive pairs, and @data matches a pair if both element_compare()
 * checks (using the corresponding control bytes from @v4) succeed.
 *
 * Returns true as soon as one pair matches.
 */
static bool vstrc_range_match(uint32_t data, const void *v3, const void *v4,
                              uint8_t es)
{
    const int es_bytes = 1 << es;
    const int nr_elements = 16 / es_bytes;
    int pair;

    for (pair = 0; pair < nr_elements; pair += 2) {
        const uint32_t l1 = s390_vec_read_element(v3, pair, es);
        const uint32_t l2 = s390_vec_read_element(v3, pair + 1, es);
        /* we are only interested in the highest byte of each element */
        const uint8_t c1 = s390_vec_read_element8(v4, pair * es_bytes);
        const uint8_t c2 = s390_vec_read_element8(v4, (pair + 1) * es_bytes);

        if (element_compare(data, l1, c1) && element_compare(data, l2, c2)) {
            return true;
        }
    }
    return false;
}

/*
 * Common implementation of VECTOR STRING RANGE COMPARE.
 *
 * Every element of @v2 is checked against the ranges given by @v3 and the
 * control bytes in @v4.
 *
 * @in: invert the outcome of the range check of each element
 * @rt: store an all-ones element in @v1 for every matching element (all
 *      other elements are zero) instead of a byte index
 * @zs: additionally look for the first zero element in @v2
 * @es: element size (MO_8, MO_16 or MO_32 for the helpers in this file)
 *
 * Without @rt, doubleword 0 of @v1 receives the lower of the byte index of
 * the first match and of the first zero element (16 if there is neither),
 * and doubleword 1 is cleared.
 *
 * Returns the condition code: 0 - match for zero, 1 - matching elements
 * but no match for zero, 2 - matching elements before the match for zero,
 * 3 - no match.
 */
static int vstrc(void *v1, const void *v2, const void *v3, const void *v4,
                 bool in, bool rt, bool zs, uint8_t es)
{
    const uint64_t lsbs_mask = get_element_lsbs_mask(es);
    const uint64_t hi = s390_vec_read_element64(v2, 0);
    const uint64_t lo = s390_vec_read_element64(v2, 1);
    const int es_bytes = 1 << es;
    const int nr_elements = 16 / es_bytes;
    int zero_pos = 16, match_pos = 16;
    S390Vector bit_vector = {};
    int idx;

    if (zs) {
        const uint64_t zeroes_hi = zero_search(hi, lsbs_mask);
        const uint64_t zeroes_lo = zero_search(lo, lsbs_mask);

        zero_pos = match_index(zeroes_hi, zeroes_lo);
    }

    for (idx = 0; idx < nr_elements; idx++) {
        const uint32_t data = s390_vec_read_element(v2, idx, es);
        const int byte_pos = idx * es_bytes;

        /* if we don't need a bit vector, we can stop at the zero element */
        if (!rt && byte_pos == zero_pos) {
            break;
        }

        /* "in" inverts the range check: equal values mean "no match" */
        if (vstrc_range_match(data, v3, v4, es) == in) {
            continue;
        }

        if (!rt) {
            /* only the first match is of interest */
            match_pos = byte_pos;
            break;
        }

        /* remember the first match and flag the element in the bit vector */
        if (byte_pos < match_pos) {
            match_pos = byte_pos;
        }
        s390_vec_write_element(&bit_vector, idx, es, -1ull);
    }

    if (rt) {
        *(S390Vector *)v1 = bit_vector;
    } else {
        s390_vec_write_element64(v1, 0, MIN(match_pos, zero_pos));
        s390_vec_write_element64(v1, 1, 0);
    }

    if (zero_pos == 16) {
        /* no match at all vs. matching elements without a match for zero */
        return match_pos == 16 ? 3 : 1;
    }
    /* matching elements before the match for zero vs. match for zero */
    return match_pos < zero_pos ? 2 : 0;
}

/*
 * Define gvec_vstrc<BITS>: VECTOR STRING RANGE COMPARE on BITS-wide elements
 * without bit-vector result (rt = false); the value returned by vstrc() is
 * discarded. Position 3 of the descriptor data is passed to vstrc() as "in",
 * position 1 as "zs".
 */
#define DEF_VSTRC_HELPER(BITS)                                                 \
void HELPER(gvec_vstrc##BITS)(void *v1, const void *v2, const void *v3,        \
                              const void *v4, uint32_t desc)                   \
{                                                                              \
    const uint32_t flags = simd_data(desc);                                    \
    const bool invert = extract32(flags, 3, 1);                                \
    const bool search_zero = extract32(flags, 1, 1);                           \
                                                                               \
    vstrc(v1, v2, v3, v4, invert, false, search_zero, MO_##BITS);              \
}
DEF_VSTRC_HELPER(8)
DEF_VSTRC_HELPER(16)
DEF_VSTRC_HELPER(32)

/*
 * Define gvec_vstrc_rt<BITS>: VECTOR STRING RANGE COMPARE on BITS-wide
 * elements with bit-vector result (rt = true); the value returned by vstrc()
 * is discarded. Position 3 of the descriptor data is passed to vstrc() as
 * "in", position 1 as "zs".
 */
#define DEF_VSTRC_RT_HELPER(BITS)                                              \
void HELPER(gvec_vstrc_rt##BITS)(void *v1, const void *v2, const void *v3,     \
                                 const void *v4, uint32_t desc)                \
{                                                                              \
    const uint32_t flags = simd_data(desc);                                    \
    const bool invert = extract32(flags, 3, 1);                                \
    const bool search_zero = extract32(flags, 1, 1);                           \
                                                                               \
    vstrc(v1, v2, v3, v4, invert, true, search_zero, MO_##BITS);               \
}
DEF_VSTRC_RT_HELPER(8)
DEF_VSTRC_RT_HELPER(16)
DEF_VSTRC_RT_HELPER(32)

/*
 * Define gvec_vstrc_cc<BITS>: VECTOR STRING RANGE COMPARE on BITS-wide
 * elements without bit-vector result (rt = false). The value returned by
 * vstrc() is stored in env->cc_op. Position 3 of the descriptor data is
 * passed to vstrc() as "in", position 1 as "zs".
 */
#define DEF_VSTRC_CC_HELPER(BITS)                                              \
void HELPER(gvec_vstrc_cc##BITS)(void *v1, const void *v2, const void *v3,     \
                                 const void *v4, CPUS390XState *env,           \
                                 uint32_t desc)                                \
{                                                                              \
    const uint32_t flags = simd_data(desc);                                    \
    const bool invert = extract32(flags, 3, 1);                                \
    const bool search_zero = extract32(flags, 1, 1);                           \
                                                                               \
    env->cc_op = vstrc(v1, v2, v3, v4, invert, false, search_zero,             \
                       MO_##BITS);                                             \
}
DEF_VSTRC_CC_HELPER(8)
DEF_VSTRC_CC_HELPER(16)
DEF_VSTRC_CC_HELPER(32)

/*
 * Define gvec_vstrc_cc_rt<BITS>: VECTOR STRING RANGE COMPARE on BITS-wide
 * elements with bit-vector result (rt = true). The value returned by vstrc()
 * is stored in env->cc_op. Position 3 of the descriptor data is passed to
 * vstrc() as "in", position 1 as "zs".
 */
#define DEF_VSTRC_CC_RT_HELPER(BITS)                                           \
void HELPER(gvec_vstrc_cc_rt##BITS)(void *v1, const void *v2, const void *v3,  \
                                    const void *v4, CPUS390XState *env,        \
                                    uint32_t desc)                             \
{                                                                              \
    const uint32_t flags = simd_data(desc);                                    \
    const bool invert = extract32(flags, 3, 1);                                \
    const bool search_zero = extract32(flags, 1, 1);                           \
                                                                               \
    env->cc_op = vstrc(v1, v2, v3, v4, invert, true, search_zero,              \
                       MO_##BITS);                                             \
}
DEF_VSTRC_CC_RT_HELPER(8)
DEF_VSTRC_CC_RT_HELPER(16)
DEF_VSTRC_CC_RT_HELPER(32)

0 comments on commit 13b0228

Please sign in to comment.