Skip to content

Commit

Permalink
target/arm: Support more GM blocksizes
Browse files (browse the repository at this point in the history)
Support all of the easy GM block sizes.
Use direct memory operations, since the pointers are aligned.

While BS=2 (16 bytes, 1 tag) is a legal setting, it requires
an atomic store of one nibble.  This is not difficult, but there
is also no point in supporting it until required.

Note that Cortex-A710 sets the GM blocksize to match its cacheline
size of 64 bytes.  I expect many implementations will also
match the cacheline, which makes 16 bytes very unlikely.

Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
Message-id: 20230811214031.171020-4-richard.henderson@linaro.org
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
  • Loading branch information
rth7680 authored and pm215 committed Aug 31, 2023
1 parent 851ec6e commit 7134cb0
Show file tree
Hide file tree
Showing 2 changed files with 62 additions and 12 deletions.
18 changes: 14 additions & 4 deletions target/arm/cpu.c
Original file line number Diff line number Diff line change
Expand Up @@ -2056,16 +2056,26 @@ static void arm_cpu_realizefn(DeviceState *dev, Error **errp)
ID_PFR1, VIRTUALIZATION, 0);
}

if (cpu_isar_feature(aa64_mte, cpu)) {
/*
* The architectural range of GM blocksize is 2-6, however qemu
* doesn't support blocksize of 2 (see HELPER(ldgm)).
*/
if (tcg_enabled()) {
assert(cpu->gm_blocksize >= 3 && cpu->gm_blocksize <= 6);
}

#ifndef CONFIG_USER_ONLY
if (cpu->tag_memory == NULL && cpu_isar_feature(aa64_mte, cpu)) {
/*
* Disable the MTE feature bits if we do not have tag-memory
* provided by the machine.
*/
cpu->isar.id_aa64pfr1 =
FIELD_DP64(cpu->isar.id_aa64pfr1, ID_AA64PFR1, MTE, 0);
}
if (cpu->tag_memory == NULL) {
cpu->isar.id_aa64pfr1 =
FIELD_DP64(cpu->isar.id_aa64pfr1, ID_AA64PFR1, MTE, 0);
}
#endif
}

if (tcg_enabled()) {
/*
Expand Down
56 changes: 48 additions & 8 deletions target/arm/tcg/mte_helper.c
Original file line number Diff line number Diff line change
Expand Up @@ -428,6 +428,8 @@ uint64_t HELPER(ldgm)(CPUARMState *env, uint64_t ptr)
int gm_bs = env_archcpu(env)->gm_blocksize;
int gm_bs_bytes = 4 << gm_bs;
void *tag_mem;
uint64_t ret;
int shift;

ptr = QEMU_ALIGN_DOWN(ptr, gm_bs_bytes);

Expand All @@ -443,16 +445,41 @@ uint64_t HELPER(ldgm)(CPUARMState *env, uint64_t ptr)

/*
* The ordering of elements within the word corresponds to
* a little-endian operation.
* a little-endian operation. Computation of shift comes from
*
* index = address<LOG2_TAG_GRANULE+3:LOG2_TAG_GRANULE>
* data<index*4+3:index*4> = tag
*
* Because of the alignment of ptr above, BS=6 has shift=0.
* All memory operations are aligned. Defer support for BS=2,
* requiring insertion or extraction of a nibble, until we
* support a cpu that requires it.
*/
switch (gm_bs) {
case 3:
/* 32 bytes -> 2 tags -> 8 result bits */
ret = *(uint8_t *)tag_mem;
break;
case 4:
/* 64 bytes -> 4 tags -> 16 result bits */
ret = cpu_to_le16(*(uint16_t *)tag_mem);
break;
case 5:
/* 128 bytes -> 8 tags -> 32 result bits */
ret = cpu_to_le32(*(uint32_t *)tag_mem);
break;
case 6:
/* 256 bytes -> 16 tags -> 64 result bits */
return ldq_le_p(tag_mem);
return cpu_to_le64(*(uint64_t *)tag_mem);
default:
/* cpu configured with unsupported gm blocksize. */
/*
* CPU configured with unsupported/invalid gm blocksize.
* This is detected early in arm_cpu_realizefn.
*/
g_assert_not_reached();
}
shift = extract64(ptr, LOG2_TAG_GRANULE, 4) * 4;
return ret << shift;
}

void HELPER(stgm)(CPUARMState *env, uint64_t ptr, uint64_t val)
Expand All @@ -462,6 +489,7 @@ void HELPER(stgm)(CPUARMState *env, uint64_t ptr, uint64_t val)
int gm_bs = env_archcpu(env)->gm_blocksize;
int gm_bs_bytes = 4 << gm_bs;
void *tag_mem;
int shift;

ptr = QEMU_ALIGN_DOWN(ptr, gm_bs_bytes);

Expand All @@ -478,13 +506,25 @@ void HELPER(stgm)(CPUARMState *env, uint64_t ptr, uint64_t val)
return;
}

/*
* The ordering of elements within the word corresponds to
* a little-endian operation.
*/
/* See LDGM for comments on BS and on shift. */
shift = extract64(ptr, LOG2_TAG_GRANULE, 4) * 4;
val >>= shift;
switch (gm_bs) {
case 3:
/* 32 bytes -> 2 tags -> 8 result bits */
*(uint8_t *)tag_mem = val;
break;
case 4:
/* 64 bytes -> 4 tags -> 16 result bits */
*(uint16_t *)tag_mem = cpu_to_le16(val);
break;
case 5:
/* 128 bytes -> 8 tags -> 32 result bits */
*(uint32_t *)tag_mem = cpu_to_le32(val);
break;
case 6:
stq_le_p(tag_mem, val);
/* 256 bytes -> 16 tags -> 64 result bits */
*(uint64_t *)tag_mem = cpu_to_le64(val);
break;
default:
/* cpu configured with unsupported gm blocksize. */
Expand Down

0 comments on commit 7134cb0

Please sign in to comment.