Merge remote-tracking branch 'remotes/pmaydell/tags/pull-target-arm-20210525' into staging

target-arm queue:
 * Implement SVE2 emulation
 * Implement integer matrix multiply accumulate
 * Implement FEAT_TLBIOS
 * Implement FEAT_TLBRANGE (see the decode sketch after this list)
 * disas/libvixl: Protect C system header for C++ compiler
 * Use correct SP in M-profile exception return
 * AN524, AN547: Correct modelling of internal SRAMs
 * hw/intc/arm_gicv3_cpuif: Fix EOIR write access check logic
 * hw/arm/smmuv3: Another range invalidation fix
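
Editorial note: the FEAT_TLBRANGE work in this queue funnels into the new tlb_flush_range_by_mmuidx() API visible in the cputlb.c diff below. The guest's TLBI range payload is decoded to a base address and a byte length, and cputlb flushes that span. Below is a self-contained illustration of the length decode for a 4KB granule; the field positions and the extract64() helper are my reading of the TLBI RVA* payload, given here for orientation only, not the series' actual target/arm code.

    /*
     * Illustrative decode of a FEAT_TLBRANGE (TLBI RVA*) payload for a
     * 4KB granule: length = (NUM + 1) << (5*SCALE + 1 + page_shift).
     * Field positions are an assumption for illustration.
     */
    #include <inttypes.h>
    #include <stdint.h>
    #include <stdio.h>

    static uint64_t extract64(uint64_t value, int start, int length)
    {
        return (value >> start) & (~0ULL >> (64 - length));
    }

    int main(void)
    {
        const int page_shift = 12;            /* 4KB translation granule */
        uint64_t payload = 0;

        payload |= 3ULL << 39;                /* NUM   = 3  */
        payload |= 1ULL << 44;                /* SCALE = 1  */
        payload |= 0x12345ULL;                /* BaseADDR, in page units */

        uint64_t num   = extract64(payload, 39, 5);
        uint64_t scale = extract64(payload, 44, 2);
        uint64_t exponent = (5 * scale) + 1;
        uint64_t length = (num + 1) << (exponent + page_shift);
        uint64_t base   = extract64(payload, 0, 37) << page_shift;

        /* (3 + 1) << 6 = 256 pages, i.e. 1MB starting at base. */
        printf("flush 0x%" PRIx64 " bytes at 0x%" PRIx64 "\n", length, base);
        return 0;
    }

That (base, length) pair, together with the MMU-index map and the count of significant address bits, matches the signature of tlb_flush_range_by_mmuidx() in the diff below.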

# gpg: Signature made Tue 25 May 2021 16:02:25 BST
# gpg:                using RSA key E1A5C593CD419DE28E8315CF3C2525ED14360CDE
# gpg:                issuer "peter.maydell@linaro.org"
# gpg: Good signature from "Peter Maydell <peter.maydell@linaro.org>" [ultimate]
# gpg:                 aka "Peter Maydell <pmaydell@gmail.com>" [ultimate]
# gpg:                 aka "Peter Maydell <pmaydell@chiark.greenend.org.uk>" [ultimate]
# Primary key fingerprint: E1A5 C593 CD41 9DE2 8E83  15CF 3C25 25ED 1436 0CDE

* remotes/pmaydell/tags/pull-target-arm-20210525: (114 commits)
  target/arm: Enable SVE2 and related extensions
  linux-user/aarch64: Enable hwcap bits for sve2 and related extensions
  target/arm: Implement integer matrix multiply accumulate
  target/arm: Implement aarch32 VSUDOT, VUSDOT
  target/arm: Split decode of VSDOT and VUDOT
  target/arm: Split out do_neon_ddda
  target/arm: Fix decode for VDOT (indexed)
  target/arm: Remove unused fpst from VDOT_scalar
  target/arm: Split out do_neon_ddda_fpst
  target/arm: Implement aarch64 SUDOT, USDOT
  target/arm: Implement SVE2 fp multiply-add long
  target/arm: Move endian adjustment macros to vec_internal.h
  target/arm: Implement SVE2 bitwise shift immediate
  target/arm: Implement 128-bit ZIP, UZP, TRN
  target/arm: Implement SVE2 LD1RO
  target/arm: Tidy do_ldrq
  target/arm: Share table of sve load functions
  target/arm: Implement SVE2 FLOGB
  target/arm: Implement SVE2 FCVTXNT, FCVTX
  target/arm: Implement SVE2 FCVTLT
  ...

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
pm215 committed May 25, 2021
2 parents 0dab1d3 + f8680aa commit 92f8c6f
Showing 33 changed files with 8,266 additions and 1,291 deletions.
231 changes: 114 additions & 117 deletions accel/tcg/cputlb.c
@@ -707,8 +707,9 @@ void tlb_flush_page_all_cpus_synced(CPUState *src, target_ulong addr)
tlb_flush_page_by_mmuidx_all_cpus_synced(src, addr, ALL_MMUIDX_BITS);
}

static void tlb_flush_page_bits_locked(CPUArchState *env, int midx,
target_ulong page, unsigned bits)
static void tlb_flush_range_locked(CPUArchState *env, int midx,
target_ulong addr, target_ulong len,
unsigned bits)
{
CPUTLBDesc *d = &env_tlb(env)->d[midx];
CPUTLBDescFast *f = &env_tlb(env)->f[midx];
@@ -718,106 +719,94 @@ static void tlb_flush_page_bits_locked(CPUArchState *env, int midx,
* If @bits is smaller than the tlb size, there may be multiple entries
* within the TLB; otherwise all addresses that match under @mask hit
* the same TLB entry.
*
* TODO: Perhaps allow bits to be a few bits less than the size.
* For now, just flush the entire TLB.
*
* If @len is larger than the tlb size, then it will take longer to
* test all of the entries in the TLB than it will to flush it all.
*/
if (mask < f->mask) {
if (mask < f->mask || len > f->mask) {
tlb_debug("forcing full flush midx %d ("
TARGET_FMT_lx "/" TARGET_FMT_lx ")\n",
midx, page, mask);
TARGET_FMT_lx "/" TARGET_FMT_lx "+" TARGET_FMT_lx ")\n",
midx, addr, mask, len);
tlb_flush_one_mmuidx_locked(env, midx, get_clock_realtime());
return;
}

/* Check if we need to flush due to large pages. */
if ((page & d->large_page_mask) == d->large_page_addr) {
/*
* Check if we need to flush due to large pages.
* Because large_page_mask contains all 1's from the msb,
* we only need to test the end of the range.
*/
if (((addr + len - 1) & d->large_page_mask) == d->large_page_addr) {
tlb_debug("forcing full flush midx %d ("
TARGET_FMT_lx "/" TARGET_FMT_lx ")\n",
midx, d->large_page_addr, d->large_page_mask);
tlb_flush_one_mmuidx_locked(env, midx, get_clock_realtime());
return;
}

if (tlb_flush_entry_mask_locked(tlb_entry(env, midx, page), page, mask)) {
tlb_n_used_entries_dec(env, midx);
for (target_ulong i = 0; i < len; i += TARGET_PAGE_SIZE) {
target_ulong page = addr + i;
CPUTLBEntry *entry = tlb_entry(env, midx, page);

if (tlb_flush_entry_mask_locked(entry, page, mask)) {
tlb_n_used_entries_dec(env, midx);
}
tlb_flush_vtlb_page_mask_locked(env, midx, page, mask);
}
tlb_flush_vtlb_page_mask_locked(env, midx, page, mask);
}

typedef struct {
target_ulong addr;
target_ulong len;
uint16_t idxmap;
uint16_t bits;
} TLBFlushPageBitsByMMUIdxData;
} TLBFlushRangeData;

static void
tlb_flush_page_bits_by_mmuidx_async_0(CPUState *cpu,
TLBFlushPageBitsByMMUIdxData d)
static void tlb_flush_range_by_mmuidx_async_0(CPUState *cpu,
TLBFlushRangeData d)
{
CPUArchState *env = cpu->env_ptr;
int mmu_idx;

assert_cpu_is_self(cpu);

tlb_debug("page addr:" TARGET_FMT_lx "/%u mmu_map:0x%x\n",
d.addr, d.bits, d.idxmap);
tlb_debug("range:" TARGET_FMT_lx "/%u+" TARGET_FMT_lx " mmu_map:0x%x\n",
d.addr, d.bits, d.len, d.idxmap);

qemu_spin_lock(&env_tlb(env)->c.lock);
for (mmu_idx = 0; mmu_idx < NB_MMU_MODES; mmu_idx++) {
if ((d.idxmap >> mmu_idx) & 1) {
tlb_flush_page_bits_locked(env, mmu_idx, d.addr, d.bits);
tlb_flush_range_locked(env, mmu_idx, d.addr, d.len, d.bits);
}
}
qemu_spin_unlock(&env_tlb(env)->c.lock);

tb_flush_jmp_cache(cpu, d.addr);
}

static bool encode_pbm_to_runon(run_on_cpu_data *out,
TLBFlushPageBitsByMMUIdxData d)
{
/* We need 6 bits to hold to hold @bits up to 63. */
if (d.idxmap <= MAKE_64BIT_MASK(0, TARGET_PAGE_BITS - 6)) {
*out = RUN_ON_CPU_TARGET_PTR(d.addr | (d.idxmap << 6) | d.bits);
return true;
for (target_ulong i = 0; i < d.len; i += TARGET_PAGE_SIZE) {
tb_flush_jmp_cache(cpu, d.addr + i);
}
return false;
}

static TLBFlushPageBitsByMMUIdxData
decode_runon_to_pbm(run_on_cpu_data data)
{
target_ulong addr_map_bits = (target_ulong) data.target_ptr;
return (TLBFlushPageBitsByMMUIdxData){
.addr = addr_map_bits & TARGET_PAGE_MASK,
.idxmap = (addr_map_bits & ~TARGET_PAGE_MASK) >> 6,
.bits = addr_map_bits & 0x3f
};
}

static void tlb_flush_page_bits_by_mmuidx_async_1(CPUState *cpu,
run_on_cpu_data runon)
static void tlb_flush_range_by_mmuidx_async_1(CPUState *cpu,
run_on_cpu_data data)
{
tlb_flush_page_bits_by_mmuidx_async_0(cpu, decode_runon_to_pbm(runon));
}

static void tlb_flush_page_bits_by_mmuidx_async_2(CPUState *cpu,
run_on_cpu_data data)
{
TLBFlushPageBitsByMMUIdxData *d = data.host_ptr;
tlb_flush_page_bits_by_mmuidx_async_0(cpu, *d);
TLBFlushRangeData *d = data.host_ptr;
tlb_flush_range_by_mmuidx_async_0(cpu, *d);
g_free(d);
}

void tlb_flush_page_bits_by_mmuidx(CPUState *cpu, target_ulong addr,
uint16_t idxmap, unsigned bits)
void tlb_flush_range_by_mmuidx(CPUState *cpu, target_ulong addr,
target_ulong len, uint16_t idxmap,
unsigned bits)
{
TLBFlushPageBitsByMMUIdxData d;
run_on_cpu_data runon;
TLBFlushRangeData d;

/* If all bits are significant, this devolves to tlb_flush_page. */
if (bits >= TARGET_LONG_BITS) {
/*
* If all bits are significant, and len is small,
* this devolves to tlb_flush_page.
*/
if (bits >= TARGET_LONG_BITS && len <= TARGET_PAGE_SIZE) {
tlb_flush_page_by_mmuidx(cpu, addr, idxmap);
return;
}
@@ -829,34 +818,38 @@ void tlb_flush_page_bits_by_mmuidx(CPUState *cpu, target_ulong addr,

/* This should already be page aligned */
d.addr = addr & TARGET_PAGE_MASK;
d.len = len;
d.idxmap = idxmap;
d.bits = bits;

if (qemu_cpu_is_self(cpu)) {
tlb_flush_page_bits_by_mmuidx_async_0(cpu, d);
} else if (encode_pbm_to_runon(&runon, d)) {
async_run_on_cpu(cpu, tlb_flush_page_bits_by_mmuidx_async_1, runon);
tlb_flush_range_by_mmuidx_async_0(cpu, d);
} else {
TLBFlushPageBitsByMMUIdxData *p
= g_new(TLBFlushPageBitsByMMUIdxData, 1);

/* Otherwise allocate a structure, freed by the worker. */
*p = d;
async_run_on_cpu(cpu, tlb_flush_page_bits_by_mmuidx_async_2,
TLBFlushRangeData *p = g_memdup(&d, sizeof(d));
async_run_on_cpu(cpu, tlb_flush_range_by_mmuidx_async_1,
RUN_ON_CPU_HOST_PTR(p));
}
}

void tlb_flush_page_bits_by_mmuidx_all_cpus(CPUState *src_cpu,
target_ulong addr,
uint16_t idxmap,
unsigned bits)
void tlb_flush_page_bits_by_mmuidx(CPUState *cpu, target_ulong addr,
uint16_t idxmap, unsigned bits)
{
tlb_flush_range_by_mmuidx(cpu, addr, TARGET_PAGE_SIZE, idxmap, bits);
}

void tlb_flush_range_by_mmuidx_all_cpus(CPUState *src_cpu,
target_ulong addr, target_ulong len,
uint16_t idxmap, unsigned bits)
{
TLBFlushPageBitsByMMUIdxData d;
run_on_cpu_data runon;
TLBFlushRangeData d;
CPUState *dst_cpu;

/* If all bits are significant, this devolves to tlb_flush_page. */
if (bits >= TARGET_LONG_BITS) {
/*
* If all bits are significant, and len is small,
* this devolves to tlb_flush_page.
*/
if (bits >= TARGET_LONG_BITS && len <= TARGET_PAGE_SIZE) {
tlb_flush_page_by_mmuidx_all_cpus(src_cpu, addr, idxmap);
return;
}
@@ -868,40 +861,45 @@ void tlb_flush_page_bits_by_mmuidx_all_cpus(CPUState *src_cpu,

/* This should already be page aligned */
d.addr = addr & TARGET_PAGE_MASK;
d.len = len;
d.idxmap = idxmap;
d.bits = bits;

if (encode_pbm_to_runon(&runon, d)) {
flush_all_helper(src_cpu, tlb_flush_page_bits_by_mmuidx_async_1, runon);
} else {
CPUState *dst_cpu;
TLBFlushPageBitsByMMUIdxData *p;

/* Allocate a separate data block for each destination cpu. */
CPU_FOREACH(dst_cpu) {
if (dst_cpu != src_cpu) {
p = g_new(TLBFlushPageBitsByMMUIdxData, 1);
*p = d;
async_run_on_cpu(dst_cpu,
tlb_flush_page_bits_by_mmuidx_async_2,
RUN_ON_CPU_HOST_PTR(p));
}
/* Allocate a separate data block for each destination cpu. */
CPU_FOREACH(dst_cpu) {
if (dst_cpu != src_cpu) {
TLBFlushRangeData *p = g_memdup(&d, sizeof(d));
async_run_on_cpu(dst_cpu,
tlb_flush_range_by_mmuidx_async_1,
RUN_ON_CPU_HOST_PTR(p));
}
}

tlb_flush_page_bits_by_mmuidx_async_0(src_cpu, d);
tlb_flush_range_by_mmuidx_async_0(src_cpu, d);
}

void tlb_flush_page_bits_by_mmuidx_all_cpus_synced(CPUState *src_cpu,
target_ulong addr,
uint16_t idxmap,
unsigned bits)
void tlb_flush_page_bits_by_mmuidx_all_cpus(CPUState *src_cpu,
target_ulong addr,
uint16_t idxmap, unsigned bits)
{
TLBFlushPageBitsByMMUIdxData d;
run_on_cpu_data runon;
tlb_flush_range_by_mmuidx_all_cpus(src_cpu, addr, TARGET_PAGE_SIZE,
idxmap, bits);
}

void tlb_flush_range_by_mmuidx_all_cpus_synced(CPUState *src_cpu,
target_ulong addr,
target_ulong len,
uint16_t idxmap,
unsigned bits)
{
TLBFlushRangeData d, *p;
CPUState *dst_cpu;

/* If all bits are significant, this devolves to tlb_flush_page. */
if (bits >= TARGET_LONG_BITS) {
/*
* If all bits are significant, and len is small,
* this devolves to tlb_flush_page.
*/
if (bits >= TARGET_LONG_BITS && len <= TARGET_PAGE_SIZE) {
tlb_flush_page_by_mmuidx_all_cpus_synced(src_cpu, addr, idxmap);
return;
}
@@ -913,32 +911,31 @@ void tlb_flush_page_bits_by_mmuidx_all_cpus_synced(CPUState *src_cpu,

/* This should already be page aligned */
d.addr = addr & TARGET_PAGE_MASK;
d.len = len;
d.idxmap = idxmap;
d.bits = bits;

if (encode_pbm_to_runon(&runon, d)) {
flush_all_helper(src_cpu, tlb_flush_page_bits_by_mmuidx_async_1, runon);
async_safe_run_on_cpu(src_cpu, tlb_flush_page_bits_by_mmuidx_async_1,
runon);
} else {
CPUState *dst_cpu;
TLBFlushPageBitsByMMUIdxData *p;

/* Allocate a separate data block for each destination cpu. */
CPU_FOREACH(dst_cpu) {
if (dst_cpu != src_cpu) {
p = g_new(TLBFlushPageBitsByMMUIdxData, 1);
*p = d;
async_run_on_cpu(dst_cpu, tlb_flush_page_bits_by_mmuidx_async_2,
RUN_ON_CPU_HOST_PTR(p));
}
/* Allocate a separate data block for each destination cpu. */
CPU_FOREACH(dst_cpu) {
if (dst_cpu != src_cpu) {
p = g_memdup(&d, sizeof(d));
async_run_on_cpu(dst_cpu, tlb_flush_range_by_mmuidx_async_1,
RUN_ON_CPU_HOST_PTR(p));
}

p = g_new(TLBFlushPageBitsByMMUIdxData, 1);
*p = d;
async_safe_run_on_cpu(src_cpu, tlb_flush_page_bits_by_mmuidx_async_2,
RUN_ON_CPU_HOST_PTR(p));
}

p = g_memdup(&d, sizeof(d));
async_safe_run_on_cpu(src_cpu, tlb_flush_range_by_mmuidx_async_1,
RUN_ON_CPU_HOST_PTR(p));
}

void tlb_flush_page_bits_by_mmuidx_all_cpus_synced(CPUState *src_cpu,
target_ulong addr,
uint16_t idxmap,
unsigned bits)
{
tlb_flush_range_by_mmuidx_all_cpus_synced(src_cpu, addr, TARGET_PAGE_SIZE,
idxmap, bits);
}

/* update the TLBs so that writes to code in the virtual page 'addr'
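Two editorial notes on the cputlb.c changes above. First, the policy of the new tlb_flush_range_locked() is easier to read outside diff form. The sketch below is a self-contained restatement, not QEMU code: flush_all() and flush_page() stand in for tlb_flush_one_mmuidx_locked() and the per-entry flush, and the mask handling is simplified. It mirrors the three cases in order: full flush when too few address bits are significant or the range outruns what the TLB covers, full flush when the range ends inside the tracked large page (large_page_mask is all 1's from the msb, so testing only the last byte of the range suffices), and otherwise a page-by-page loop.

    /*
     * Self-contained restatement of the range-flush policy; all values
     * and the flush_all()/flush_page() stubs are illustrative stand-ins.
     */
    #include <stdint.h>
    #include <stdio.h>

    #define PAGE_SIZE 0x1000ULL

    static void flush_all(void)
    {
        puts("full TLB flush");
    }

    static void flush_page(uint64_t page)
    {
        printf("flush page 0x%llx\n", (unsigned long long)page);
    }

    static void flush_range(uint64_t addr, uint64_t len, unsigned bits,
                            uint64_t fast_mask,   /* like f->mask    */
                            uint64_t lp_addr,     /* large_page_addr */
                            uint64_t lp_mask)     /* large_page_mask */
    {
        uint64_t mask = (bits >= 64) ? ~0ULL : (1ULL << bits) - 1;

        /*
         * Too few significant bits, or more pages in the range than the
         * TLB can hold: testing every entry costs more than a refill.
         */
        if (mask < fast_mask || len > fast_mask) {
            flush_all();
            return;
        }

        /*
         * lp_mask is all 1's from the msb down, so if the end of the
         * range lands in the tracked large page, the whole range does.
         */
        if (((addr + len - 1) & lp_mask) == lp_addr) {
            flush_all();
            return;
        }

        for (uint64_t i = 0; i < len; i += PAGE_SIZE) {
            flush_page(addr + i);
        }
    }

    int main(void)
    {
        /* A 3-page invalidation, 48 significant bits, no large page. */
        flush_range(0x40000000ULL, 3 * PAGE_SIZE, 48,
                    0x3fffffULL, ~0ULL, ~0ULL);
        return 0;
    }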
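Second, note what disappeared: encode_pbm_to_runon() and decode_runon_to_pbm(), which packed addr, idxmap and bits into a single run_on_cpu_data word. Once len joins the struct, TLBFlushRangeData no longer fits in a pointer, so every cross-CPU flush now ships a heap copy made with g_memdup() and freed by the worker. Here is a minimal GLib sketch of that ownership pattern, with run_async() as a hypothetical stand-in for async_run_on_cpu(); g_memdup() itself has since been superseded by g_memdup2() in newer GLib.

    /*
     * Ownership sketch: copy the request to the heap, hand it to the
     * async worker, and let the worker free it.  run_async() is a
     * stand-in for QEMU's async_run_on_cpu(); the rest is plain GLib.
     */
    #include <glib.h>

    typedef struct {
        guint64 addr, len;
        guint16 idxmap, bits;
    } FlushData;

    static void worker(gpointer data)
    {
        FlushData *d = data;          /* the worker owns the copy...  */
        g_print("flush 0x%" G_GINT64_MODIFIER "x bytes at 0x%"
                G_GINT64_MODIFIER "x\n", d->len, d->addr);
        g_free(d);                    /* ...and releases it when done */
    }

    static void run_async(void (*fn)(gpointer), gpointer data)
    {
        fn(data);                     /* QEMU would queue this on another vCPU */
    }

    int main(void)
    {
        FlushData d = { 0x40000000, 0x3000, 0xffff, 48 };

        /*
         * One private heap copy per destination CPU; the sender's stack
         * copy may go out of scope before the worker ever runs.
         */
        FlushData *p = g_memdup(&d, sizeof(d));
        run_async(worker, p);
        return 0;
    }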
2 changes: 1 addition & 1 deletion disas/libvixl/vixl/code-buffer.h
@@ -27,7 +27,7 @@
#ifndef VIXL_CODE_BUFFER_H
#define VIXL_CODE_BUFFER_H

#include <string.h>
#include <cstring>
#include "vixl/globals.h"

namespace vixl {
16 changes: 9 additions & 7 deletions disas/libvixl/vixl/globals.h
@@ -40,15 +40,17 @@
#define __STDC_FORMAT_MACROS
#endif

#include <stdint.h>
extern "C" {
#include <inttypes.h>

#include <assert.h>
#include <stdarg.h>
#include <stdio.h>
#include <stdint.h>
#include <stdlib.h>
#include <stddef.h>
}

#include <cassert>
#include <cstdarg>
#include <cstddef>
#include <cstdio>
#include <cstdlib>

#include "vixl/platform.h"


2 changes: 1 addition & 1 deletion disas/libvixl/vixl/invalset.h
@@ -27,7 +27,7 @@
#ifndef VIXL_INVALSET_H_
#define VIXL_INVALSET_H_

#include <string.h>
#include <cstring>

#include <algorithm>
#include <vector>
2 changes: 2 additions & 0 deletions disas/libvixl/vixl/platform.h
@@ -28,7 +28,9 @@
#define PLATFORM_H

// Define platform specific functionalities.
extern "C" {
#include <signal.h>
}

namespace vixl {
inline void HostBreakpoint() { raise(SIGINT); }
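A closing note on the libvixl hunks above: they all apply one convention for mixing C headers into C++ translation units. Prefer the C++ <c*> wrapper where one exists (<cstring>, <cassert> and friends), and wrap any remaining plain C system header in extern "C" so its declarations get C linkage even on a toolchain whose header lacks its own __cplusplus guard. A minimal sketch of the resulting pattern (illustrative, not libvixl code):

    // Illustrative only: the include pattern the libvixl hunks adopt.
    extern "C" {
    #include <signal.h>       // plain C header, forced C linkage (platform.h)
    }

    #include <cstring>        // C++ wrapper replacing <string.h> (invalset.h)

    int main()
    {
        // raise() resolves as a C-linkage symbol, usable from C++.
        int (*breakpoint)(int) = raise;
        (void)breakpoint;
        return std::strlen("vixl") == 4 ? 0 : 1;
    }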
