|
| 1 | +/* |
| 2 | + * Copyright (C) 2018 Intel Corporation. |
| 3 | + * SPDX-License-Identifier: BSD-3-Clause |
| 4 | + */ |
| 5 | + |
| 6 | +/* |
| 7 | + * The Intel Trace Hub (aka. North Peak, NPK) is a trace aggregator for |
| 8 | + * Software, Firmware, and Hardware. On the virtualization platform, it |
| 9 | + * can be used to output the traces from SOS/UOS/Hypervisor/FW together |
| 10 | + * with unified timestamps. |
| 11 | + * |
| 12 | + * There are 2 software-visible MMIO spaces in the npk pci device. One is |
| 13 | + * the CSR, which maps the configuration registers, and the other is the |
| 14 | + * STMR, which is organized as many Masters and is used to send the traces. |
| 15 | + * Each Master has a fixed number of Channels, which is 128 on GP. Each |
| 16 | + * channel occupies 64B, so the offset of each Master is 8K (64B*128). |
| 17 | + * Here is the detailed layout of STMR: |
| 18 | + * M=NPK_SW_MSTR_STP (1024 on GP) |
| 19 | + * +-------------------+ |
| 20 | + * | m[M],c[C-1] | |
| 21 | + * Base(M,C-1) +-------------------+ |
| 22 | + * | ... | |
| 23 | + * +-------------------+ |
| 24 | + * | m[M],c[0] | |
| 25 | + * Base(M,0) +-------------------+ |
| 26 | + * | ... | |
| 27 | + * +-------------------+ |
| 28 | + * | m[i+1],c[1] | |
| 29 | + * Base(i+1,1) +-------------------+ |
| 30 | + * | m[i+1],c[0] | |
| 31 | + * Base(i+1,0) +-------------------+ |
| 32 | + * | ... | |
| 33 | + * +-------------------+ |
| 34 | + * | m[i],c[1] | |
| 35 | + * Base(i,1)=SW_BAR+0x40 +-------------------+ |
| 36 | + * | m[i],c[0] | 64B |
| 37 | + * Base(i,0)=SW_BAR +-------------------+ |
| 38 | + * i=NPK_SW_MSTR_STRT (256 on GP) |
| 39 | + * |
| 40 | + * CSR and STMR are treated differently in npk virtualization because: |
| 41 | + * 1. CSR configuration should come from just one OS, instead of each OS. |
| 42 | + * In our case, it should come from SOS. |
| 43 | + * 2. For performance and timing concern, the traces from each OS should |
| 44 | + * be written to STMR directly. |
| 45 | + * |
| 46 | + * Based on these, the npk virtualization is implemented in this way: |
| 47 | + * 1. The physical CSR is owned by SOS, and dm/npk emulates a software |
| 48 | + * one for the UOS, to keep the npk driver on UOS unchanged. Some CSR |
| 49 | + * initial values are configured to make the UOS npk driver think it |
| 50 | + * is working on a real npk. The CSR configuration from UOS is ignored |
| 51 | + * by dm, and it will not bring any side-effect. Because traces are the |
| 52 | + * only things needed from UOS, the location to send traces to and the |
| 53 | + * trace format are not affected by the CSR configuration. |
| 54 | + * 2. Part of the physical STMR will be reserved for the SOS, and the |
| 55 | + * others will be passed through to the UOS, so that the UOS can write |
| 56 | + * the traces to the MMIO space directly. |
| 57 | + * |
| 58 | + * A parameter is needed to indicate the offset and size of the Masters |
| 59 | + * to pass through to the UOS. For example, "-s 0:2,npk,512/256", there |
| 60 | + * are 256 Masters from #768 (256+512, #256 is the starting Master for |
| 61 | + * software tracing) passed through to the UOS. |
| 62 | + * |
| 63 | + * CSR STMR |
| 64 | + * SOS: +--------------+ +----------------------------------+ |
| 65 | + * | physical CSR | | Reserved for SOS | | |
| 66 | + * +--------------+ +----------------------------------+ |
| 67 | + * UOS: +--------------+ +---------------+ |
| 68 | + * | sw CSR by dm | | mapped to UOS | |
| 69 | + * +--------------+ +---------------+ |
| 70 | + * |
| 71 | + * Here is an overall flow about how it works. |
| 72 | + * 1. System boots up, and the npk driver on SOS is loaded. |
| 73 | + * 2. The dm is launched with parameters to enable npk virtualization. |
| 74 | + * 3. The dm/npk sets up a bar for CSR, and some values are initialized |
| 75 | + * based on the parameters, for example, the total number of Masters for |
| 76 | + * the UOS. |
| 77 | + * 4. The dm/npk sets up a bar for STMR, and maps part of the physical |
| 78 | + * STMR to it with an offset, according to the parameters. |
| 79 | + * 5. The UOS boots up, and the native npk driver on the UOS is loaded. |
| 80 | + * 6. Enable the traces from UOS, and the traces are written directly to |
| 81 | + * STMR, but not output by npk for now. |
| 82 | + * 7. Enable the npk output on SOS, and now the traces are output by npk |
| 83 | + * to the selected target. |
| 84 | + * 8. If the memory is the selected target, the traces can be retrieved |
| 85 | + * from memory on SOS, after stopping the traces. |
| 86 | + */ |
| 87 | + |
| 88 | +#include <stdio.h> |
| 89 | +#include <fcntl.h> |
| 90 | +#include <unistd.h> |
| 91 | +#include <dirent.h> |
| 92 | + |
| 93 | +#include "dm.h" |
| 94 | +#include "vmmapi.h" |
| 95 | +#include "pci_core.h" |
| 96 | +#include "npk.h" |
| 97 | + |
/* DPRINTF output is produced only while this flag is non-zero. */
static int pci_npk_debug;

/*
 * Both macros take one fully parenthesized printf() argument list, e.g.
 * DPRINTF(("val %d\n", v)). DPRINTF is gated by pci_npk_debug, WPRINTF
 * always prints.
 */
#define DPRINTF(params)					\
	do {						\
		if (pci_npk_debug)			\
			printf params;			\
	} while (0)
#define WPRINTF(params)	(printf params)
| 101 | + |
/*
 * Accessors for the emulated CSR blocks held in npk_csr. The argument is a
 * byte offset inside the block: the 8-bit forms index the block by byte, the
 * 32-bit forms turn the offset into a dword index (offset >> 2). Every
 * accessor expands to an lvalue, so it can be read or assigned.
 */
#define npk_csr_u8(blk, x)	(npk_csr[(blk)].data.u8[(x)])
#define npk_csr_u32(blk, x)	(npk_csr[(blk)].data.u32[(x) >> 2])

#define npk_gth_reg(x)		npk_csr_u8(NPK_CSR_GTH, x)
#define npk_sth_reg(x)		npk_csr_u8(NPK_CSR_STH, x)
#define npk_msc0_reg(x)		npk_csr_u8(NPK_CSR_MSC0, x)
#define npk_msc1_reg(x)		npk_csr_u8(NPK_CSR_MSC1, x)
#define npk_pti_reg(x)		npk_csr_u8(NPK_CSR_PTI, x)

#define npk_gth_reg32(x)	npk_csr_u32(NPK_CSR_GTH, x)
#define npk_sth_reg32(x)	npk_csr_u32(NPK_CSR_STH, x)
#define npk_msc0_reg32(x)	npk_csr_u32(NPK_CSR_MSC0, x)
#define npk_msc1_reg32(x)	npk_csr_u32(NPK_CSR_MSC1, x)
#define npk_pti_reg32(x)	npk_csr_u32(NPK_CSR_PTI, x)
| 112 | + |
| 113 | +/* the registers in CSR */ |
| 114 | +static uint8_t _npk_gth_reg[NPK_CSR_GTH_SZ]; |
| 115 | +static uint8_t _npk_sth_reg[NPK_CSR_STH_SZ]; |
| 116 | +static uint8_t _npk_msc0_reg[NPK_CSR_MSC0_SZ]; |
| 117 | +static uint8_t _npk_msc1_reg[NPK_CSR_MSC1_SZ]; |
| 118 | +static uint8_t _npk_pti_reg[NPK_CSR_PTI_SZ]; |
| 119 | + |
| 120 | +static struct npk_regs npk_csr[NPK_CSR_LAST] = { |
| 121 | + /* GTH */ |
| 122 | + { NPK_CSR_GTH_BASE, NPK_CSR_GTH_SZ, { _npk_gth_reg } }, |
| 123 | + /* STH */ |
| 124 | + { NPK_CSR_STH_BASE, NPK_CSR_STH_SZ, { _npk_sth_reg } }, |
| 125 | + /* MSC0 */ |
| 126 | + { NPK_CSR_MSC0_BASE, NPK_CSR_MSC0_SZ, { _npk_msc0_reg } }, |
| 127 | + /* MSC1 */ |
| 128 | + { NPK_CSR_MSC1_BASE, NPK_CSR_MSC1_SZ, { _npk_msc1_reg } }, |
| 129 | + /* PTI */ |
| 130 | + { NPK_CSR_PTI_BASE, NPK_CSR_PTI_SZ, { _npk_pti_reg } } |
| 131 | +}; |
| 132 | + |
| 133 | +/* the default values are from intel_th developer's manual */ |
| 134 | +static struct npk_reg_default_val regs_default_val[] = { |
| 135 | + { NPK_CSR_GTH, NPK_CSR_GTHOPT0, 0x00040101}, |
| 136 | + { NPK_CSR_MSC0, NPK_CSR_MSCxCTL, 0x00000300}, |
| 137 | + { NPK_CSR_MSC1, NPK_CSR_MSCxCTL, 0x00000300} |
| 138 | +}; |
| 139 | +#define regs_default_val_num (sizeof(regs_default_val) / \ |
| 140 | + sizeof(struct npk_reg_default_val)) |
| 141 | + |
| 142 | +static int npk_in_use; |
| 143 | + |
| 144 | +/* get the pointer to the register based on the offset */ |
| 145 | +static inline uint32_t *offset2reg(uint64_t offset) |
| 146 | +{ |
| 147 | + uint32_t *reg = NULL, i; |
| 148 | + struct npk_regs *regs; |
| 149 | + |
| 150 | + /* traverse the npk_csr to find the correct one */ |
| 151 | + for (i = NPK_CSR_FIRST; i < NPK_CSR_LAST; i++) { |
| 152 | + regs = &npk_csr[i]; |
| 153 | + if (offset >= regs->base && offset < regs->base + regs->size) { |
| 154 | + reg = regs->data.u32 + ((offset - regs->base) >> 2); |
| 155 | + break; |
| 156 | + } |
| 157 | + } |
| 158 | + |
| 159 | + return reg; |
| 160 | +} |
| 161 | + |
| 162 | +static inline int valid_param(int m_off, int m_num) |
| 163 | +{ |
| 164 | + /* 256-aligned, no less than 256, no overflow */ |
| 165 | + if (!(m_off & 0xFF) && !(m_num & 0xFF) && (m_off >> 8) > 0 |
| 166 | + && (m_num >> 8) > 0 && m_off + m_num <= NPK_SW_MSTR_NUM) |
| 167 | + return 1; |
| 168 | + |
| 169 | + return 0; |
| 170 | +} |
| 171 | + |
| 172 | +/* |
| 173 | + * Set up a bar for CSR, and some values are initialized based on the |
| 174 | + * parameters, for example, the total number of Masters for the UOS. |
| 175 | + * Set up a bar for STMR, and map part of the physical STMR to it with |
| 176 | + * an offset, according to the parameters. |
| 177 | + */ |
| 178 | +static int pci_npk_init(struct vmctx *ctx, struct pci_vdev *dev, char *opts) |
| 179 | +{ |
| 180 | + int i, b, s, f, fd, ret, m_off, m_num, error = -1; |
| 181 | + DIR *dir; |
| 182 | + struct dirent *dent; |
| 183 | + char name[PATH_MAX]; |
| 184 | + uint8_t h_cfg[PCI_REGMAX + 1]; |
| 185 | + uint64_t sw_bar_base; |
| 186 | + struct npk_reg_default_val *d; |
| 187 | + |
| 188 | + if (npk_in_use) { |
| 189 | + WPRINTF(("NPK is already in use\n")); |
| 190 | + return error; |
| 191 | + } |
| 192 | + npk_in_use = 1; |
| 193 | + |
| 194 | + /* |
| 195 | + * CSR (bar#0): emulate it for guests using npk_csr |
| 196 | + * |
| 197 | + * STMR (bar#2): map the host MMIO space to guests with an offset |
| 198 | + * |
| 199 | + * +--NPK_SW_MSTR_STRT +--m_off NPK_SW_MSTR_STP--+ |
| 200 | + * | +----- m_num ------+ | |
| 201 | + * v v v v |
| 202 | + * +--------------------+--------------------+-------------------+ |
| 203 | + * | | | | |
| 204 | + * | Reserved for SOS | Mapped for UOS#x | | |
| 205 | + * | | | | |
| 206 | + * +--------------------+--------------------+-------------------+ |
| 207 | + * ^ ^ |
| 208 | + * | | |
| 209 | + * +--sw_bar for host +--sw_bar for UOS#x |
| 210 | + */ |
| 211 | + |
| 212 | + /* get the master offset and the number for this guest */ |
| 213 | + if (opts == NULL || sscanf(opts, "%d/%d", &m_off, &m_num) != 2 |
| 214 | + || !valid_param(m_off, m_num)) { |
| 215 | + m_off = 256; |
| 216 | + m_num = 256; |
| 217 | + } |
| 218 | + |
| 219 | + /* check if the intel_th_pci driver is loaded */ |
| 220 | + dir = opendir(NPK_DRV_SYSFS_PATH); |
| 221 | + if (dir == NULL) { |
| 222 | + WPRINTF(("NPK driver not loaded\n")); |
| 223 | + return error; |
| 224 | + } |
| 225 | + |
| 226 | + /* traverse the driver folder, and try to find the NPK BDF# */ |
| 227 | + while ((dent = readdir(dir)) != NULL) { |
| 228 | + if (sscanf(dent->d_name, "0000:%x:%x.%x", &b, &s, &f) != 3) |
| 229 | + continue; |
| 230 | + else |
| 231 | + break; |
| 232 | + } |
| 233 | + closedir(dir); |
| 234 | + |
| 235 | + if (!dent) { |
| 236 | + WPRINTF(("Cannot find NPK device\n")); |
| 237 | + return error; |
| 238 | + } |
| 239 | + |
| 240 | + /* read the host NPK configuration space */ |
| 241 | + sprintf(name, "%s/%s/config", NPK_DRV_SYSFS_PATH, dent->d_name); |
| 242 | + fd = open(name, O_RDONLY); |
| 243 | + if (fd == -1) { |
| 244 | + WPRINTF(("Cannot open host NPK config\n")); |
| 245 | + return error; |
| 246 | + } |
| 247 | + |
| 248 | + ret = pread(fd, h_cfg, PCI_REGMAX + 1, 0); |
| 249 | + close(fd); |
| 250 | + if (ret < PCI_REGMAX + 1) { |
| 251 | + WPRINTF(("Cannot read host NPK config\n")); |
| 252 | + return error; |
| 253 | + } |
| 254 | + |
| 255 | + /* initialize the configuration space */ |
| 256 | + pci_set_cfgdata16(dev, PCIR_VENDOR, *(uint16_t *)&h_cfg[PCIR_VENDOR]); |
| 257 | + pci_set_cfgdata16(dev, PCIR_DEVICE, *(uint16_t *)&h_cfg[PCIR_DEVICE]); |
| 258 | + pci_set_cfgdata16(dev, PCIR_REVID, *(uint16_t *)&h_cfg[PCIR_REVID]); |
| 259 | + pci_set_cfgdata8(dev, PCIR_CLASS, h_cfg[PCIR_CLASS]); |
| 260 | + |
| 261 | + /* get the host base of NPK bar#2, plus the offset for the guest */ |
| 262 | + sw_bar_base = *(uint32_t *)&h_cfg[PCIR_BAR(2)] & PCIM_BAR_MEM_BASE; |
| 263 | + sw_bar_base += NPK_MSTR_TO_MEM_SZ(m_off); |
| 264 | + |
| 265 | + /* allocate the bar#0 (CSR)*/ |
| 266 | + error = pci_emul_alloc_bar(dev, 0, PCIBAR_MEM64, NPK_CSR_MTB_BAR_SZ); |
| 267 | + if (error) { |
| 268 | + WPRINTF(("Cannot alloc bar#0 for the guest\n")); |
| 269 | + return error; |
| 270 | + } |
| 271 | + |
| 272 | + /* allocate the bar#2 (STMR)*/ |
| 273 | + error = pci_emul_alloc_pbar(dev, 2, sw_bar_base, PCIBAR_MEM64, |
| 274 | + NPK_MSTR_TO_MEM_SZ(m_num)); |
| 275 | + if (error) { |
| 276 | + WPRINTF(("Cannot alloc bar#2 for the guest\n")); |
| 277 | + return error; |
| 278 | + } |
| 279 | + |
| 280 | + /* |
| 281 | + * map this part of STMR to the guest so that the traces from UOS are |
| 282 | + * written directly to it. |
| 283 | + */ |
| 284 | + error = vm_map_ptdev_mmio(ctx, dev->bus, dev->slot, dev->func, |
| 285 | + dev->bar[2].addr, dev->bar[2].size, sw_bar_base); |
| 286 | + if (error) { |
| 287 | + WPRINTF(("Cannot Map the address to the guest MMIO space\n")); |
| 288 | + return error; |
| 289 | + } |
| 290 | + |
| 291 | + /* setup default values for some registers */ |
| 292 | + for (i = 0; i < regs_default_val_num; i++) { |
| 293 | + d = ®s_default_val[i]; |
| 294 | + npk_csr[d->csr].data.u32[d->offset >> 2] = d->default_val; |
| 295 | + } |
| 296 | + /* setup the SW Master Start/Stop and Channels per Master for UOS */ |
| 297 | + npk_sth_reg32(NPK_CSR_STHCAP0) = NPK_SW_MSTR_STRT | |
| 298 | + ((m_num + NPK_SW_MSTR_STRT - 1) << 16); |
| 299 | + npk_sth_reg32(NPK_CSR_STHCAP1) = ((NPK_SW_MSTR_STRT - 1) << 24) | |
| 300 | + NPK_CHANNELS_PER_MSTR; |
| 301 | + |
| 302 | + /* set Pipe Line Empty for GTH/MSCx State */ |
| 303 | + npk_gth_reg(NPK_CSR_GTHSTAT) = NPK_CSR_GTHSTAT_PLE; |
| 304 | + npk_msc0_reg32(NPK_CSR_MSCxSTS) = NPK_CSR_MSCxSTS_PLE; |
| 305 | + npk_msc1_reg32(NPK_CSR_MSCxSTS) = NPK_CSR_MSCxSTS_PLE; |
| 306 | + |
| 307 | + DPRINTF(("NPK[%x:%x:%x] h_bar#2@0x%lx g_bar#2@0x%lx[0x%lx] m+%d[%d]\n", |
| 308 | + b, s, f, sw_bar_base, dev->bar[2].addr, |
| 309 | + dev->bar[2].size, m_off, m_num)); |
| 310 | + |
| 311 | + return 0; |
| 312 | +} |
| 313 | + |
| 314 | +static void pci_npk_deinit(struct vmctx *ctx, struct pci_vdev *dev, char *opts) |
| 315 | +{ |
| 316 | + npk_in_use = 0; |
| 317 | +} |
| 318 | + |
/*
 * Bar write handler. The CSR configuration from UOS never reaches the
 * physical NPK: an aligned 4-byte write to bar#0 is only stored in the
 * emulated registers (npk_csr), and every other access is dropped.
 */
static void pci_npk_write(struct vmctx *ctx, int vcpu, struct pci_vdev *dev,
		int baridx, uint64_t offset, int size, uint64_t value)
{
	uint32_t *slot;
	int csr_dword_access;

	DPRINTF(("W %d +0x%lx[%d] val 0x%lx\n", baridx, offset, size, value));

	/* only dword-aligned, dword-sized accesses to the CSR bar count */
	csr_dword_access = (baridx == 0) && !(offset & 0x3) && (size == 4);
	if (csr_dword_access) {
		/* offsets outside of the known blocks have no backing slot */
		slot = offset2reg(offset);
		if (slot != NULL)
			*slot = (uint32_t)value;
	}
}
| 335 | + |
/*
 * Bar read handler. An aligned 4-byte read from bar#0 returns the value
 * held in the emulated registers (npk_csr). Every other access, and any
 * offset outside of the known register blocks, reads as 0.
 */
static uint64_t pci_npk_read(struct vmctx *ctx, int vcpu, struct pci_vdev *dev,
		int baridx, uint64_t offset, int size)
{
	uint32_t *reg, val = 0;

	if (baridx == 0 && !(offset & 0x3) && size == 4) {
		/* try to get the register value from npk_csr */
		reg = offset2reg(offset);
		if (reg)
			val = *reg;
	}

	/* trace after the lookup, so that the value really returned is shown */
	DPRINTF(("R %d +0x%lx[%d] val 0x%x\n", baridx, offset, size, val));

	return (uint64_t)val;
}
| 353 | + |
| 354 | +struct pci_vdev_ops pci_ops_npk = { |
| 355 | + .class_name = "npk", |
| 356 | + .vdev_init = pci_npk_init, |
| 357 | + .vdev_deinit = pci_npk_deinit, |
| 358 | + .vdev_barwrite = pci_npk_write, |
| 359 | + .vdev_barread = pci_npk_read, |
| 360 | +}; |
| 361 | +DEFINE_PCI_DEVTYPE(pci_ops_npk); |
0 commit comments