Skip to content

Commit

Permalink
npu2: Use phys-map to get MMIO BARs
Browse files Browse the repository at this point in the history
Commit bdea201 ("hw/npu2.c: Use phys-map to get GPU memory BARs")
added use of phys-map for setting GPU memory BARs.

Move the MMIO BARs over to using phys-map as well.

Acked-by: Alistair Popple <alistair@popple.id.au>
Signed-off-by: Andrew Donnellan <andrew.donnellan@au1.ibm.com>
Signed-off-by: Stewart Smith <stewart@linux.vnet.ibm.com>
  • Loading branch information
ajdlinux authored and stewartsmith committed Jun 30, 2017
1 parent a1e0a04 commit 785c80b
Show file tree
Hide file tree
Showing 2 changed files with 50 additions and 50 deletions.
96 changes: 46 additions & 50 deletions hw/npu2.c
Original file line number Diff line number Diff line change
Expand Up @@ -154,6 +154,11 @@ static struct npu2_dev *npu2_bdf_to_dev(struct npu2 *p,
return NULL;
}

/*
 * Query the physical memory map for a BAR.
 *
 * The lookup is keyed by the chip id (gcid) together with the type and
 * index already stored in *bar. Pointers to bar->base and bar->size are
 * handed to phys_map_get(), which is expected to fill them in (its
 * definition is not visible here - confirm against phys-map.h).
 */
static inline void npu2_get_bar(uint32_t gcid, struct npu2_bar *bar)
{
	phys_map_get(gcid,
		     bar->type,
		     bar->index,
		     &bar->base,
		     &bar->size);
}

static void npu2_read_bar(struct npu2 *p, struct npu2_bar *bar)
{
uint64_t reg, val;
Expand Down Expand Up @@ -1116,55 +1121,45 @@ static const struct phb_ops npu_ops = {

static void assign_mmio_bars(uint64_t gcid, uint32_t scom, uint64_t reg[2], uint64_t mm_win[2])
{
uint64_t mem_start;
uint32_t i;
struct npu2_bar *bar;
struct npu2_bar npu2_bars[] = {
{ .reg = NPU2_REG_OFFSET(NPU2_STACK_STCK_2, 0, NPU2_PHY_BAR), .size = 0x1000000,
/*
* NPU_REGS must be first in this list, at least on DD1.
* On DD2, stack 0 will be used for NPU_REGS, stack 1/2 for NPU_PHY.
*/
{ .type = NPU_REGS, .index = 0,
.reg = NPU2_REG_OFFSET(NPU2_STACK_STCK_2, 0, NPU2_PHY_BAR),
.flags = NPU2_BAR_FLAG_ENABLED },
{ .reg = NPU2_REG_OFFSET(NPU2_STACK_STCK_0, 0, NPU2_PHY_BAR), .size = 0x200000,
{ .type = NPU_PHY, .index = 0,
.reg = NPU2_REG_OFFSET(NPU2_STACK_STCK_0, 0, NPU2_PHY_BAR),
.flags = NPU2_BAR_FLAG_ENABLED },
{ .reg = NPU2_REG_OFFSET(NPU2_STACK_STCK_1, 0, NPU2_PHY_BAR), .size = 0x200000,
{ .type = NPU_PHY, .index = 1,
.reg = NPU2_REG_OFFSET(NPU2_STACK_STCK_1, 0, NPU2_PHY_BAR),
.flags = NPU2_BAR_FLAG_ENABLED },
{ .reg = NPU2_REG_OFFSET(NPU2_STACK_STCK_0, 0, NPU2_NTL0_BAR), .size = 0x20000 },
{ .reg = NPU2_REG_OFFSET(NPU2_STACK_STCK_0, 0, NPU2_NTL1_BAR), .size = 0x20000 },
{ .reg = NPU2_REG_OFFSET(NPU2_STACK_STCK_1, 0, NPU2_NTL0_BAR), .size = 0x20000 },
{ .reg = NPU2_REG_OFFSET(NPU2_STACK_STCK_1, 0, NPU2_NTL1_BAR), .size = 0x20000 },
{ .reg = NPU2_REG_OFFSET(NPU2_STACK_STCK_2, 0, NPU2_NTL0_BAR), .size = 0x20000 },
{ .reg = NPU2_REG_OFFSET(NPU2_STACK_STCK_2, 0, NPU2_NTL1_BAR), .size = 0x20000 },
{ .reg = NPU2_REG_OFFSET(NPU2_STACK_STCK_0, 0, NPU2_GENID_BAR), .size = 0x20000 },
{ .reg = NPU2_REG_OFFSET(NPU2_STACK_STCK_1, 0, NPU2_GENID_BAR), .size = 0x20000 },
{ .reg = NPU2_REG_OFFSET(NPU2_STACK_STCK_2, 0, NPU2_GENID_BAR), .size = 0x20000 },
{ .type = NPU_NTL, .index = 0,
.reg = NPU2_REG_OFFSET(NPU2_STACK_STCK_0, 0, NPU2_NTL0_BAR) },
{ .type = NPU_NTL, .index = 1,
.reg = NPU2_REG_OFFSET(NPU2_STACK_STCK_0, 0, NPU2_NTL1_BAR) },
{ .type = NPU_NTL, .index = 2,
.reg = NPU2_REG_OFFSET(NPU2_STACK_STCK_1, 0, NPU2_NTL0_BAR) },
{ .type = NPU_NTL, .index = 3,
.reg = NPU2_REG_OFFSET(NPU2_STACK_STCK_1, 0, NPU2_NTL1_BAR) },
{ .type = NPU_NTL, .index = 4,
.reg = NPU2_REG_OFFSET(NPU2_STACK_STCK_2, 0, NPU2_NTL0_BAR) },
{ .type = NPU_NTL, .index = 5,
.reg = NPU2_REG_OFFSET(NPU2_STACK_STCK_2, 0, NPU2_NTL1_BAR) },
{ .type = NPU_GENID, .index = 0,
.reg = NPU2_REG_OFFSET(NPU2_STACK_STCK_0, 0, NPU2_GENID_BAR) },
{ .type = NPU_GENID, .index = 1,
.reg = NPU2_REG_OFFSET(NPU2_STACK_STCK_1, 0, NPU2_GENID_BAR) },
{ .type = NPU_GENID, .index = 2,
.reg = NPU2_REG_OFFSET(NPU2_STACK_STCK_2, 0, NPU2_GENID_BAR) },
};

mem_start = 0x6030200000000;
mem_start |= gcid << PPC_BITLSHIFT(21);

/*
* We're going to assign the BARs in reversed order according
* to their sizes, just like the order we have in npu2_bars[].
* In that way, all BARs will be aligned perfectly without
* wasting resources. Also, the Linux kernel won't change
* anything though it attempts to reassign the BARs that
* it can see, which are NTL and GENID BARs.
*
* GLOBAL MMIO (16MB)
* PHY0 (2MB)
* PHY1 (2MB)
* NTL0 (128KB)
* NTL1 (128KB)
* NTL2 (128KB)
* NTL3 (128KB)
* NTL4 (128KB)
* NTL5 (128KB)
* GENID0 (128KB)
* GENID1 (128KB)
* GENID2 (128KB)
*/
for (i = 0; i < ARRAY_SIZE(npu2_bars); i++) {
bar = &npu2_bars[i];
bar->base = mem_start;
mem_start += bar->size;
npu2_get_bar(gcid, bar);
npu2_write_bar(NULL, bar, gcid, scom);
}

Expand Down Expand Up @@ -1457,6 +1452,7 @@ static void npu2_populate_devices(struct npu2 *p,
struct npu2_dev *dev;
struct dt_node *npu2_dn, *link;
uint32_t npu_phandle, index = 0;
int stack;

/*
* Get the npu node which has the links which we expand here
Expand Down Expand Up @@ -1489,23 +1485,23 @@ static void npu2_populate_devices(struct npu2 *p,
dev->pl_xscom_base = dt_prop_get_u64(link, "ibm,npu-phy");
dev->lane_mask = dt_prop_get_u32(link, "ibm,npu-lane-mask");

/* Populate BARs. BAR0/1 is the NTL bar. We initialise
* it from the HW. */
/* Populate BARs. BAR0/1 is the NTL bar. */
stack = NPU2_STACK_STCK_0 + NPU2DEV_STACK(dev);
npu2_bar = &dev->bars[0].npu2_bar;
if (NPU2DEV_BRICK(dev) == 0)
/* Leave the block as 0 - the read/write bar
* functions fill it in */
npu2_bar->reg = NPU2_REG_OFFSET(NPU2_STACK_STCK_0 + NPU2DEV_STACK(dev), 0, NPU2_NTL0_BAR);
else
npu2_bar->reg = NPU2_REG_OFFSET(NPU2_STACK_STCK_0 + NPU2DEV_STACK(dev), 0, NPU2_NTL1_BAR);
npu2_bar->type = NPU_NTL;
npu2_bar->index = dev->index;
npu2_bar->reg = NPU2_REG_OFFSET(stack, 0, NPU2DEV_BRICK(dev) == 0 ?
NPU2_NTL0_BAR : NPU2_NTL1_BAR);
npu2_get_bar(p->chip_id, npu2_bar);

npu2_read_bar(p, npu2_bar);
dev->bars[0].flags = PCI_CFG_BAR_TYPE_MEM | PCI_CFG_BAR_MEM64;

/* BAR2/3 is the GENID bar. */
npu2_bar = &dev->bars[1].npu2_bar;
npu2_bar->reg = NPU2_REG_OFFSET(NPU2_STACK_STCK_0 + NPU2DEV_STACK(dev), 0, NPU2_GENID_BAR);
npu2_read_bar(p, npu2_bar);
npu2_bar->type = NPU_GENID;
npu2_bar->index = NPU2DEV_STACK(dev);
npu2_bar->reg = NPU2_REG_OFFSET(stack, 0, NPU2_GENID_BAR);
npu2_get_bar(p->chip_id, npu2_bar);

/* The GENID is a single physical BAR that we split
* for each emulated device */
Expand Down
4 changes: 4 additions & 0 deletions include/npu2.h
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,8 @@
#ifndef __NPU2_H
#define __NPU2_H

#include <phys-map.h>

/* Debugging options */
#define NPU2DBG(p, fmt, a...) prlog(PR_DEBUG, "NPU%d: " fmt, \
(p)->phb.opal_id, ##a)
Expand Down Expand Up @@ -49,6 +51,8 @@
* emulated PCIe BARs. There is a subtle difference between the two as
* not all BARs are exposed outside of skiboot. */
struct npu2_bar {
enum phys_map_type type;
int index;
#define NPU2_BAR_FLAG_ENABLED 0x0010

/* Generation ID's are a single space in the hardware but we split
Expand Down

0 comments on commit 785c80b

Please sign in to comment.