Permalink
Browse files

nve0: initial exa/xv acceleration for kepler chipsets

Signed-off-by: Ben Skeggs <bskeggs@redhat.com>
  • Loading branch information...
1 parent 94b9fd2 commit 0c7850836e91e43af98ef8cc10dbc15b839367d3 @skeggsb committed Mar 28, 2012
Showing with 582 additions and 39 deletions.
  1. +6 −0 src/nouveau_exa.c
  2. +1 −1 src/nouveau_xv.c
  3. +4 −0 src/nv_accel_common.c
  4. +4 −0 src/nv_proto.h
  5. +72 −17 src/nvc0_accel.c
  6. +34 −20 src/nvc0_accel.h
  7. +57 −0 src/nvc0_exa.c
  8. +0 −1 src/nvc0_shader.h
  9. +404 −0 src/nve0_shader.h
View
@@ -47,6 +47,11 @@ NVAccelM2MF(NVPtr pNv, int w, int h, int cpp, uint32_t srcoff, uint32_t dstoff,
struct nouveau_bo *src, int sd, int sp, int sh, int sx, int sy,
struct nouveau_bo *dst, int dd, int dp, int dh, int dx, int dy)
{
+ if (pNv->Architecture >= NV_ARCH_E0)
+ return NVE0EXARectCopy(pNv, w, h, cpp,
+ src, srcoff, sd, sp, sh, sx, sy,
+ dst, dstoff, dd, dp, dh, dx, dy);
+ else
if (pNv->Architecture >= NV_ARCH_C0)
return NVC0EXARectM2MF(pNv, w, h, cpp,
src, srcoff, sd, sp, sh, sx, sy,
@@ -413,6 +418,7 @@ nouveau_exa_init(ScreenPtr pScreen)
exa->DoneComposite = NV50EXADoneComposite;
break;
case NV_ARCH_C0:
+ case NV_ARCH_E0:
exa->CheckComposite = NVC0EXACheckComposite;
exa->PrepareComposite = NVC0EXAPrepareComposite;
exa->Composite = NVC0EXAComposite;
View
@@ -256,7 +256,7 @@ nouveau_xv_bo_realloc(ScrnInfoPtr pScrn, unsigned flags, unsigned size,
if (pNv->Architecture == NV_ARCH_50)
config.nv50.memtype = 0x70;
else
- if (pNv->Architecture == NV_ARCH_C0)
+ if (pNv->Architecture >= NV_ARCH_C0)
config.nvc0.memtype = 0xfe;
}
flags |= NOUVEAU_BO_MAP;
View
@@ -623,11 +623,15 @@ NVAccelCommonInit(ScrnInfoPtr pScrn)
if (pNv->Architecture < NV_ARCH_C0)
INIT_CONTEXT_OBJECT(M2MF_NV50);
else
+ if (pNv->Architecture < NV_ARCH_E0)
INIT_CONTEXT_OBJECT(M2MF_NVC0);
+ else
+ INIT_CONTEXT_OBJECT(P2MF_NVE0);
/* 3D init */
switch (pNv->Architecture) {
case NV_ARCH_C0:
+ case NV_ARCH_E0:
INIT_CONTEXT_OBJECT(3D_NVC0);
break;
case NV_ARCH_50:
View
@@ -148,6 +148,7 @@ Bool NVAccelInitNV50TCL(ScrnInfoPtr pScrn);
/* in nvc0_accel.c */
Bool NVAccelInitM2MF_NVC0(ScrnInfoPtr pScrn);
+Bool NVAccelInitP2MF_NVE0(ScrnInfoPtr pScrn);
Bool NVAccelInit2D_NVC0(ScrnInfoPtr pScrn);
Bool NVAccelInit3D_NVC0(ScrnInfoPtr pScrn);
@@ -191,6 +192,9 @@ Bool NVC0EXAUploadSIFC(const char *src, int src_pitch,
Bool NVC0EXARectM2MF(NVPtr pNv, int, int, int,
struct nouveau_bo *, uint32_t, int, int, int, int, int,
struct nouveau_bo *, uint32_t, int, int, int, int, int);
+Bool NVE0EXARectCopy(NVPtr pNv, int, int, int,
+ struct nouveau_bo *, uint32_t, int, int, int, int, int,
+ struct nouveau_bo *, uint32_t, int, int, int, int, int);
/* nv50_xv.c */
int nv50_xv_image_put(ScrnInfoPtr, struct nouveau_bo *, int, int, int, int,
View
@@ -23,6 +23,7 @@
#include "nv_include.h"
#include "nvc0_accel.h"
#include "nvc0_shader.h"
+#include "nve0_shader.h"
Bool
NVAccelInitM2MF_NVC0(ScrnInfoPtr pScrn)
@@ -51,6 +52,30 @@ NVAccelInitM2MF_NVC0(ScrnInfoPtr pScrn)
return TRUE;
}
+/*
+ * Create the objects used for inline data upload and buffer copies on
+ * NVE0 (Kepler) chipsets.
+ *
+ * Allocates the VRAM scratch buffer into pNv->tesla_scratch, creates the
+ * 0xa040 object into pNv->NvMemFormat, then binds that object's handle on
+ * the P2MF subchannel and pushes 0xa0b5 as the object for the COPY
+ * subchannel.
+ *
+ * Returns TRUE on success.  Returns FALSE if either allocation fails; in
+ * that case no scratch buffer reference is left behind in pNv.
+ */
+Bool
+NVAccelInitP2MF_NVE0(ScrnInfoPtr pScrn)
+{
+	NVPtr pNv = NVPTR(pScrn);
+	struct nouveau_pushbuf *push = pNv->pushbuf;
+	int ret;
+
+	ret = nouveau_bo_new(pNv->dev, NOUVEAU_BO_VRAM, 128 * 1024, 0x20000,
+			     NULL, &pNv->tesla_scratch);
+	if (ret)
+		return FALSE;
+
+	ret = nouveau_object_new(pNv->channel, 0x0000a040, 0xa040,
+				 NULL, 0, &pNv->NvMemFormat);
+	if (ret) {
+		/* Do not keep the scratch buffer around when init fails;
+		 * nouveau_bo_ref(NULL, ...) drops the reference and clears
+		 * the pointer.
+		 */
+		nouveau_bo_ref(NULL, &pNv->tesla_scratch);
+		return FALSE;
+	}
+
+	BEGIN_NVC0(push, NV01_SUBC(P2MF, OBJECT), 1);
+	PUSH_DATA (push, pNv->NvMemFormat->handle);
+	BEGIN_NVC0(push, NV01_SUBC(COPY, OBJECT), 1);
+	PUSH_DATA (push, 0x0000a0b5);
+	return TRUE;
+}
+
Bool
NVAccelInit2D_NVC0(ScrnInfoPtr pScrn)
{
@@ -104,9 +129,15 @@ NVAccelInit3D_NVC0(ScrnInfoPtr pScrn)
NVPtr pNv = NVPTR(pScrn);
struct nouveau_pushbuf *push = pNv->pushbuf;
struct nouveau_bo *bo = pNv->tesla_scratch;
+ uint32_t class;
int ret;
- ret = nouveau_object_new(pNv->channel, 0x00009097, 0x9097,
+ if (pNv->Architecture < NV_ARCH_E0)
+ class = 0x9097;
+ else
+ class = 0xa097;
+
+ ret = nouveau_object_new(pNv->channel, class, class,
NULL, 0, &pNv->Nv3D);
if (ret)
return FALSE;
@@ -155,11 +186,25 @@ NVAccelInit3D_NVC0(ScrnInfoPtr pScrn)
PUSH_DATA (push, 0);
BEGIN_NVC0(push, NVC0_3D(LINKED_TSC), 1);
PUSH_DATA (push, 1);
- BEGIN_NVC0(push, NVC0_3D(TEX_LIMITS(4)), 1);
- PUSH_DATA (push, 0x54);
- BEGIN_NIC0(push, NVC0_3D(BIND_TIC(4)), 2);
- PUSH_DATA (push, (0 << 9) | (0 << 1) | NVC0_3D_BIND_TIC_ACTIVE);
- PUSH_DATA (push, (1 << 9) | (1 << 1) | NVC0_3D_BIND_TIC_ACTIVE);
+ if (pNv->Architecture < NV_ARCH_E0) {
+ BEGIN_NVC0(push, NVC0_3D(TEX_LIMITS(4)), 1);
+ PUSH_DATA (push, 0x54);
+ BEGIN_NIC0(push, NVC0_3D(BIND_TIC(4)), 2);
+ PUSH_DATA (push, (0 << 9) | (0 << 1) | NVC0_3D_BIND_TIC_ACTIVE);
+ PUSH_DATA (push, (1 << 9) | (1 << 1) | NVC0_3D_BIND_TIC_ACTIVE);
+ } else {
+ BEGIN_NVC0(push, NVC0_3D(CB_SIZE), 6);
+ PUSH_DATA (push, 256);
+ PUSH_DATA (push, (bo->offset + TB_OFFSET) >> 32);
+ PUSH_DATA (push, (bo->offset + TB_OFFSET));
+ PUSH_DATA (push, 0);
+ PUSH_DATA (push, 0x00000000);
+ PUSH_DATA (push, 0x00000001);
+ BEGIN_NVC0(push, NVC0_3D(CB_BIND(4)), 1);
+ PUSH_DATA (push, 0x11);
+ BEGIN_NVC0(push, SUBC_3D(0x2608), 1);
+ PUSH_DATA (push, 1);
+ }
BEGIN_NVC0(push, NVC0_3D(VERTEX_QUARANTINE_ADDRESS_HIGH), 3);
PUSH_DATA (push, (bo->offset + MISC_OFFSET) >> 32);
@@ -169,18 +214,28 @@ NVAccelInit3D_NVC0(ScrnInfoPtr pScrn)
BEGIN_NVC0(push, NVC0_3D(CODE_ADDRESS_HIGH), 2);
PUSH_DATA (push, (bo->offset + CODE_OFFSET) >> 32);
PUSH_DATA (push, (bo->offset + CODE_OFFSET));
+ if (pNv->Architecture < NV_ARCH_E0) {
+ NVC0PushProgram(pNv, PVP_PASS, NVC0VP_Passthrough);
+ NVC0PushProgram(pNv, PFP_S, NVC0FP_Source);
+ NVC0PushProgram(pNv, PFP_C, NVC0FP_Composite);
+ NVC0PushProgram(pNv, PFP_CCA, NVC0FP_CAComposite);
+ NVC0PushProgram(pNv, PFP_CCASA, NVC0FP_CACompositeSrcAlpha);
+ NVC0PushProgram(pNv, PFP_S_A8, NVC0FP_Source_A8);
+ NVC0PushProgram(pNv, PFP_C_A8, NVC0FP_Composite_A8);
+ NVC0PushProgram(pNv, PFP_NV12, NVC0FP_NV12);
- NVC0PushProgram(pNv, PVP_PASS, NVC0VP_Passthrough);
- NVC0PushProgram(pNv, PFP_S, NVC0FP_Source);
- NVC0PushProgram(pNv, PFP_C, NVC0FP_Composite);
- NVC0PushProgram(pNv, PFP_CCA, NVC0FP_CAComposite);
- NVC0PushProgram(pNv, PFP_CCASA, NVC0FP_CACompositeSrcAlpha);
- NVC0PushProgram(pNv, PFP_S_A8, NVC0FP_Source_A8);
- NVC0PushProgram(pNv, PFP_C_A8, NVC0FP_Composite_A8);
- NVC0PushProgram(pNv, PFP_NV12, NVC0FP_NV12);
-
- BEGIN_NVC0(push, NVC0_3D(MEM_BARRIER), 1);
- PUSH_DATA (push, 0x1111);
+ BEGIN_NVC0(push, NVC0_3D(MEM_BARRIER), 1);
+ PUSH_DATA (push, 0x1111);
+ } else {
+ NVC0PushProgram(pNv, PVP_PASS, NVE0VP_Passthrough);
+ NVC0PushProgram(pNv, PFP_S, NVE0FP_Source);
+ NVC0PushProgram(pNv, PFP_C, NVE0FP_Composite);
+ NVC0PushProgram(pNv, PFP_CCA, NVE0FP_CAComposite);
+ NVC0PushProgram(pNv, PFP_CCASA, NVE0FP_CACompositeSrcAlpha);
+ NVC0PushProgram(pNv, PFP_S_A8, NVE0FP_Source_A8);
+ NVC0PushProgram(pNv, PFP_C_A8, NVE0FP_Composite_A8);
+ NVC0PushProgram(pNv, PFP_NV12, NVE0FP_NV12);
+ }
BEGIN_NVC0(push, NVC0_3D(SP_SELECT(1)), 4);
PUSH_DATA (push, NVC0_3D_SP_SELECT_PROGRAM_VP_B |
View
@@ -13,10 +13,12 @@
#define SUBC_3D(mthd) 0, (mthd)
#define NVC0_3D(mthd) SUBC_3D(NVC0_3D_##mthd)
#define SUBC_M2MF(mthd) 2, (mthd)
+#define SUBC_P2MF(mthd) 2, (mthd)
#define NVC0_M2MF(mthd) SUBC_M2MF(NVC0_M2MF_##mthd)
#define SUBC_2D(mthd) 3, (mthd)
#define NV50_2D(mthd) SUBC_2D(NV50_2D_##mthd)
#define NVC0_2D(mthd) SUBC_2D(NVC0_2D_##mthd)
+#define SUBC_COPY(mthd) 4, (mthd)
#define SUBC_NVSW(mthd) 5, (mthd)
/* scratch buffer offsets */
@@ -26,21 +28,23 @@
#define NTFY_OFFSET 0x08000
#define MISC_OFFSET 0x10000
-/* fragment programs */
-#define PFP_S 0x0000 /* (src) */
-#define PFP_C 0x0100 /* (src IN mask) */
-#define PFP_CCA 0x0200 /* (src IN mask) component-alpha */
-#define PFP_CCASA 0x0300 /* (src IN mask) component-alpha src-alpha */
-#define PFP_S_A8 0x0400 /* (src) a8 rt */
-#define PFP_C_A8 0x0500 /* (src IN mask) a8 rt - same for CA and CA_SA */
-#define PFP_NV12 0x0600 /* NV12 YUV->RGB */
-
-/* vertex programs */
-#define PVP_PASS 0x0700 /* vertex pass-through shader */
+/* vertex/fragment programs */
+#define SPO ((pNv->Architecture < NV_ARCH_E0) ? 0x0000 : 0x0030)
+#define PVP_PASS (0x0000 + SPO) /* vertex pass-through shader */
+#define PFP_S (0x0200 + SPO) /* (src) */
+#define PFP_C (0x0400 + SPO) /* (src IN mask) */
+#define PFP_CCA (0x0600 + SPO) /* (src IN mask) component-alpha */
+#define PFP_CCASA (0x0800 + SPO) /* (src IN mask) component-alpha src-alpha */
+#define PFP_S_A8 (0x0a00 + SPO) /* (src) a8 rt */
+#define PFP_C_A8 (0x0c00 + SPO) /* (src IN mask) a8 rt - same for CCA/CCASA */
+#define PFP_NV12 (0x0e00 + SPO) /* NV12 YUV->RGB */
/* shader constants */
#define CB_OFFSET 0x1000
+/* texture bindings (kepler) */
+#define TB_OFFSET 0x1800
+
#define VTX_ATTR(a, c, t, s) \
((NVC0_3D_VTX_ATTR_DEFINE_TYPE_##t) | \
((a) << NVC0_3D_VTX_ATTR_DEFINE_ATTR__SHIFT) | \
@@ -98,15 +102,25 @@ static __inline__ void
PUSH_DATAu(struct nouveau_pushbuf *push, struct nouveau_bo *bo,
unsigned delta, unsigned dwords)
{
- BEGIN_NVC0(push, NVC0_M2MF(OFFSET_OUT_HIGH), 2);
- PUSH_DATA (push, (bo->offset + delta) >> 32);
- PUSH_DATA (push, (bo->offset + delta));
- BEGIN_NVC0(push, NVC0_M2MF(LINE_LENGTH_IN), 2);
- PUSH_DATA (push, dwords * 4);
- PUSH_DATA (push, 1);
- BEGIN_NVC0(push, NVC0_M2MF(EXEC), 1);
- PUSH_DATA (push, 0x100111);
- BEGIN_NIC0(push, NVC0_M2MF(DATA), dwords);
+ if (push->client->device->chipset < 0xe0) {
+ BEGIN_NVC0(push, NVC0_M2MF(OFFSET_OUT_HIGH), 2);
+ PUSH_DATA (push, (bo->offset + delta) >> 32);
+ PUSH_DATA (push, (bo->offset + delta));
+ BEGIN_NVC0(push, NVC0_M2MF(LINE_LENGTH_IN), 2);
+ PUSH_DATA (push, dwords * 4);
+ PUSH_DATA (push, 1);
+ BEGIN_NVC0(push, NVC0_M2MF(EXEC), 1);
+ PUSH_DATA (push, 0x100111);
+ BEGIN_NIC0(push, NVC0_M2MF(DATA), dwords);
+ } else {
+ BEGIN_NVC0(push, SUBC_P2MF(0x0180), 4);
+ PUSH_DATA (push, dwords * 4);
+ PUSH_DATA (push, 1);
+ PUSH_DATA (push, (bo->offset + delta) >> 32);
+ PUSH_DATA (push, (bo->offset + delta));
+ BEGIN_1IC0(push, SUBC_P2MF(0x01b0), 1 + dwords);
+ PUSH_DATA (push, 0x001001);
+ }
}
#endif
View
@@ -1006,3 +1006,60 @@ NVC0EXARectM2MF(NVPtr pNv, int w, int h, int cpp,
return TRUE;
}
+
+/*
+ * Copy a w x h rectangle of cpp-byte pixels from src to dst by emitting
+ * methods on the COPY subchannel.
+ *
+ * src/dst describe each surface: buffer object, byte offset into it,
+ * placement domain flags, pitch, height and the x/y origin of the
+ * rectangle.  Returns FALSE when pushbuf space cannot be reserved or the
+ * buffers cannot be referenced, TRUE once the copy has been queued.
+ */
+Bool
+NVE0EXARectCopy(NVPtr pNv, int w, int h, int cpp,
+		struct nouveau_bo *src, uint32_t src_off, int src_dom,
+		int src_pitch, int src_h, int src_x, int src_y,
+		struct nouveau_bo *dst, uint32_t dst_off, int dst_dom,
+		int dst_pitch, int dst_h, int dst_x, int dst_y)
+{
+	struct nouveau_pushbuf *push = pNv->pushbuf;
+	struct nouveau_pushbuf_refn bo_refs[2] = {
+		{ src, src_dom | NOUVEAU_BO_RD },
+		{ dst, dst_dom | NOUVEAU_BO_WR },
+	};
+	unsigned launch = 0x00000206;
+
+	if (nouveau_pushbuf_space(push, 64, 0, 0))
+		return FALSE;
+	if (nouveau_pushbuf_refn(push, bo_refs, 2))
+		return FALSE;
+
+	/* A surface with a zero memtype (presumably linear - confirm) has
+	 * its x/y origin folded into the byte offset and gets its own flag
+	 * bit in the launch word.
+	 */
+	if (!src->config.nvc0.memtype) {
+		launch |= 0x00000080;
+		src_off += src_y * src_pitch + src_x * cpp;
+	}
+	if (!dst->config.nvc0.memtype) {
+		launch |= 0x00000100;
+		dst_off += dst_y * dst_pitch + dst_x * cpp;
+	}
+
+	/* Source surface description. */
+	BEGIN_NVC0(push, SUBC_COPY(0x0728), 6);
+	PUSH_DATA (push, 0x00001000 | src->config.nvc0.tile_mode);
+	PUSH_DATA (push, src_pitch);
+	PUSH_DATA (push, src_h);
+	PUSH_DATA (push, 1);
+	PUSH_DATA (push, 0);
+	PUSH_DATA (push, (src_y << 16) | (src_x * cpp));
+
+	/* Destination surface description. */
+	BEGIN_NVC0(push, SUBC_COPY(0x070c), 6);
+	PUSH_DATA (push, 0x00001000 | dst->config.nvc0.tile_mode);
+	PUSH_DATA (push, dst_pitch);
+	PUSH_DATA (push, dst_h);
+	PUSH_DATA (push, 1);
+	PUSH_DATA (push, 0);
+	PUSH_DATA (push, (dst_y << 16) | (dst_x * cpp));
+
+	/* Addresses (high word first), pitches, line length in bytes and
+	 * line count.
+	 */
+	BEGIN_NVC0(push, SUBC_COPY(0x0400), 8);
+	PUSH_DATA (push, (src->offset + src_off) >> 32);
+	PUSH_DATA (push, (src->offset + src_off));
+	PUSH_DATA (push, (dst->offset + dst_off) >> 32);
+	PUSH_DATA (push, (dst->offset + dst_off));
+	PUSH_DATA (push, src_pitch);
+	PUSH_DATA (push, dst_pitch);
+	PUSH_DATA (push, w * cpp);
+	PUSH_DATA (push, h);
+
+	/* Writing the launch word is the last method of the sequence. */
+	BEGIN_NVC0(push, SUBC_COPY(0x0300), 1);
+	PUSH_DATA (push, launch);
+
+	return TRUE;
+}
View
@@ -5,7 +5,6 @@
const unsigned size = sizeof(code) / sizeof(code[0]); \
PUSH_DATAu((pNv)->pushbuf, (pNv)->tesla_scratch, (addr), size); \
PUSH_DATAp((pNv)->pushbuf, (code), size); \
- ErrorF("20 + %d * 2\n", (size - 20) / 2); \
} while(0)
static uint32_t
Oops, something went wrong.

0 comments on commit 0c78508

Please sign in to comment.