From e7e65caefd57913260c82fb751f07655671ec47e Mon Sep 17 00:00:00 2001 From: Francisco Jerez Date: Wed, 10 Feb 2010 19:25:58 +0100 Subject: [PATCH 1/6] drm/nouveau: Fix up pre-nv17 analog load detection. Signed-off-by: Francisco Jerez Signed-off-by: Ben Skeggs --- drivers/gpu/drm/nouveau/nv04_dac.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/nouveau/nv04_dac.c b/drivers/gpu/drm/nouveau/nv04_dac.c index d0e038d289484..1d73b15d70daf 100644 --- a/drivers/gpu/drm/nouveau/nv04_dac.c +++ b/drivers/gpu/drm/nouveau/nv04_dac.c @@ -119,7 +119,7 @@ static enum drm_connector_status nv04_dac_detect(struct drm_encoder *encoder, struct drm_connector *connector) { struct drm_device *dev = encoder->dev; - uint8_t saved_seq1, saved_pi, saved_rpc1; + uint8_t saved_seq1, saved_pi, saved_rpc1, saved_cr_mode; uint8_t saved_palette0[3], saved_palette_mask; uint32_t saved_rtest_ctrl, saved_rgen_ctrl; int i; @@ -135,6 +135,9 @@ static enum drm_connector_status nv04_dac_detect(struct drm_encoder *encoder, /* only implemented for head A for now */ NVSetOwner(dev, 0); + saved_cr_mode = NVReadVgaCrtc(dev, 0, NV_CIO_CR_MODE_INDEX); + NVWriteVgaCrtc(dev, 0, NV_CIO_CR_MODE_INDEX, saved_cr_mode | 0x80); + saved_seq1 = NVReadVgaSeq(dev, 0, NV_VIO_SR_CLOCK_INDEX); NVWriteVgaSeq(dev, 0, NV_VIO_SR_CLOCK_INDEX, saved_seq1 & ~0x20); @@ -203,6 +206,7 @@ static enum drm_connector_status nv04_dac_detect(struct drm_encoder *encoder, NVWriteVgaCrtc(dev, 0, NV_CIO_CRE_PIXEL_INDEX, saved_pi); NVWriteVgaCrtc(dev, 0, NV_CIO_CRE_RPC1_INDEX, saved_rpc1); NVWriteVgaSeq(dev, 0, NV_VIO_SR_CLOCK_INDEX, saved_seq1); + NVWriteVgaCrtc(dev, 0, NV_CIO_CR_MODE_INDEX, saved_cr_mode); if (blue == 0x18) { NV_INFO(dev, "Load detected on head A\n"); From 66b6ebaccb176a2068bbe328f162614dce524621 Mon Sep 17 00:00:00 2001 From: Ben Skeggs Date: Thu, 11 Feb 2010 10:23:30 +1000 Subject: [PATCH 2/6] drm/nv50: make nv50_mem_vm_{bind,unbind} operate only on vram GART is handled elsewhere, no reason to have the code for it here too. Signed-off-by: Ben Skeggs --- drivers/gpu/drm/nouveau/nouveau_mem.c | 43 ++++++++------------------- 1 file changed, 13 insertions(+), 30 deletions(-) diff --git a/drivers/gpu/drm/nouveau/nouveau_mem.c b/drivers/gpu/drm/nouveau/nouveau_mem.c index 8f3a12f614ed2..04885d2fb15fa 100644 --- a/drivers/gpu/drm/nouveau/nouveau_mem.c +++ b/drivers/gpu/drm/nouveau/nouveau_mem.c @@ -285,53 +285,36 @@ nv50_mem_vm_bind_linear(struct drm_device *dev, uint64_t virt, uint32_t size, uint32_t flags, uint64_t phys) { struct drm_nouveau_private *dev_priv = dev->dev_private; - struct nouveau_gpuobj **pgt; - unsigned psz, pfl, pages; - - if (virt >= dev_priv->vm_gart_base && - (virt + size) < (dev_priv->vm_gart_base + dev_priv->vm_gart_size)) { - psz = 12; - pgt = &dev_priv->gart_info.sg_ctxdma; - pfl = 0x21; - virt -= dev_priv->vm_gart_base; - } else - if (virt >= dev_priv->vm_vram_base && - (virt + size) < (dev_priv->vm_vram_base + dev_priv->vm_vram_size)) { - psz = 16; - pgt = dev_priv->vm_vram_pt; - pfl = 0x01; - virt -= dev_priv->vm_vram_base; - } else { - NV_ERROR(dev, "Invalid address: 0x%16llx-0x%16llx\n", - virt, virt + size - 1); - return -EINVAL; - } + unsigned pages; - pages = size >> psz; + virt -= dev_priv->vm_vram_base; + pages = size >> 16; dev_priv->engine.instmem.prepare_access(dev, true); if (flags & 0x80000000) { while (pages--) { - struct nouveau_gpuobj *pt = pgt[virt >> 29]; - unsigned pte = ((virt & 0x1fffffffULL) >> psz) << 1; + struct nouveau_gpuobj *pt = + dev_priv->vm_vram_pt[virt >> 29]; + unsigned pte = ((virt & 0x1fffffffULL) >> 16) << 1; nv_wo32(dev, pt, pte++, 0x00000000); nv_wo32(dev, pt, pte++, 0x00000000); - virt += (1 << psz); + virt += (1 << 16); } } else { while (pages--) { - struct nouveau_gpuobj *pt = pgt[virt >> 29]; - unsigned pte = ((virt & 0x1fffffffULL) >> psz) << 1; + struct nouveau_gpuobj *pt = + dev_priv->vm_vram_pt[virt >> 29]; + unsigned pte = ((virt & 0x1fffffffULL) >> 16) << 1; unsigned offset_h = upper_32_bits(phys) & 0xff; unsigned offset_l = lower_32_bits(phys); - nv_wo32(dev, pt, pte++, offset_l | pfl); + nv_wo32(dev, pt, pte++, offset_l | 1); nv_wo32(dev, pt, pte++, offset_h | flags); - phys += (1 << psz); - virt += (1 << psz); + phys += (1 << 16); + virt += (1 << 16); } } dev_priv->engine.instmem.finish_access(dev); From 4c27bd339d226175ac0e4dc3ab8289ba696db8be Mon Sep 17 00:00:00 2001 From: Ben Skeggs Date: Thu, 11 Feb 2010 10:25:53 +1000 Subject: [PATCH 3/6] drm/nv50: more efficient clearing of gpu page table entries Signed-off-by: Ben Skeggs --- drivers/gpu/drm/nouveau/nouveau_mem.c | 68 +++++++++++++++++---------- 1 file changed, 44 insertions(+), 24 deletions(-) diff --git a/drivers/gpu/drm/nouveau/nouveau_mem.c b/drivers/gpu/drm/nouveau/nouveau_mem.c index 04885d2fb15fa..6832c4c969a3c 100644 --- a/drivers/gpu/drm/nouveau/nouveau_mem.c +++ b/drivers/gpu/drm/nouveau/nouveau_mem.c @@ -291,31 +291,17 @@ nv50_mem_vm_bind_linear(struct drm_device *dev, uint64_t virt, uint32_t size, pages = size >> 16; dev_priv->engine.instmem.prepare_access(dev, true); - if (flags & 0x80000000) { - while (pages--) { - struct nouveau_gpuobj *pt = - dev_priv->vm_vram_pt[virt >> 29]; - unsigned pte = ((virt & 0x1fffffffULL) >> 16) << 1; + while (pages--) { + struct nouveau_gpuobj *pt = dev_priv->vm_vram_pt[virt >> 29]; + unsigned pte = ((virt & 0x1fffffffULL) >> 16) << 1; + unsigned offset_h = upper_32_bits(phys) & 0xff; + unsigned offset_l = lower_32_bits(phys); - nv_wo32(dev, pt, pte++, 0x00000000); - nv_wo32(dev, pt, pte++, 0x00000000); + nv_wo32(dev, pt, pte++, offset_l | 1); + nv_wo32(dev, pt, pte++, offset_h | flags); - virt += (1 << 16); - } - } else { - while (pages--) { - struct nouveau_gpuobj *pt = - dev_priv->vm_vram_pt[virt >> 29]; - unsigned pte = ((virt & 0x1fffffffULL) >> 16) << 1; - unsigned offset_h = upper_32_bits(phys) & 0xff; - unsigned offset_l = lower_32_bits(phys); - - nv_wo32(dev, pt, pte++, offset_l | 1); - nv_wo32(dev, pt, pte++, offset_h | flags); - - phys += (1 << 16); - virt += (1 << 16); - } + phys += (1 << 16); + virt += (1 << 16); } dev_priv->engine.instmem.finish_access(dev); @@ -339,7 +325,41 @@ nv50_mem_vm_bind_linear(struct drm_device *dev, uint64_t virt, uint32_t size, void nv50_mem_vm_unbind(struct drm_device *dev, uint64_t virt, uint32_t size) { - nv50_mem_vm_bind_linear(dev, virt, size, 0x80000000, 0); + struct drm_nouveau_private *dev_priv = dev->dev_private; + struct nouveau_gpuobj *pgt; + unsigned pages, pte, end; + + virt -= dev_priv->vm_vram_base; + pages = (size >> 16) << 1; + + dev_priv->engine.instmem.prepare_access(dev, true); + while (pages) { + pgt = dev_priv->vm_vram_pt[virt >> 29]; + pte = (virt & 0x1ffe0000ULL) >> 15; + + end = pte + pages; + if (end > 16384) + end = 16384; + pages -= (end - pte); + virt += (end - pte) << 15; + + while (pte < end) + nv_wo32(dev, pgt, pte++, 0); + } + dev_priv->engine.instmem.finish_access(dev); + + nv_wr32(dev, 0x100c80, 0x00050001); + if (!nv_wait(0x100c80, 0x00000001, 0x00000000)) { + NV_ERROR(dev, "timeout: (0x100c80 & 1) == 0 (2)\n"); + NV_ERROR(dev, "0x100c80 = 0x%08x\n", nv_rd32(dev, 0x100c80)); + return; + } + + nv_wr32(dev, 0x100c80, 0x00000001); + if (!nv_wait(0x100c80, 0x00000001, 0x00000000)) { + NV_ERROR(dev, "timeout: (0x100c80 & 1) == 0 (2)\n"); + NV_ERROR(dev, "0x100c80 = 0x%08x\n", nv_rd32(dev, 0x100c80)); + } } /* From 531e77139f26e8da32ee694b9ee5e6f4c764f1db Mon Sep 17 00:00:00 2001 From: Ben Skeggs Date: Thu, 11 Feb 2010 11:31:44 +1000 Subject: [PATCH 4/6] drm/nv50: improve vram page table construction This commit changes nouveau to construct PTEs which look very much like the ones the binary driver creates. I presume that filling multiple PTEs identically with length flags and the physical address of the start of a block of VRAM is a hint to the memory controller that it need not perform additional page table lookups for that range of addresses. Signed-off-by: Ben Skeggs --- drivers/gpu/drm/nouveau/nouveau_mem.c | 44 ++++++++++++++++++++------- 1 file changed, 33 insertions(+), 11 deletions(-) diff --git a/drivers/gpu/drm/nouveau/nouveau_mem.c b/drivers/gpu/drm/nouveau/nouveau_mem.c index 6832c4c969a3c..134fedbb7669c 100644 --- a/drivers/gpu/drm/nouveau/nouveau_mem.c +++ b/drivers/gpu/drm/nouveau/nouveau_mem.c @@ -285,23 +285,45 @@ nv50_mem_vm_bind_linear(struct drm_device *dev, uint64_t virt, uint32_t size, uint32_t flags, uint64_t phys) { struct drm_nouveau_private *dev_priv = dev->dev_private; - unsigned pages; + struct nouveau_gpuobj *pgt; + unsigned block; + int i; - virt -= dev_priv->vm_vram_base; - pages = size >> 16; + virt = ((virt - dev_priv->vm_vram_base) >> 16) << 1; + size = (size >> 16) << 1; + phys |= ((uint64_t)flags << 32) | 1; dev_priv->engine.instmem.prepare_access(dev, true); - while (pages--) { - struct nouveau_gpuobj *pt = dev_priv->vm_vram_pt[virt >> 29]; - unsigned pte = ((virt & 0x1fffffffULL) >> 16) << 1; - unsigned offset_h = upper_32_bits(phys) & 0xff; + while (size) { + unsigned offset_h = upper_32_bits(phys); unsigned offset_l = lower_32_bits(phys); + unsigned pte, end; + + for (i = 7; i >= 0; i--) { + block = 1 << (i + 1); + if (size >= block && !(virt & (block - 1))) + break; + } + offset_l |= (i << 7); + + phys += block << 15; + size -= block; - nv_wo32(dev, pt, pte++, offset_l | 1); - nv_wo32(dev, pt, pte++, offset_h | flags); + while (block) { + pgt = dev_priv->vm_vram_pt[virt >> 14]; + pte = virt & 0x3ffe; - phys += (1 << 16); - virt += (1 << 16); + end = pte + block; + if (end > 16384) + end = 16384; + block -= (end - pte); + virt += (end - pte); + + while (pte < end) { + nv_wo32(dev, pgt, pte++, offset_l); + nv_wo32(dev, pgt, pte++, offset_h); + } + } } dev_priv->engine.instmem.finish_access(dev); From 76befb8c30cebe2af83fa346bdaf75b430893511 Mon Sep 17 00:00:00 2001 From: Ben Skeggs Date: Sat, 20 Feb 2010 08:06:36 +1000 Subject: [PATCH 5/6] drm/nv50: fix instmem binding on IGPs to point at stolen system memory This also modifies the unused PRAMIN PT entries to be all zeroes, can't really recall why I used 9/0 initially, just that it didn't work for some reason. It was likely masking a bug elsewhere that's since been fixed. Signed-off-by: Ben Skeggs --- drivers/gpu/drm/nouveau/nouveau_drv.h | 1 + drivers/gpu/drm/nouveau/nv50_instmem.c | 58 ++++++++++++++++++-------- 2 files changed, 41 insertions(+), 18 deletions(-) diff --git a/drivers/gpu/drm/nouveau/nouveau_drv.h b/drivers/gpu/drm/nouveau/nouveau_drv.h index 5445cefdd03ec..1c15ef37b71cf 100644 --- a/drivers/gpu/drm/nouveau/nouveau_drv.h +++ b/drivers/gpu/drm/nouveau/nouveau_drv.h @@ -583,6 +583,7 @@ struct drm_nouveau_private { uint64_t vm_end; struct nouveau_gpuobj *vm_vram_pt[NV50_VM_VRAM_NR]; int vm_vram_pt_nr; + uint64_t vram_sys_base; /* the mtrr covering the FB */ int fb_mtrr; diff --git a/drivers/gpu/drm/nouveau/nv50_instmem.c b/drivers/gpu/drm/nouveau/nv50_instmem.c index 94400f777e7f9..f0dc4e36ef055 100644 --- a/drivers/gpu/drm/nouveau/nv50_instmem.c +++ b/drivers/gpu/drm/nouveau/nv50_instmem.c @@ -76,6 +76,11 @@ nv50_instmem_init(struct drm_device *dev) for (i = 0x1700; i <= 0x1710; i += 4) priv->save1700[(i-0x1700)/4] = nv_rd32(dev, i); + if (dev_priv->chipset == 0xaa || dev_priv->chipset == 0xac) + dev_priv->vram_sys_base = nv_rd32(dev, 0x100e10) << 12; + else + dev_priv->vram_sys_base = 0; + /* Reserve the last MiB of VRAM, we should probably try to avoid * setting up the below tables over the top of the VBIOS image at * some point. @@ -172,16 +177,28 @@ nv50_instmem_init(struct drm_device *dev) * We map the entire fake channel into the start of the PRAMIN BAR */ ret = nouveau_gpuobj_new_ref(dev, chan, NULL, 0, pt_size, 0x1000, - 0, &priv->pramin_pt); + 0, &priv->pramin_pt); if (ret) return ret; - for (i = 0, v = c_offset; i < pt_size; i += 8, v += 0x1000) { - if (v < (c_offset + c_size)) - BAR0_WI32(priv->pramin_pt->gpuobj, i + 0, v | 1); - else - BAR0_WI32(priv->pramin_pt->gpuobj, i + 0, 0x00000009); + v = c_offset | 1; + if (dev_priv->vram_sys_base) { + v += dev_priv->vram_sys_base; + v |= 0x30; + } + + i = 0; + while (v < dev_priv->vram_sys_base + c_offset + c_size) { + BAR0_WI32(priv->pramin_pt->gpuobj, i + 0, v); + BAR0_WI32(priv->pramin_pt->gpuobj, i + 4, 0x00000000); + v += 0x1000; + i += 8; + } + + while (i < pt_size) { + BAR0_WI32(priv->pramin_pt->gpuobj, i + 0, 0x00000000); BAR0_WI32(priv->pramin_pt->gpuobj, i + 4, 0x00000000); + i += 8; } BAR0_WI32(chan->vm_pd, 0x00, priv->pramin_pt->instance | 0x63); @@ -416,7 +433,9 @@ nv50_instmem_bind(struct drm_device *dev, struct nouveau_gpuobj *gpuobj) { struct drm_nouveau_private *dev_priv = dev->dev_private; struct nv50_instmem_priv *priv = dev_priv->engine.instmem.priv; - uint32_t pte, pte_end, vram; + struct nouveau_gpuobj *pramin_pt = priv->pramin_pt->gpuobj; + uint32_t pte, pte_end; + uint64_t vram; if (!gpuobj->im_backing || !gpuobj->im_pramin || gpuobj->im_bound) return -EINVAL; @@ -424,20 +443,24 @@ nv50_instmem_bind(struct drm_device *dev, struct nouveau_gpuobj *gpuobj) NV_DEBUG(dev, "st=0x%0llx sz=0x%0llx\n", gpuobj->im_pramin->start, gpuobj->im_pramin->size); - pte = (gpuobj->im_pramin->start >> 12) << 3; - pte_end = ((gpuobj->im_pramin->size >> 12) << 3) + pte; + pte = (gpuobj->im_pramin->start >> 12) << 1; + pte_end = ((gpuobj->im_pramin->size >> 12) << 1) + pte; vram = gpuobj->im_backing_start; NV_DEBUG(dev, "pramin=0x%llx, pte=%d, pte_end=%d\n", gpuobj->im_pramin->start, pte, pte_end); NV_DEBUG(dev, "first vram page: 0x%08x\n", gpuobj->im_backing_start); + vram |= 1; + if (dev_priv->vram_sys_base) { + vram += dev_priv->vram_sys_base; + vram |= 0x30; + } + dev_priv->engine.instmem.prepare_access(dev, true); while (pte < pte_end) { - nv_wo32(dev, priv->pramin_pt->gpuobj, (pte + 0)/4, vram | 1); - nv_wo32(dev, priv->pramin_pt->gpuobj, (pte + 4)/4, 0x00000000); - - pte += 8; + nv_wo32(dev, pramin_pt, pte++, lower_32_bits(vram)); + nv_wo32(dev, pramin_pt, pte++, upper_32_bits(vram)); vram += NV50_INSTMEM_PAGE_SIZE; } dev_priv->engine.instmem.finish_access(dev); @@ -470,14 +493,13 @@ nv50_instmem_unbind(struct drm_device *dev, struct nouveau_gpuobj *gpuobj) if (gpuobj->im_bound == 0) return -EINVAL; - pte = (gpuobj->im_pramin->start >> 12) << 3; - pte_end = ((gpuobj->im_pramin->size >> 12) << 3) + pte; + pte = (gpuobj->im_pramin->start >> 12) << 1; + pte_end = ((gpuobj->im_pramin->size >> 12) << 1) + pte; dev_priv->engine.instmem.prepare_access(dev, true); while (pte < pte_end) { - nv_wo32(dev, priv->pramin_pt->gpuobj, (pte + 0)/4, 0x00000009); - nv_wo32(dev, priv->pramin_pt->gpuobj, (pte + 4)/4, 0x00000000); - pte += 8; + nv_wo32(dev, priv->pramin_pt->gpuobj, pte++, 0x00000000); + nv_wo32(dev, priv->pramin_pt->gpuobj, pte++, 0x00000000); } dev_priv->engine.instmem.finish_access(dev); From 6c42966768b0254f465a8f451333795283f53d22 Mon Sep 17 00:00:00 2001 From: Ben Skeggs Date: Sat, 20 Feb 2010 08:10:11 +1000 Subject: [PATCH 6/6] drm/nv50: fix vram ptes on IGPs to point at stolen system memory Signed-off-by: Ben Skeggs --- drivers/gpu/drm/nouveau/nouveau_mem.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/nouveau/nouveau_mem.c b/drivers/gpu/drm/nouveau/nouveau_mem.c index 134fedbb7669c..2dc09dbd817d0 100644 --- a/drivers/gpu/drm/nouveau/nouveau_mem.c +++ b/drivers/gpu/drm/nouveau/nouveau_mem.c @@ -291,7 +291,13 @@ nv50_mem_vm_bind_linear(struct drm_device *dev, uint64_t virt, uint32_t size, virt = ((virt - dev_priv->vm_vram_base) >> 16) << 1; size = (size >> 16) << 1; - phys |= ((uint64_t)flags << 32) | 1; + + phys |= ((uint64_t)flags << 32); + phys |= 1; + if (dev_priv->vram_sys_base) { + phys += dev_priv->vram_sys_base; + phys |= 0x30; + } dev_priv->engine.instmem.prepare_access(dev, true); while (size) {