From d26098759cf6d32148649c165f87a7590bc25b89 Mon Sep 17 00:00:00 2001
From: Jerome Glisse <jglisse@redhat.com>
Date: Sat, 9 Jun 2012 10:57:41 -0400
Subject: [PATCH 1/3] drm/radeon: fix tiling and command stream checking on
 evergreen v3

Fix regresson since the introduction of command stream checking on
evergreen (thread referenced below). Issue is cause by ddx allocating
bo with formula width*height*bpp while programming the GPU command
stream with ALIGN(height, 8). In some case (where page alignment does
not hide the extra size bo should be according to height alignment)
the kernel will reject the command stream.

This patch reprogram the command stream to slice - 1 (slice is
a derivative value from height) which avoid rejecting the command
stream while keeping the value of command stream checking from a
security point of view.

This patch also fix wrong computation of layer size for 2D tiled
surface. Which should fix issue when 2D color tiling is enabled.
This dump the radeon KMS_DRIVER_MINOR so userspace can know if
they are on a fixed kernel or not.

https://lkml.org/lkml/2012/6/3/80
https://bugs.freedesktop.org/show_bug.cgi?id=50892
https://bugs.freedesktop.org/show_bug.cgi?id=50857

!!! STABLE need a custom version of this patch for 3.4 !!!

v2: actually bump the minor version and add comment about stable
v3: do compute the height the ddx was trying to use

[airlied: drop left over debug]

Signed-off-by: Jerome Glisse <jglisse@redhat.com>
Signed-off-by: Dave Airlie <airlied@redhat.com>
---
 drivers/gpu/drm/radeon/evergreen_cs.c | 49 ++++++++++++++++++++++++---
 drivers/gpu/drm/radeon/radeon_drv.c   |  3 +-
 2 files changed, 47 insertions(+), 5 deletions(-)

diff --git a/drivers/gpu/drm/radeon/evergreen_cs.c b/drivers/gpu/drm/radeon/evergreen_cs.c
index 4e7dd2b4843d94..c16554122ccd0f 100644
--- a/drivers/gpu/drm/radeon/evergreen_cs.c
+++ b/drivers/gpu/drm/radeon/evergreen_cs.c
@@ -52,6 +52,7 @@ struct evergreen_cs_track {
 	u32			cb_color_view[12];
 	u32			cb_color_pitch[12];
 	u32			cb_color_slice[12];
+	u32			cb_color_slice_idx[12];
 	u32			cb_color_attrib[12];
 	u32			cb_color_cmask_slice[8];/* unused */
 	u32			cb_color_fmask_slice[8];/* unused */
@@ -127,12 +128,14 @@ static void evergreen_cs_track_init(struct evergreen_cs_track *track)
 		track->cb_color_info[i] = 0;
 		track->cb_color_view[i] = 0xFFFFFFFF;
 		track->cb_color_pitch[i] = 0;
-		track->cb_color_slice[i] = 0;
+		track->cb_color_slice[i] = 0xfffffff;
+		track->cb_color_slice_idx[i] = 0;
 	}
 	track->cb_target_mask = 0xFFFFFFFF;
 	track->cb_shader_mask = 0xFFFFFFFF;
 	track->cb_dirty = true;
 
+	track->db_depth_slice = 0xffffffff;
 	track->db_depth_view = 0xFFFFC000;
 	track->db_depth_size = 0xFFFFFFFF;
 	track->db_depth_control = 0xFFFFFFFF;
@@ -250,10 +253,9 @@ static int evergreen_surface_check_2d(struct radeon_cs_parser *p,
 {
 	struct evergreen_cs_track *track = p->track;
 	unsigned palign, halign, tileb, slice_pt;
+	unsigned mtile_pr, mtile_ps, mtileb;
 
 	tileb = 64 * surf->bpe * surf->nsamples;
-	palign = track->group_size / (8 * surf->bpe * surf->nsamples);
-	palign = MAX(8, palign);
 	slice_pt = 1;
 	if (tileb > surf->tsplit) {
 		slice_pt = tileb / surf->tsplit;
@@ -262,7 +264,10 @@ static int evergreen_surface_check_2d(struct radeon_cs_parser *p,
 	/* macro tile width & height */
 	palign = (8 * surf->bankw * track->npipes) * surf->mtilea;
 	halign = (8 * surf->bankh * surf->nbanks) / surf->mtilea;
-	surf->layer_size = surf->nbx * surf->nby * surf->bpe * slice_pt;
+	mtileb = (palign / 8) * (halign / 8) * tileb;;
+	mtile_pr = surf->nbx / palign;
+	mtile_ps = (mtile_pr * surf->nby) / halign;
+	surf->layer_size = mtile_ps * mtileb * slice_pt;
 	surf->base_align = (palign / 8) * (halign / 8) * tileb;
 	surf->palign = palign;
 	surf->halign = halign;
@@ -434,6 +439,39 @@ static int evergreen_cs_track_validate_cb(struct radeon_cs_parser *p, unsigned i
 
 	offset += surf.layer_size * mslice;
 	if (offset > radeon_bo_size(track->cb_color_bo[id])) {
+		/* old ddx are broken they allocate bo with w*h*bpp but
+		 * program slice with ALIGN(h, 8), catch this and patch
+		 * command stream.
+		 */
+		if (!surf.mode) {
+			volatile u32 *ib = p->ib.ptr;
+			unsigned long tmp, nby, bsize, size, min = 0;
+
+			/* find the height the ddx wants */
+			if (surf.nby > 8) {
+				min = surf.nby - 8;
+			}
+			bsize = radeon_bo_size(track->cb_color_bo[id]);
+			tmp = track->cb_color_bo_offset[id] << 8;
+			for (nby = surf.nby; nby > min; nby--) {
+				size = nby * surf.nbx * surf.bpe * surf.nsamples;
+				if ((tmp + size * mslice) <= bsize) {
+					break;
+				}
+			}
+			if (nby > min) {
+				surf.nby = nby;
+				slice = ((nby * surf.nbx) / 64) - 1;
+				if (!evergreen_surface_check(p, &surf, "cb")) {
+					/* check if this one works */
+					tmp += surf.layer_size * mslice;
+					if (tmp <= bsize) {
+						ib[track->cb_color_slice_idx[id]] = slice;
+						goto old_ddx_ok;
+					}
+				}
+			}
+		}
 		dev_warn(p->dev, "%s:%d cb[%d] bo too small (layer size %d, "
 			 "offset %d, max layer %d, bo size %ld, slice %d)\n",
 			 __func__, __LINE__, id, surf.layer_size,
@@ -446,6 +484,7 @@ static int evergreen_cs_track_validate_cb(struct radeon_cs_parser *p, unsigned i
 			surf.tsplit, surf.mtilea);
 		return -EINVAL;
 	}
+old_ddx_ok:
 
 	return 0;
 }
@@ -1532,6 +1571,7 @@ static int evergreen_cs_check_reg(struct radeon_cs_parser *p, u32 reg, u32 idx)
 	case CB_COLOR7_SLICE:
 		tmp = (reg - CB_COLOR0_SLICE) / 0x3c;
 		track->cb_color_slice[tmp] = radeon_get_ib_value(p, idx);
+		track->cb_color_slice_idx[tmp] = idx;
 		track->cb_dirty = true;
 		break;
 	case CB_COLOR8_SLICE:
@@ -1540,6 +1580,7 @@ static int evergreen_cs_check_reg(struct radeon_cs_parser *p, u32 reg, u32 idx)
 	case CB_COLOR11_SLICE:
 		tmp = ((reg - CB_COLOR8_SLICE) / 0x1c) + 8;
 		track->cb_color_slice[tmp] = radeon_get_ib_value(p, idx);
+		track->cb_color_slice_idx[tmp] = idx;
 		track->cb_dirty = true;
 		break;
 	case CB_COLOR0_ATTRIB:
diff --git a/drivers/gpu/drm/radeon/radeon_drv.c b/drivers/gpu/drm/radeon/radeon_drv.c
index f0bb2b543b13d2..03e5f5df40f168 100644
--- a/drivers/gpu/drm/radeon/radeon_drv.c
+++ b/drivers/gpu/drm/radeon/radeon_drv.c
@@ -57,9 +57,10 @@
  *   2.13.0 - virtual memory support, streamout
  *   2.14.0 - add evergreen tiling informations
  *   2.15.0 - add max_pipes query
+ *   2.16.0 - fix evergreen 2D tiled surface calculation
  */
 #define KMS_DRIVER_MAJOR	2
-#define KMS_DRIVER_MINOR	15
+#define KMS_DRIVER_MINOR	16
 #define KMS_DRIVER_PATCHLEVEL	0
 int radeon_driver_load_kms(struct drm_device *dev, unsigned long flags);
 int radeon_driver_unload_kms(struct drm_device *dev);

From fe4561680519019cc15d660862dce513ded2f3a7 Mon Sep 17 00:00:00 2001
From: Paulo Zanoni <paulo.r.zanoni@intel.com>
Date: Tue, 12 Jun 2012 11:27:01 -0300
Subject: [PATCH 2/3] drm: increase DRM_OBJECT_MAX_PROPERTY to 24

Before Kernel 3.5, no one was checking for the return value of
drm_connector_attach_property, so we never noticed that we were unable
to create some properties. Commit "drm: WARN() when
drm_connector_attach_property fails" added a WARN when we fail to
create a property, and the transition from "connector properties" to
"object properties" changed the warning message a little bit.

On i915 machines with many TV connectors we hit the maximum number of
properties (since each TV connector uses a lot of properties), so we
get a few backtraces in our logs. This commit increases the maximum
number of properties to 24 hoping we'll have enough room for
everybody.

Chris suggested that we convert this code to "lists", but I believe
this conversion can come after we make sure people's dmesgs are not
spammed by our driver.

Signed-off-by: Paulo Zanoni <paulo.r.zanoni@intel.com>
Reported-by: Dave Jones <davej@redhat.com>
Tested-by: Daniel Vetter <daniel.vetter@ffwll.ch>
Signed-off-by: Dave Airlie <airlied@redhat.com>
---
 include/drm/drm_crtc.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/include/drm/drm_crtc.h b/include/drm/drm_crtc.h
index 73e45600f95def..bac55c2151131c 100644
--- a/include/drm/drm_crtc.h
+++ b/include/drm/drm_crtc.h
@@ -54,7 +54,7 @@ struct drm_mode_object {
 	struct drm_object_properties *properties;
 };
 
-#define DRM_OBJECT_MAX_PROPERTY 16
+#define DRM_OBJECT_MAX_PROPERTY 24
 struct drm_object_properties {
 	int count;
 	uint32_t ids[DRM_OBJECT_MAX_PROPERTY];

From a393c730ab69617c3291a3b0b2a228c9be2fc28c Mon Sep 17 00:00:00 2001
From: Thomas Hellstrom <thellstrom@vmware.com>
Date: Tue, 12 Jun 2012 13:28:42 +0200
Subject: [PATCH 3/3] drm/ttm: Fix buffer object metadata accounting regression
 v2

A regression was introduced in the 3.3 rc series, commit
"drm/ttm: simplify memory accounting for ttm user v2",
causing the metadata of buffer objects created using the ttm_bo_create()
function to be accounted twice.
That causes massive leaks with the vmwgfx driver running for example
SpecViewperf Catia-03 test 2, eventually killing the app.

Furthermore, the same commit introduces a regression where
metadata accounting is leaked if a buffer object is
initialized with an illegal size. This is also fixed with this commit.

v2: Fixed an error path and removed an unused variable.

Signed-off-by: Thomas Hellstrom <thellstrom@vmware.com>
Reviewed-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
Cc: Jerome Glisse <jglisse@redhat.com>
Cc: stable@vger.kernel.org
Signed-off-by: Dave Airlie <airlied@redhat.com>
---
 drivers/gpu/drm/ttm/ttm_bo.c | 13 +++----------
 1 file changed, 3 insertions(+), 10 deletions(-)

diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c
index b67cfcaa661f87..36f4b28c1b90a4 100644
--- a/drivers/gpu/drm/ttm/ttm_bo.c
+++ b/drivers/gpu/drm/ttm/ttm_bo.c
@@ -1204,6 +1204,7 @@ int ttm_bo_init(struct ttm_bo_device *bdev,
 			(*destroy)(bo);
 		else
 			kfree(bo);
+		ttm_mem_global_free(mem_glob, acc_size);
 		return -EINVAL;
 	}
 	bo->destroy = destroy;
@@ -1307,22 +1308,14 @@ int ttm_bo_create(struct ttm_bo_device *bdev,
 			struct ttm_buffer_object **p_bo)
 {
 	struct ttm_buffer_object *bo;
-	struct ttm_mem_global *mem_glob = bdev->glob->mem_glob;
 	size_t acc_size;
 	int ret;
 
-	acc_size = ttm_bo_acc_size(bdev, size, sizeof(struct ttm_buffer_object));
-	ret = ttm_mem_global_alloc(mem_glob, acc_size, false, false);
-	if (unlikely(ret != 0))
-		return ret;
-
 	bo = kzalloc(sizeof(*bo), GFP_KERNEL);
-
-	if (unlikely(bo == NULL)) {
-		ttm_mem_global_free(mem_glob, acc_size);
+	if (unlikely(bo == NULL))
 		return -ENOMEM;
-	}
 
+	acc_size = ttm_bo_acc_size(bdev, size, sizeof(struct ttm_buffer_object));
 	ret = ttm_bo_init(bdev, bo, size, type, placement, page_alignment,
 				buffer_start, interruptible,
 			  persistent_swap_storage, acc_size, NULL, NULL);