Skip to content

Commit 954605c

Browse files
author
Maarten Lankhorst
committed
drm/radeon: use common fence implementation for fences, v4
Changes since v1:
- Kill the sw interrupt dance, add and use radeon_irq_kms_sw_irq_get_delayed instead.
- Change custom wait function, lockdep complained about it. Holding exclusive_lock in the wait function might cause deadlocks. Instead do all the processing in .enable_signaling, and wait on the global fence_queue to pick up gpu resets.
- Process all fences in radeon_gpu_reset after reset to close a race with the trylock in enable_signaling.

Changes since v2:
- Small changes to work with the rewritten lockup recovery patches.

Changes since v3:
- Call radeon_fence_schedule_check when exclusive_lock cannot be acquired to always cause a wake up.
- Reset irqs from hangup check.
- Drop reading seqno in the callback, use cached value.
- Fix indentation in radeon_fence_default_wait
- Add a radeon_test_signaled function, drop a few test_bit calls.
- Make to_radeon_fence global.

Signed-off-by: Maarten Lankhorst <maarten.lankhorst@canonical.com>
Reviewed-by: Christian König <christian.koenig@amd.com>
1 parent 1f0dc9a commit 954605c

File tree

4 files changed

+235
-29
lines changed

4 files changed

+235
-29
lines changed

drivers/gpu/drm/radeon/radeon.h

Lines changed: 19 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -66,6 +66,7 @@
6666
#include <linux/kref.h>
6767
#include <linux/interval_tree.h>
6868
#include <linux/hashtable.h>
69+
#include <linux/fence.h>
6970

7071
#include <ttm/ttm_bo_api.h>
7172
#include <ttm/ttm_bo_driver.h>
@@ -354,17 +355,19 @@ struct radeon_fence_driver {
354355
/* sync_seq is protected by ring emission lock */
355356
uint64_t sync_seq[RADEON_NUM_RINGS];
356357
atomic64_t last_seq;
357-
bool initialized;
358+
bool initialized, delayed_irq;
358359
struct delayed_work lockup_work;
359360
};
360361

361362
struct radeon_fence {
363+
struct fence base;
364+
362365
struct radeon_device *rdev;
363-
struct kref kref;
364-
/* protected by radeon_fence.lock */
365366
uint64_t seq;
366367
/* RB, DMA, etc. */
367368
unsigned ring;
369+
370+
wait_queue_t fence_wake;
368371
};
369372

370373
int radeon_fence_driver_start_ring(struct radeon_device *rdev, int ring);
@@ -782,6 +785,7 @@ struct radeon_irq {
782785
int radeon_irq_kms_init(struct radeon_device *rdev);
783786
void radeon_irq_kms_fini(struct radeon_device *rdev);
784787
void radeon_irq_kms_sw_irq_get(struct radeon_device *rdev, int ring);
788+
bool radeon_irq_kms_sw_irq_get_delayed(struct radeon_device *rdev, int ring);
785789
void radeon_irq_kms_sw_irq_put(struct radeon_device *rdev, int ring);
786790
void radeon_irq_kms_pflip_irq_get(struct radeon_device *rdev, int crtc);
787791
void radeon_irq_kms_pflip_irq_put(struct radeon_device *rdev, int crtc);
@@ -2308,6 +2312,7 @@ struct radeon_device {
23082312
struct radeon_mman mman;
23092313
struct radeon_fence_driver fence_drv[RADEON_NUM_RINGS];
23102314
wait_queue_head_t fence_queue;
2315+
unsigned fence_context;
23112316
struct mutex ring_lock;
23122317
struct radeon_ring ring[RADEON_NUM_RINGS];
23132318
bool ib_pool_ready;
@@ -2441,7 +2446,17 @@ void cik_mm_wdoorbell(struct radeon_device *rdev, u32 index, u32 v);
24412446
/*
24422447
* Cast helper
24432448
*/
2444-
#define to_radeon_fence(p) ((struct radeon_fence *)(p))
2449+
extern const struct fence_ops radeon_fence_ops;
2450+
2451+
static inline struct radeon_fence *to_radeon_fence(struct fence *f)
2452+
{
2453+
struct radeon_fence *__f = container_of(f, struct radeon_fence, base);
2454+
2455+
if (__f->base.ops == &radeon_fence_ops)
2456+
return __f;
2457+
2458+
return NULL;
2459+
}
24452460

24462461
/*
24472462
* Registers read & write functions.

drivers/gpu/drm/radeon/radeon_device.c

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1253,6 +1253,7 @@ int radeon_device_init(struct radeon_device *rdev,
12531253
for (i = 0; i < RADEON_NUM_RINGS; i++) {
12541254
rdev->ring[i].idx = i;
12551255
}
1256+
rdev->fence_context = fence_context_alloc(RADEON_NUM_RINGS);
12561257

12571258
DRM_INFO("initializing kernel modesetting (%s 0x%04X:0x%04X 0x%04X:0x%04X).\n",
12581259
radeon_family_name[rdev->family], pdev->vendor, pdev->device,

drivers/gpu/drm/radeon/radeon_fence.c

Lines changed: 200 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -130,21 +130,59 @@ int radeon_fence_emit(struct radeon_device *rdev,
130130
struct radeon_fence **fence,
131131
int ring)
132132
{
133+
u64 seq = ++rdev->fence_drv[ring].sync_seq[ring];
134+
133135
/* we are protected by the ring emission mutex */
134136
*fence = kmalloc(sizeof(struct radeon_fence), GFP_KERNEL);
135137
if ((*fence) == NULL) {
136138
return -ENOMEM;
137139
}
138-
kref_init(&((*fence)->kref));
139140
(*fence)->rdev = rdev;
140-
(*fence)->seq = ++rdev->fence_drv[ring].sync_seq[ring];
141+
(*fence)->seq = seq;
141142
(*fence)->ring = ring;
143+
fence_init(&(*fence)->base, &radeon_fence_ops,
144+
&rdev->fence_queue.lock, rdev->fence_context + ring, seq);
142145
radeon_fence_ring_emit(rdev, ring, *fence);
143146
trace_radeon_fence_emit(rdev->ddev, ring, (*fence)->seq);
144147
radeon_fence_schedule_check(rdev, ring);
145148
return 0;
146149
}
147150

151+
/**
152+
* radeon_fence_check_signaled - callback from fence_queue
153+
*
154+
* this function is called with fence_queue lock held, which is also used
155+
* for the fence locking itself, so unlocked variants are used for
156+
* fence_signal, and remove_wait_queue.
157+
*/
158+
static int radeon_fence_check_signaled(wait_queue_t *wait, unsigned mode, int flags, void *key)
159+
{
160+
struct radeon_fence *fence;
161+
u64 seq;
162+
163+
fence = container_of(wait, struct radeon_fence, fence_wake);
164+
165+
/*
166+
* We cannot use radeon_fence_process here because we're already
167+
* in the waitqueue, in a call from wake_up_all.
168+
*/
169+
seq = atomic64_read(&fence->rdev->fence_drv[fence->ring].last_seq);
170+
if (seq >= fence->seq) {
171+
int ret = fence_signal_locked(&fence->base);
172+
173+
if (!ret)
174+
FENCE_TRACE(&fence->base, "signaled from irq context\n");
175+
else
176+
FENCE_TRACE(&fence->base, "was already signaled\n");
177+
178+
radeon_irq_kms_sw_irq_put(fence->rdev, fence->ring);
179+
__remove_wait_queue(&fence->rdev->fence_queue, &fence->fence_wake);
180+
fence_put(&fence->base);
181+
} else
182+
FENCE_TRACE(&fence->base, "pending\n");
183+
return 0;
184+
}
185+
148186
/**
149187
* radeon_fence_activity - check for fence activity
150188
*
@@ -242,6 +280,15 @@ static void radeon_fence_check_lockup(struct work_struct *work)
242280
return;
243281
}
244282

283+
if (fence_drv->delayed_irq && rdev->ddev->irq_enabled) {
284+
unsigned long irqflags;
285+
286+
fence_drv->delayed_irq = false;
287+
spin_lock_irqsave(&rdev->irq.lock, irqflags);
288+
radeon_irq_set(rdev);
289+
spin_unlock_irqrestore(&rdev->irq.lock, irqflags);
290+
}
291+
245292
if (radeon_fence_activity(rdev, ring))
246293
wake_up_all(&rdev->fence_queue);
247294

@@ -275,21 +322,6 @@ void radeon_fence_process(struct radeon_device *rdev, int ring)
275322
wake_up_all(&rdev->fence_queue);
276323
}
277324

278-
/**
279-
* radeon_fence_destroy - destroy a fence
280-
*
281-
* @kref: fence kref
282-
*
283-
* Frees the fence object (all asics).
284-
*/
285-
static void radeon_fence_destroy(struct kref *kref)
286-
{
287-
struct radeon_fence *fence;
288-
289-
fence = container_of(kref, struct radeon_fence, kref);
290-
kfree(fence);
291-
}
292-
293325
/**
294326
* radeon_fence_seq_signaled - check if a fence sequence number has signaled
295327
*
@@ -318,6 +350,75 @@ static bool radeon_fence_seq_signaled(struct radeon_device *rdev,
318350
return false;
319351
}
320352

353+
static bool radeon_fence_is_signaled(struct fence *f)
354+
{
355+
struct radeon_fence *fence = to_radeon_fence(f);
356+
struct radeon_device *rdev = fence->rdev;
357+
unsigned ring = fence->ring;
358+
u64 seq = fence->seq;
359+
360+
if (atomic64_read(&rdev->fence_drv[ring].last_seq) >= seq) {
361+
return true;
362+
}
363+
364+
if (down_read_trylock(&rdev->exclusive_lock)) {
365+
radeon_fence_process(rdev, ring);
366+
up_read(&rdev->exclusive_lock);
367+
368+
if (atomic64_read(&rdev->fence_drv[ring].last_seq) >= seq) {
369+
return true;
370+
}
371+
}
372+
return false;
373+
}
374+
375+
/**
376+
* radeon_fence_enable_signaling - enable signalling on fence
377+
* @fence: fence
378+
*
379+
* This function is called with fence_queue lock held, and adds a callback
380+
* to fence_queue that checks if this fence is signaled, and if so it
381+
* signals the fence and removes itself.
382+
*/
383+
static bool radeon_fence_enable_signaling(struct fence *f)
384+
{
385+
struct radeon_fence *fence = to_radeon_fence(f);
386+
struct radeon_device *rdev = fence->rdev;
387+
388+
if (atomic64_read(&rdev->fence_drv[fence->ring].last_seq) >= fence->seq)
389+
return false;
390+
391+
if (down_read_trylock(&rdev->exclusive_lock)) {
392+
radeon_irq_kms_sw_irq_get(rdev, fence->ring);
393+
394+
if (radeon_fence_activity(rdev, fence->ring))
395+
wake_up_all_locked(&rdev->fence_queue);
396+
397+
/* did fence get signaled after we enabled the sw irq? */
398+
if (atomic64_read(&rdev->fence_drv[fence->ring].last_seq) >= fence->seq) {
399+
radeon_irq_kms_sw_irq_put(rdev, fence->ring);
400+
up_read(&rdev->exclusive_lock);
401+
return false;
402+
}
403+
404+
up_read(&rdev->exclusive_lock);
405+
} else {
406+
/* we're probably in a lockup, lets not fiddle too much */
407+
if (radeon_irq_kms_sw_irq_get_delayed(rdev, fence->ring))
408+
rdev->fence_drv[fence->ring].delayed_irq = true;
409+
radeon_fence_schedule_check(rdev, fence->ring);
410+
}
411+
412+
fence->fence_wake.flags = 0;
413+
fence->fence_wake.private = NULL;
414+
fence->fence_wake.func = radeon_fence_check_signaled;
415+
__add_wait_queue(&rdev->fence_queue, &fence->fence_wake);
416+
fence_get(f);
417+
418+
FENCE_TRACE(&fence->base, "armed on ring %i!\n", fence->ring);
419+
return true;
420+
}
421+
321422
/**
322423
* radeon_fence_signaled - check if a fence has signaled
323424
*
@@ -330,8 +431,15 @@ bool radeon_fence_signaled(struct radeon_fence *fence)
330431
{
331432
if (!fence)
332433
return true;
333-
if (radeon_fence_seq_signaled(fence->rdev, fence->seq, fence->ring))
434+
435+
if (radeon_fence_seq_signaled(fence->rdev, fence->seq, fence->ring)) {
436+
int ret;
437+
438+
ret = fence_signal(&fence->base);
439+
if (!ret)
440+
FENCE_TRACE(&fence->base, "signaled from radeon_fence_signaled\n");
334441
return true;
442+
}
335443
return false;
336444
}
337445

@@ -433,17 +541,15 @@ int radeon_fence_wait(struct radeon_fence *fence, bool intr)
433541
uint64_t seq[RADEON_NUM_RINGS] = {};
434542
long r;
435543

436-
if (fence == NULL) {
437-
WARN(1, "Querying an invalid fence : %p !\n", fence);
438-
return -EINVAL;
439-
}
440-
441544
seq[fence->ring] = fence->seq;
442545
r = radeon_fence_wait_seq_timeout(fence->rdev, seq, intr, MAX_SCHEDULE_TIMEOUT);
443546
if (r < 0) {
444547
return r;
445548
}
446549

550+
r = fence_signal(&fence->base);
551+
if (!r)
552+
FENCE_TRACE(&fence->base, "signaled from fence_wait\n");
447553
return 0;
448554
}
449555

@@ -557,7 +663,7 @@ int radeon_fence_wait_empty(struct radeon_device *rdev, int ring)
557663
*/
558664
struct radeon_fence *radeon_fence_ref(struct radeon_fence *fence)
559665
{
560-
kref_get(&fence->kref);
666+
fence_get(&fence->base);
561667
return fence;
562668
}
563669

@@ -574,7 +680,7 @@ void radeon_fence_unref(struct radeon_fence **fence)
574680

575681
*fence = NULL;
576682
if (tmp) {
577-
kref_put(&tmp->kref, radeon_fence_destroy);
683+
fence_put(&tmp->base);
578684
}
579685
}
580686

@@ -887,3 +993,72 @@ int radeon_debugfs_fence_init(struct radeon_device *rdev)
887993
return 0;
888994
#endif
889995
}
996+
997+
static const char *radeon_fence_get_driver_name(struct fence *fence)
998+
{
999+
return "radeon";
1000+
}
1001+
1002+
static const char *radeon_fence_get_timeline_name(struct fence *f)
1003+
{
1004+
struct radeon_fence *fence = to_radeon_fence(f);
1005+
switch (fence->ring) {
1006+
case RADEON_RING_TYPE_GFX_INDEX: return "radeon.gfx";
1007+
case CAYMAN_RING_TYPE_CP1_INDEX: return "radeon.cp1";
1008+
case CAYMAN_RING_TYPE_CP2_INDEX: return "radeon.cp2";
1009+
case R600_RING_TYPE_DMA_INDEX: return "radeon.dma";
1010+
case CAYMAN_RING_TYPE_DMA1_INDEX: return "radeon.dma1";
1011+
case R600_RING_TYPE_UVD_INDEX: return "radeon.uvd";
1012+
case TN_RING_TYPE_VCE1_INDEX: return "radeon.vce1";
1013+
case TN_RING_TYPE_VCE2_INDEX: return "radeon.vce2";
1014+
default: WARN_ON_ONCE(1); return "radeon.unk";
1015+
}
1016+
}
1017+
1018+
static inline bool radeon_test_signaled(struct radeon_fence *fence)
1019+
{
1020+
return test_bit(FENCE_FLAG_SIGNALED_BIT, &fence->base.flags);
1021+
}
1022+
1023+
static signed long radeon_fence_default_wait(struct fence *f, bool intr,
1024+
signed long t)
1025+
{
1026+
struct radeon_fence *fence = to_radeon_fence(f);
1027+
struct radeon_device *rdev = fence->rdev;
1028+
bool signaled;
1029+
1030+
fence_enable_sw_signaling(&fence->base);
1031+
1032+
/*
1033+
* This function has to return -EDEADLK, but cannot hold
1034+
* exclusive_lock during the wait because some callers
1035+
* may already hold it. This means checking needs_reset without
1036+
* lock, and not fiddling with any gpu internals.
1037+
*
1038+
* The callback installed with fence_enable_sw_signaling will
1039+
* run before our wait_event_*timeout call, so we will see
1040+
* both the signaled fence and the changes to needs_reset.
1041+
*/
1042+
1043+
if (intr)
1044+
t = wait_event_interruptible_timeout(rdev->fence_queue,
1045+
((signaled = radeon_test_signaled(fence)) ||
1046+
rdev->needs_reset), t);
1047+
else
1048+
t = wait_event_timeout(rdev->fence_queue,
1049+
((signaled = radeon_test_signaled(fence)) ||
1050+
rdev->needs_reset), t);
1051+
1052+
if (t > 0 && !signaled)
1053+
return -EDEADLK;
1054+
return t;
1055+
}
1056+
1057+
const struct fence_ops radeon_fence_ops = {
1058+
.get_driver_name = radeon_fence_get_driver_name,
1059+
.get_timeline_name = radeon_fence_get_timeline_name,
1060+
.enable_signaling = radeon_fence_enable_signaling,
1061+
.signaled = radeon_fence_is_signaled,
1062+
.wait = radeon_fence_default_wait,
1063+
.release = NULL,
1064+
};

0 commit comments

Comments
 (0)