gdev: updated scheduler

gdev: temporarily disabled garbage collection due to shared memory bugs
gdev: added gbarrier()
commit 30bc41b326727e9f392671fe7bb22edd527a0bdb (parent: 5709200)
Author: Shinpei Kato
9 common/gdev_api.c
@@ -881,6 +881,15 @@ int gsync(struct gdev_handle *h, uint32_t id, struct gdev_time *timeout)
}
/**
+ * gbarrier():
+ * explicitly set a memory barrier.
+ */
+int gbarrier(struct gdev_handle *h)
+{
+ return gdev_barrier(h->ctx);
+}
+
+/**
* gquery():
* query the device-specific information.
*/
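For orientation, here is a minimal caller-side sketch of the new entry point. gopen(), gclose(), and gmemcpy_to_device() are assumed from the rest of the Gdev API (they are not part of this hunk); gbarrier() simply blocks until outstanding memory operations on the handle's context have settled.

	/* sketch only: gopen()/gclose()/gmemcpy_to_device() are assumed
	   to follow their declarations elsewhere in gdev_api.h. */
	#include "gdev_api.h"

	int barrier_example(uint64_t dst_addr, void *src_buf, uint64_t size)
	{
		Ghandle h = gopen(0); /* open Gdev device #0. */
		if (!h)
			return -1;
		gmemcpy_to_device(h, dst_addr, src_buf, size);
		gbarrier(h); /* block until the copy is visible on the device. */
		gclose(h);
		return 0;
	}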
1  common/gdev_api.h
@@ -59,6 +59,7 @@ int gmemcpy_user_from_device_async(Ghandle h, void *dst_buf, uint64_t src_addr,
int gmemcpy_in_device(Ghandle h, uint64_t dst_addr, uint64_t src_addr, uint64_t size);
int glaunch(Ghandle h, struct gdev_kernel *kernel, uint32_t *id);
int gsync(Ghandle h, uint32_t id, struct gdev_time *timeout);
+int gbarrier(Ghandle h);
int gquery(Ghandle h, uint32_t type, uint64_t *result);
int gtune(Ghandle h, uint32_t type, uint32_t value);
int gshmget(Ghandle h, int key, uint64_t size, int flags);
1  common/gdev_arch.h
@@ -52,6 +52,7 @@ void gdev_write32(gdev_mem_t *mem, uint64_t addr, uint32_t val);
int gdev_read(gdev_mem_t *mem, void *buf, uint64_t addr, uint32_t size);
int gdev_write(gdev_mem_t *mem, uint64_t addr, const void *buf, uint32_t size);
int gdev_poll(gdev_ctx_t *ctx, uint32_t seq, struct gdev_time *timeout);
+int gdev_barrier(struct gdev_ctx *ctx);
int gdev_query(struct gdev_device *gdev, uint32_t type, uint64_t *result);
/**
13 common/gdev_ioctl_def.h
@@ -43,12 +43,13 @@
#define GDEV_IOCTL_GMEMCPY_IN_DEVICE 0x108
#define GDEV_IOCTL_GLAUNCH 0x109
#define GDEV_IOCTL_GSYNC 0x110
-#define GDEV_IOCTL_GQUERY 0x111
-#define GDEV_IOCTL_GTUNE 0x112
-#define GDEV_IOCTL_GSHMGET 0x113
-#define GDEV_IOCTL_GSHMAT 0x114
-#define GDEV_IOCTL_GSHMDT 0x115
-#define GDEV_IOCTL_GSHMCTL 0x116
+#define GDEV_IOCTL_GBARRIER 0x111
+#define GDEV_IOCTL_GQUERY 0x112
+#define GDEV_IOCTL_GTUNE 0x113
+#define GDEV_IOCTL_GSHMGET 0x114
+#define GDEV_IOCTL_GSHMAT 0x115
+#define GDEV_IOCTL_GSHMDT 0x116
+#define GDEV_IOCTL_GSHMCTL 0x117
struct gdev_ioctl_mem {
uint64_t addr;
4 common/gdev_nvidia.h
@@ -35,6 +35,10 @@
#include "gdev_system.h"
#include "gdev_time.h"
+#ifdef GDEV_SCHED_MRQ
+#define GDEV_NVIDIA_MEMCPY_PCOPY
+#endif
+
#define GDEV_SUBCH_NV_COMPUTE GDEV_SUBCH_COMPUTE
#ifndef GDEV_NVIDIA_MEMCPY_PCOPY
#define GDEV_SUBCH_NV_M2MF GDEV_SUBCH_MEMCPY
17 common/gdev_nvidia_compute.c
@@ -143,7 +143,7 @@ int gdev_poll(struct gdev_ctx *ctx, uint32_t seq, struct gdev_time *timeout)
struct gdev_compute *compute = gdev->compute;
gdev_time_stamp(&time_start);
- gdev_time_ms(&time_relax, 100); /* relax polling when 100 ms elapsed. */
+ gdev_time_ms(&time_relax, 1000); /* relax polling when 1000 ms elapsed. */
while (seq != compute->fence_read(ctx, seq)) {
gdev_time_stamp(&time_now);
@@ -162,6 +162,21 @@ int gdev_poll(struct gdev_ctx *ctx, uint32_t seq, struct gdev_time *timeout)
return 0;
}
+/* set a memory barrier by blocking until the barrier fence is read back. */
+int gdev_barrier(struct gdev_ctx *ctx)
+{
+ struct gdev_vas *vas = ctx->vas;
+ struct gdev_device *gdev = vas->gdev;
+ struct gdev_compute *compute = gdev->compute;
+ uint32_t seq = 0; /* 0 is a special sequence for barrier. */
+
+ compute->membar(ctx);
+ compute->fence_write(ctx, GDEV_SUBCH_COMPUTE, seq);
+ while (seq != compute->fence_read(ctx, seq));
+
+ return 0;
+}
+
/* query device-specific information. */
int gdev_query(struct gdev_device *gdev, uint32_t type, uint64_t *result)
{
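Note that gdev_barrier() above spins unboundedly on the fence read. A defensive variant could reuse the timeout pattern from gdev_poll(); the sketch below assumes the same compute->membar/fence_write/fence_read interface and the gdev_time helpers used elsewhere in this file, and is not part of the commit.

	/* hypothetical bounded barrier (sketch only, not in the commit). */
	int gdev_barrier_timed(struct gdev_ctx *ctx, struct gdev_time *timeout)
	{
		struct gdev_vas *vas = ctx->vas;
		struct gdev_device *gdev = vas->gdev;
		struct gdev_compute *compute = gdev->compute;
		struct gdev_time time_start, time_now, time_elapse;
		uint32_t seq = 0; /* 0 is the special barrier sequence. */

		compute->membar(ctx);
		compute->fence_write(ctx, GDEV_SUBCH_COMPUTE, seq);
		gdev_time_stamp(&time_start);
		while (seq != compute->fence_read(ctx, seq)) {
			gdev_time_stamp(&time_now);
			gdev_time_sub(&time_elapse, &time_now, &time_start);
			if (timeout && gdev_time_ge(&time_elapse, timeout))
				return -1; /* give up instead of spinning forever. */
		}

		return 0;
	}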
3  common/gdev_nvidia_mem.c
@@ -206,10 +206,12 @@ void gdev_mem_free(struct gdev_mem *mem)
/* garbage collection: free all memory left in heap. */
void gdev_mem_gc(struct gdev_vas *vas)
{
+#ifdef GDEV_GARBAGE_COLLECTION_SUPPORT
struct gdev_mem *mem;
/* device memory. */
gdev_list_for_each (mem, &vas->mem_list, list_entry_heap) {
+ GDEV_PRINT("Garbage Collect 0x%llx\n", mem->addr);
gdev_mem_free(mem);
}
@@ -217,6 +219,7 @@ void gdev_mem_gc(struct gdev_vas *vas)
gdev_list_for_each (mem, &vas->dma_mem_list, list_entry_heap) {
gdev_mem_free(mem);
}
+#endif
}
/* look up the memory object allocated at the specified address. */
2  common/gdev_nvidia_shm.c
@@ -157,8 +157,6 @@ int gdev_shm_create(struct gdev_device *gdev, struct gdev_vas *vas, int key, uin
int gdev_shm_destroy_mark(struct gdev_device *gdev, struct gdev_mem *owner)
{
gdev_mutex_lock(&owner->shm->mutex);
- /* delete the current owner. */
- gdev_list_del(&owner->list_entry_shm);
/* find the new owner (could be NULL). */
gdev_shm_owners[owner->shm->id] =
gdev_list_container(gdev_list_head(&owner->shm->mem_list));
42 common/gdev_sched.c
@@ -172,16 +172,24 @@ static void __gdev_dequeue_memory(struct gdev_sched_entity *se)
/**
* scheduling policy files.
*/
+#include "gdev_vsched_band.c"
#include "gdev_vsched_credit.c"
-#include "gdev_vsched_crod.c"
+#include "gdev_vsched_cnods.c"
+#include "gdev_vsched_fifo.c"
-#define GDEV_VSCHED_POLICY_CREDIT
-//#define GDEV_VSCHED_POLICY_CROD
+#define GDEV_VSCHED_POLICY_BAND
+//#define GDEV_VSCHED_POLICY_CREDIT
+//#define GDEV_VSCHED_POLICY_CNODS
+//#define GDEV_VSCHED_POLICY_FIFO
-#if defined(GDEV_VSCHED_POLICY_CREDIT)
+#if defined(GDEV_VSCHED_POLICY_BAND)
+struct gdev_vsched_policy *gdev_vsched = &gdev_vsched_band;
+#elif defined(GDEV_VSCHED_POLICY_CREDIT)
struct gdev_vsched_policy *gdev_vsched = &gdev_vsched_credit;
-#elif defined(GDEV_VSCHED_POLICY_CROD)
-struct gdev_vsched_policy *gdev_vsched = &gdev_vsched_crod;
+#elif defined(GDEV_VSCHED_POLICY_CNODS)
+struct gdev_vsched_policy *gdev_vsched = &gdev_vsched_cnods;
+#elif defined(GDEV_VSCHED_POLICY_FIFO)
+struct gdev_vsched_policy *gdev_vsched = &gdev_vsched_fifo;
#endif
/**
@@ -259,10 +267,6 @@ void gdev_select_next_compute(struct gdev_device *gdev)
gdev->current_com = (void*)se;
gdev_unlock(&gdev->sched_com_lock);
- printk("gdev%d->credit_com = %s%lu\n", gdev->id,
- gdev->credit_com.neg ? "-" : "",
- gdev_time_to_us(&gdev->credit_com));
-
/* select the next device to be scheduled. */
next = gdev_vsched->select_next_compute(gdev);
if (!next)
@@ -280,7 +284,7 @@ void gdev_select_next_compute(struct gdev_device *gdev)
__gdev_dequeue_compute(se);
gdev_unlock(&next->sched_com_lock);
- while (gdev_sched_wakeup(se->task) < 0) {
+ if (gdev_sched_wakeup(se->task) < 0) {
GDEV_PRINT("Failed to wake up context %d\n", se->ctx->cid);
}
}
@@ -306,6 +310,11 @@ void gdev_schedule_memory(struct gdev_sched_entity *se)
{
struct gdev_device *gdev = se->gdev;
+#ifndef GDEV_SCHED_MRQ
+ gdev_schedule_compute(se);
+ return;
+#endif
+
resched:
/* algorithm-specific virtual device scheduler. */
gdev_vsched->schedule_memory(se);
@@ -345,6 +354,11 @@ void gdev_select_next_memory(struct gdev_device *gdev)
struct gdev_device *next;
struct gdev_time now, exec;
+#ifndef GDEV_SCHED_MRQ
+ gdev_select_next_compute(gdev);
+ return;
+#endif
+
gdev_lock(&gdev->sched_mem_lock);
se = (struct gdev_sched_entity *)gdev->current_mem;
if (!se) {
@@ -374,10 +388,6 @@ void gdev_select_next_memory(struct gdev_device *gdev)
gdev->current_mem = (void*)se;
gdev_unlock(&gdev->sched_mem_lock);
- printk("gdev%d->credit_mem = %s%lu\n", gdev->id,
- gdev->credit_mem.neg ? "-" : "",
- gdev_time_to_us(&gdev->credit_mem));
-
/* select the next device to be scheduled. */
next = gdev_vsched->select_next_memory(gdev);
if (!next)
@@ -411,5 +421,7 @@ void gdev_select_next_memory(struct gdev_device *gdev)
*/
void gdev_replenish_credit_memory(struct gdev_device *gdev)
{
+#ifdef GDEV_SCHED_MRQ
gdev_vsched->replenish_memory(gdev);
+#endif
}
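Each policy file is a plain .c fragment #included into gdev_sched.c, and the active policy is fixed at compile time by exactly one GDEV_VSCHED_POLICY_* define. A new policy only needs to supply the six callbacks of struct gdev_vsched_policy; a hypothetical skeleton, with invented names, purely for illustration:

	/* skeleton for an additional policy, following the pattern of
	   gdev_vsched_band/credit/cnods/fifo (sketch only). */
	static void gdev_vsched_noop_schedule_compute(struct gdev_sched_entity *se) {}
	static struct gdev_device *gdev_vsched_noop_select_next_compute(struct gdev_device *gdev)
	{
		return gdev; /* always keep the current virtual device. */
	}
	static void gdev_vsched_noop_replenish_compute(struct gdev_device *gdev) {}
	static void gdev_vsched_noop_schedule_memory(struct gdev_sched_entity *se) {}
	static struct gdev_device *gdev_vsched_noop_select_next_memory(struct gdev_device *gdev)
	{
		return gdev;
	}
	static void gdev_vsched_noop_replenish_memory(struct gdev_device *gdev) {}

	struct gdev_vsched_policy gdev_vsched_noop = {
		.schedule_compute = gdev_vsched_noop_schedule_compute,
		.select_next_compute = gdev_vsched_noop_select_next_compute,
		.replenish_compute = gdev_vsched_noop_replenish_compute,
		.schedule_memory = gdev_vsched_noop_schedule_memory,
		.select_next_memory = gdev_vsched_noop_select_next_memory,
		.replenish_memory = gdev_vsched_noop_replenish_memory,
	};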
11 common/gdev_sched.h
@@ -33,6 +33,13 @@
#include "gdev_time.h"
/**
+ * Queueing methods:
+ * SGQ: Single Device Queue
+ * MRQ: Multiple Resource Queues
+ */
+#define GDEV_SCHED_SGQ /* alternatively: GDEV_SCHED_MRQ */
+
+/**
* priority levels.
*/
#define GDEV_PRIO_MAX 40
@@ -42,9 +49,9 @@
/**
* virtual device period/threshold.
*/
-#define GDEV_PERIOD_DEFAULT 100000 /*30000*/ /* microseconds */
+#define GDEV_PERIOD_DEFAULT 30000 /* microseconds */
#define GDEV_CREDIT_INACTIVE_THRESHOLD GDEV_PERIOD_DEFAULT
-#define GDEV_UPDATE_INTERVAL (GDEV_PERIOD_DEFAULT * 10)
+#define GDEV_UPDATE_INTERVAL 1000000
/**
* scheduling properties.
34 common/gdev_time.h
@@ -118,10 +118,16 @@ static inline void gdev_time_clear(struct gdev_time *t)
}
-/* x == y. */
+/* x == y */
static inline int gdev_time_eq(struct gdev_time *x, struct gdev_time *y)
{
- return (x->sec == y->sec) && (x->usec && y->usec);
+ return (x->sec == y->sec) && (x->usec == y->usec);
+}
+
+/* p == 0 */
+static inline int gdev_time_eqz(struct gdev_time *p)
+{
+ return (p->sec == 0) && (p->usec == 0);
}
/* x > y */
@@ -137,6 +143,12 @@ static inline int gdev_time_gt(struct gdev_time *x, struct gdev_time *y)
return (x->sec == y->sec) ? (x->usec > y->usec) : (x->sec > y->sec);
}
+/* p > 0 */
+static inline int gdev_time_gtz(struct gdev_time *p)
+{
+ return (!p->neg) && ((p->sec > 0) || (p->usec > 0));
+}
+
/* x >= y */
static inline int gdev_time_ge(struct gdev_time *x, struct gdev_time *y)
{
@@ -146,6 +158,12 @@ static inline int gdev_time_ge(struct gdev_time *x, struct gdev_time *y)
return gdev_time_gt(x, y);
}
+/* p >= 0 */
+static inline int gdev_time_gez(struct gdev_time *p)
+{
+ return gdev_time_gtz(p) || gdev_time_eqz(p);
+}
+
/* x < y */
static inline int gdev_time_lt(struct gdev_time *x, struct gdev_time *y)
{
@@ -159,6 +177,12 @@ static inline int gdev_time_lt(struct gdev_time *x, struct gdev_time *y)
return (x->sec == y->sec) ? (x->usec < y->usec) : (x->sec < y->sec);
}
+/* p < 0 */
+static inline int gdev_time_ltz(struct gdev_time *p)
+{
+ return p->neg;
+}
+
/* x <= y */
static inline int gdev_time_le(struct gdev_time *x, struct gdev_time *y)
{
@@ -168,6 +192,12 @@ static inline int gdev_time_le(struct gdev_time *x, struct gdev_time *y)
return gdev_time_lt(x, y);
}
+/* p <= 0 */
+static inline int gdev_time_lez(struct gdev_time *p)
+{
+ return gdev_time_ltz(p) || gdev_time_eqz(p);
+}
+
/* ret = x + y (x and y must be positive) */
static inline void __gdev_time_add_pos(struct gdev_time *ret, struct gdev_time *x, struct gdev_time *y)
{
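The new sign tests exist mainly to replace the pattern of materializing a zero-valued gdev_time just for one comparison, as the gdev_vsched_credit.c hunk below shows. Side by side:

	/* before: build an explicit zero and compare. */
	struct gdev_time zero;
	gdev_time_us(&zero, 0);
	if (gdev_time_le(&gdev->credit_com, &zero))
		; /* credit exhausted. */

	/* after: test the sign directly. */
	if (gdev_time_lez(&gdev->credit_com))
		; /* credit exhausted. */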
212 common/gdev_vsched_band.c
@@ -0,0 +1,212 @@
+/*
+ * Copyright 2012 Shinpei Kato
+ *
+ * University of California, Santa Cruz
+ * Systems Research Lab.
+ *
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * VA LINUX SYSTEMS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+static void __gdev_vsched_band_yield_chance(struct gdev_device *gdev)
+{
+ struct gdev_time time_wait, time_now;
+ gdev_time_stamp(&time_now);
+ gdev_time_us(&time_wait, 500);
+ gdev_time_add(&time_wait, &time_wait, &time_now);
+ while (gdev_time_lt(&time_now, &time_wait)) {
+ SCHED_YIELD();
+ gdev_time_stamp(&time_now);
+ }
+}
+
+static void gdev_vsched_band_schedule_compute(struct gdev_sched_entity *se)
+{
+ struct gdev_device *gdev = se->gdev;
+ struct gdev_device *phys = gdev->parent;
+
+ if (!phys)
+ return;
+
+resched:
+ if (gdev_time_lez(&gdev->credit_com) && gdev->com_bw_used > gdev->com_bw)
+ __gdev_vsched_band_yield_chance(gdev);
+
+ gdev_lock(&phys->sched_com_lock);
+
+ if (phys->current_com && phys->current_com != gdev) {
+ /* insert the scheduling entity to its local priority-ordered list. */
+ gdev_lock_nested(&gdev->sched_com_lock);
+ __gdev_enqueue_compute(gdev, se);
+ gdev_unlock_nested(&gdev->sched_com_lock);
+ gdev_unlock(&phys->sched_com_lock);
+
+ /* now the corresponding task will be suspended until some other tasks
+ will awaken it upon completions of their compute launches. */
+ gdev_sched_sleep();
+
+ goto resched;
+ }
+ else {
+ phys->current_com = (void *)gdev;
+ gdev_unlock(&phys->sched_com_lock);
+ }
+}
+
+static struct gdev_device *gdev_vsched_band_select_next_compute(struct gdev_device *gdev)
+{
+ struct gdev_device *phys = gdev->parent;
+ struct gdev_device *next;
+
+ if (!phys)
+ return gdev;
+
+ gdev_lock(&phys->sched_com_lock);
+
+ /* if the credit is exhausted, reinsert the device. */
+ if (gdev_time_lez(&gdev->credit_com) && gdev->com_bw_used > gdev->com_bw) {
+ gdev_list_del(&gdev->list_entry_com);
+ gdev_list_add_tail(&gdev->list_entry_com, &phys->sched_com_list);
+ }
+
+ gdev_list_for_each(next, &phys->sched_com_list, list_entry_com) {
+ gdev_lock_nested(&next->sched_com_lock);
+ if (!gdev_list_empty(&next->sched_com_list)) {
+ gdev_unlock_nested(&next->sched_com_lock);
+ goto device_switched;
+ }
+ gdev_unlock_nested(&next->sched_com_lock);
+ }
+ next = NULL;
+device_switched:
+ phys->current_com = (void*)next; /* could be null */
+ gdev_unlock(&phys->sched_com_lock);
+
+ return next;
+}
+
+static void gdev_vsched_band_replenish_compute(struct gdev_device *gdev)
+{
+ struct gdev_time credit, threshold;
+
+ gdev_time_us(&credit, gdev->period * gdev->com_bw / 100);
+ gdev_time_add(&gdev->credit_com, &gdev->credit_com, &credit);
+ /* if the credit exceeds the threshold, take all of it away. */
+ gdev_time_us(&threshold, GDEV_CREDIT_INACTIVE_THRESHOLD);
+ if (gdev_time_gt(&gdev->credit_com, &threshold))
+ gdev_time_us(&gdev->credit_com, 0);
+ /* if the credit falls below the negative threshold, reset it to zero. */
+ threshold.neg = 1;
+ if (gdev_time_lt(&gdev->credit_com, &threshold))
+ gdev_time_us(&gdev->credit_com, 0);
+}
+
+static void gdev_vsched_band_schedule_memory(struct gdev_sched_entity *se)
+{
+ struct gdev_device *gdev = se->gdev;
+ struct gdev_device *phys = gdev->parent;
+
+ if (!phys)
+ return;
+
+resched:
+ if (gdev_time_lez(&gdev->credit_mem) && gdev->mem_bw_used > gdev->mem_bw)
+ __gdev_vsched_band_yield_chance(gdev);
+
+ gdev_lock(&phys->sched_mem_lock);
+ if (phys->current_mem && phys->current_mem != gdev) {
+ /* insert the scheduling entity to its local priority-ordered list. */
+ gdev_lock_nested(&gdev->sched_mem_lock);
+ __gdev_enqueue_memory(gdev, se);
+ gdev_unlock_nested(&gdev->sched_mem_lock);
+ gdev_unlock(&phys->sched_mem_lock);
+
+ /* now the corresponding task will be suspended until some other tasks
+ will awaken it upon completions of their memory transfers. */
+ gdev_sched_sleep();
+
+ goto resched;
+ }
+ else {
+ phys->current_mem = (void *)gdev;
+ gdev_unlock(&phys->sched_mem_lock);
+ }
+}
+
+static struct gdev_device *gdev_vsched_band_select_next_memory(struct gdev_device *gdev)
+{
+ struct gdev_device *phys = gdev->parent;
+ struct gdev_device *next;
+
+ if (!phys)
+ return gdev;
+
+ gdev_lock(&phys->sched_mem_lock);
+
+ /* if the credit is exhausted, reinsert the device. */
+ if (gdev_time_lez(&gdev->credit_mem) && gdev->mem_bw_used > gdev->mem_bw) {
+ gdev_list_del(&gdev->list_entry_mem);
+ gdev_list_add_tail(&gdev->list_entry_mem, &phys->sched_mem_list);
+ }
+
+ gdev_list_for_each(next, &phys->sched_mem_list, list_entry_mem) {
+ gdev_lock_nested(&next->sched_mem_lock);
+ if (!gdev_list_empty(&next->sched_mem_list)) {
+ gdev_unlock_nested(&next->sched_mem_lock);
+ goto device_switched;
+ }
+ gdev_unlock_nested(&next->sched_mem_lock);
+ }
+ next = NULL;
+device_switched:
+ phys->current_mem = (void*)next; /* could be null */
+ gdev_unlock(&phys->sched_mem_lock);
+
+ return next;
+}
+
+static void gdev_vsched_band_replenish_memory(struct gdev_device *gdev)
+{
+ struct gdev_time credit, threshold;
+
+ gdev_time_us(&credit, gdev->period * gdev->mem_bw / 100);
+ gdev_time_add(&gdev->credit_mem, &gdev->credit_mem, &credit);
+ /* if the credit exceeds the threshold, take all of it away. */
+ gdev_time_us(&threshold, GDEV_CREDIT_INACTIVE_THRESHOLD);
+ if (gdev_time_gt(&gdev->credit_mem, &threshold))
+ gdev_time_us(&gdev->credit_mem, 0);
+ /* if the credit falls below the negative threshold, reset it to zero. */
+ threshold.neg = 1;
+ if (gdev_time_lt(&gdev->credit_mem, &threshold))
+ gdev_time_us(&gdev->credit_mem, 0);
+}
+
+/**
+ * Bandwidth-aware non-preemptive device (Band) scheduler implementation
+ */
+struct gdev_vsched_policy gdev_vsched_band = {
+ .schedule_compute = gdev_vsched_band_schedule_compute,
+ .select_next_compute = gdev_vsched_band_select_next_compute,
+ .replenish_compute = gdev_vsched_band_replenish_compute,
+ .schedule_memory = gdev_vsched_band_schedule_memory,
+ .select_next_memory = gdev_vsched_band_select_next_memory,
+ .replenish_memory = gdev_vsched_band_replenish_memory,
+};
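To make the replenishment arithmetic concrete: with the new GDEV_PERIOD_DEFAULT of 30000 microseconds and a virtual device entitled to, say, 50% compute bandwidth, each period adds 30000 * 50 / 100 = 15000 microseconds of credit, and the balance is clamped to +/- GDEV_CREDIT_INACTIVE_THRESHOLD (one period). Illustrative values only:

	/* worked example of the band replenishment (values assumed). */
	unsigned long period = 30000;	/* GDEV_PERIOD_DEFAULT, in microseconds */
	unsigned long com_bw = 50;	/* entitled compute bandwidth, in percent */
	unsigned long credit_us = period * com_bw / 100;	/* 15000 us added per period */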
96 common/gdev_vsched_crod.c → common/gdev_vsched_cnods.c
@@ -26,7 +26,7 @@
* OTHER DEALINGS IN THE SOFTWARE.
*/
-static void gdev_vsched_crod_schedule_compute(struct gdev_sched_entity *se)
+static void gdev_vsched_cnods_schedule_compute(struct gdev_sched_entity *se)
{
struct gdev_device *gdev = se->gdev;
struct gdev_device *phys = gdev->parent;
@@ -43,8 +43,6 @@ static void gdev_vsched_crod_schedule_compute(struct gdev_sched_entity *se)
gdev_unlock_nested(&gdev->sched_com_lock);
gdev_unlock(&phys->sched_com_lock);
- GDEV_PRINT("Gdev#%d Sleep\n", gdev->id);
-
/* now the corresponding task will be suspended until some other tasks
will awaken it upon completions of their compute launches. */
gdev_sched_sleep();
@@ -54,11 +52,10 @@ static void gdev_vsched_crod_schedule_compute(struct gdev_sched_entity *se)
else {
phys->current_com = (void *)gdev;
gdev_unlock(&phys->sched_com_lock);
- GDEV_PRINT("Gdev#%d Run\n", gdev->id);
}
}
-static struct gdev_device *gdev_vsched_crod_select_next_compute(struct gdev_device *gdev)
+static struct gdev_device *gdev_vsched_cnods_select_next_compute(struct gdev_device *gdev)
{
struct gdev_device *phys = gdev->parent;
struct gdev_device *next;
@@ -74,7 +71,6 @@ static struct gdev_device *gdev_vsched_crod_select_next_compute(struct gdev_devi
if (gdev_time_le(&gdev->credit_com, &zero)) {
gdev_list_del(&gdev->list_entry_com);
gdev_list_add_tail(&gdev->list_entry_com, &phys->sched_com_list);
- gdev_time_clear(&gdev->credit_com); /* clear the credit! */
}
gdev_list_for_each(next, &phys->sched_com_list, list_entry_com) {
@@ -93,25 +89,87 @@ static struct gdev_device *gdev_vsched_crod_select_next_compute(struct gdev_devi
return next;
}
-static void gdev_vsched_crod_replenish_compute(struct gdev_device *gdev)
+static void gdev_vsched_cnods_replenish_compute(struct gdev_device *gdev)
+{
+ gdev_time_us(&gdev->credit_com, gdev->period * gdev->com_bw / 100);
+}
+
+static void gdev_vsched_cnods_schedule_memory(struct gdev_sched_entity *se)
+{
+ struct gdev_device *gdev = se->gdev;
+ struct gdev_device *phys = gdev->parent;
+
+ if (!phys)
+ return;
+
+resched:
+ gdev_lock(&phys->sched_mem_lock);
+ if (phys->current_mem && phys->current_mem != gdev) {
+ /* insert the scheduling entity to its local priority-ordered list. */
+ gdev_lock_nested(&gdev->sched_mem_lock);
+ __gdev_enqueue_memory(gdev, se);
+ gdev_unlock_nested(&gdev->sched_mem_lock);
+ gdev_unlock(&phys->sched_mem_lock);
+
+ /* now the corresponding task will be suspended until some other tasks
+ will awaken it upon completions of their memory transfers. */
+ gdev_sched_sleep();
+
+ goto resched;
+ }
+ else {
+ phys->current_mem = (void *)gdev;
+ gdev_unlock(&phys->sched_mem_lock);
+ }
+}
+
+static struct gdev_device *gdev_vsched_cnods_select_next_memory(struct gdev_device *gdev)
{
- struct gdev_time credit, zero;
+ struct gdev_device *phys = gdev->parent;
+ struct gdev_device *next;
+ struct gdev_time zero;
+ if (!phys)
+ return gdev;
+
+ gdev_lock(&phys->sched_mem_lock);
+
+ /* if the credit is exhausted, reinsert the device. */
gdev_time_us(&zero, 0);
- gdev_lock(&gdev->sched_com_lock);
- if (!gdev_list_empty(&gdev->sched_com_list) ||
- gdev_time_eq(&gdev->credit_com, &zero)) {
- gdev_time_us(&credit, gdev->period * gdev->com_bw / 100);
- gdev_time_add(&gdev->credit_com, &gdev->credit_com, &credit);
+ if (gdev_time_le(&gdev->credit_mem, &zero)) {
+ gdev_list_del(&gdev->list_entry_mem);
+ gdev_list_add_tail(&gdev->list_entry_mem, &phys->sched_mem_list);
}
- gdev_unlock(&gdev->sched_com_lock);
+
+ gdev_list_for_each(next, &phys->sched_mem_list, list_entry_mem) {
+ gdev_lock_nested(&next->sched_mem_lock);
+ if (!gdev_list_empty(&next->sched_mem_list)) {
+ gdev_unlock_nested(&next->sched_mem_lock);
+ goto device_switched;
+ }
+ gdev_unlock_nested(&next->sched_mem_lock);
+ }
+ next = NULL;
+device_switched:
+ phys->current_mem = (void*)next; /* could be null */
+ gdev_unlock(&phys->sched_mem_lock);
+
+ return next;
+}
+
+static void gdev_vsched_cnods_replenish_memory(struct gdev_device *gdev)
+{
+ gdev_time_us(&gdev->credit_mem, gdev->period * gdev->mem_bw / 100);
}
/**
- * the Xen Credit scheduler implementation.
+ * the Gdev Credit No Debt and Savings (CNODS) scheduler implementation.
*/
-struct gdev_vsched_policy gdev_vsched_crod = {
- .schedule_compute = gdev_vsched_crod_schedule_compute,
- .select_next_compute = gdev_vsched_crod_select_next_compute,
- .replenish_compute = gdev_vsched_crod_replenish_compute,
+struct gdev_vsched_policy gdev_vsched_cnods = {
+ .schedule_compute = gdev_vsched_cnods_schedule_compute,
+ .select_next_compute = gdev_vsched_cnods_select_next_compute,
+ .replenish_compute = gdev_vsched_cnods_replenish_compute,
+ .schedule_memory = gdev_vsched_cnods_schedule_memory,
+ .select_next_memory = gdev_vsched_cnods_select_next_memory,
+ .replenish_memory = gdev_vsched_cnods_replenish_memory,
};
17 common/gdev_vsched_credit.c
@@ -36,6 +36,7 @@ static void gdev_vsched_credit_schedule_compute(struct gdev_sched_entity *se)
resched:
gdev_lock(&phys->sched_com_lock);
+
if (phys->current_com && phys->current_com != gdev) {
/* insert the scheduling entity to its local priority-ordered list. */
gdev_lock_nested(&gdev->sched_com_lock);
@@ -43,8 +44,6 @@ static void gdev_vsched_credit_schedule_compute(struct gdev_sched_entity *se)
gdev_unlock_nested(&gdev->sched_com_lock);
gdev_unlock(&phys->sched_com_lock);
- GDEV_PRINT("Gdev#%d Compute Sleep\n", gdev->id);
-
/* now the corresponding task will be suspended until some other tasks
will awaken it upon completions of their compute launches. */
gdev_sched_sleep();
@@ -54,8 +53,6 @@ static void gdev_vsched_credit_schedule_compute(struct gdev_sched_entity *se)
else {
phys->current_com = (void *)gdev;
gdev_unlock(&phys->sched_com_lock);
-
- GDEV_PRINT("Gdev#%d Compute Run\n", gdev->id);
}
}
@@ -63,7 +60,6 @@ static struct gdev_device *gdev_vsched_credit_select_next_compute(struct gdev_de
{
struct gdev_device *phys = gdev->parent;
struct gdev_device *next;
- struct gdev_time zero;
if (!phys)
return gdev;
@@ -71,8 +67,7 @@ static struct gdev_device *gdev_vsched_credit_select_next_compute(struct gdev_de
gdev_lock(&phys->sched_com_lock);
/* if the credit is exhausted, reinsert the device. */
- gdev_time_us(&zero, 0);
- if (gdev_time_le(&gdev->credit_com, &zero)) {
+ if (gdev_time_lez(&gdev->credit_com)) {
gdev_list_del(&gdev->list_entry_com);
gdev_list_add_tail(&gdev->list_entry_com, &phys->sched_com_list);
}
@@ -126,8 +121,6 @@ static void gdev_vsched_credit_schedule_memory(struct gdev_sched_entity *se)
gdev_unlock_nested(&gdev->sched_mem_lock);
gdev_unlock(&phys->sched_mem_lock);
- GDEV_PRINT("Gdev#%d Memory Sleep\n", gdev->id);
-
/* now the corresponding task will be suspended until some other tasks
will awaken it upon completions of their memory transfers. */
gdev_sched_sleep();
@@ -137,8 +130,6 @@ static void gdev_vsched_credit_schedule_memory(struct gdev_sched_entity *se)
else {
phys->current_mem = (void *)gdev;
gdev_unlock(&phys->sched_mem_lock);
-
- GDEV_PRINT("Gdev#%d Memory Run\n", gdev->id);
}
}
@@ -146,7 +137,6 @@ static struct gdev_device *gdev_vsched_credit_select_next_memory(struct gdev_dev
{
struct gdev_device *phys = gdev->parent;
struct gdev_device *next;
- struct gdev_time zero;
if (!phys)
return gdev;
@@ -154,8 +144,7 @@ static struct gdev_device *gdev_vsched_credit_select_next_memory(struct gdev_dev
gdev_lock(&phys->sched_mem_lock);
/* if the credit is exhausted, reinsert the device. */
- gdev_time_us(&zero, 0);
- if (gdev_time_le(&gdev->credit_mem, &zero)) {
+ if (gdev_time_lez(&gdev->credit_mem)) {
gdev_list_del(&gdev->list_entry_mem);
gdev_list_add_tail(&gdev->list_entry_mem, &phys->sched_mem_list);
}
163 common/gdev_vsched_fifo.c
@@ -0,0 +1,163 @@
+/*
+ * Copyright 2012 Shinpei Kato
+ *
+ * University of California, Santa Cruz
+ * Systems Research Lab.
+ *
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * VA LINUX SYSTEMS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+static void gdev_vsched_fifo_schedule_compute(struct gdev_sched_entity *se)
+{
+ struct gdev_device *gdev = se->gdev;
+ struct gdev_device *phys = gdev->parent;
+
+ if (!phys)
+ return;
+
+resched:
+ gdev_lock(&phys->sched_com_lock);
+ if (phys->current_com && phys->current_com != gdev) {
+ /* insert the scheduling entity to its local priority-ordered list. */
+ gdev_lock_nested(&gdev->sched_com_lock);
+ __gdev_enqueue_compute(gdev, se);
+ gdev_unlock_nested(&gdev->sched_com_lock);
+ gdev_unlock(&phys->sched_com_lock);
+
+ /* now the corresponding task will be suspended until some other tasks
+ will awaken it upon completions of their compute launches. */
+ gdev_sched_sleep();
+
+ goto resched;
+ }
+ else {
+ phys->current_com = (void *)gdev;
+ gdev_unlock(&phys->sched_com_lock);
+ }
+}
+
+static struct gdev_device *gdev_vsched_fifo_select_next_compute(struct gdev_device *gdev)
+{
+ struct gdev_device *phys = gdev->parent;
+ struct gdev_device *next;
+
+ if (!phys)
+ return gdev;
+
+ gdev_lock(&phys->sched_com_lock);
+
+ gdev_list_del(&gdev->list_entry_com);
+ gdev_list_add_tail(&gdev->list_entry_com, &phys->sched_com_list);
+
+ gdev_list_for_each(next, &phys->sched_com_list, list_entry_com) {
+ gdev_lock_nested(&next->sched_com_lock);
+ if (!gdev_list_empty(&next->sched_com_list)) {
+ gdev_unlock_nested(&next->sched_com_lock);
+ goto device_switched;
+ }
+ gdev_unlock_nested(&next->sched_com_lock);
+ }
+ next = NULL;
+device_switched:
+ phys->current_com = (void*)next; /* could be null */
+ gdev_unlock(&phys->sched_com_lock);
+
+ return next;
+}
+
+static void gdev_vsched_fifo_replenish_compute(struct gdev_device *gdev)
+{
+}
+
+static void gdev_vsched_fifo_schedule_memory(struct gdev_sched_entity *se)
+{
+ struct gdev_device *gdev = se->gdev;
+ struct gdev_device *phys = gdev->parent;
+
+ if (!phys)
+ return;
+
+resched:
+ gdev_lock(&phys->sched_mem_lock);
+ if (phys->current_mem && phys->current_mem != gdev) {
+ /* insert the scheduling entity to its local priority-ordered list. */
+ gdev_lock_nested(&gdev->sched_mem_lock);
+ __gdev_enqueue_memory(gdev, se);
+ gdev_unlock_nested(&gdev->sched_mem_lock);
+ gdev_unlock(&phys->sched_mem_lock);
+
+ /* now the corresponding task will be suspended until some other tasks
+ will awaken it upon completions of their memory transfers. */
+ gdev_sched_sleep();
+
+ goto resched;
+ }
+ else {
+ phys->current_mem = (void *)gdev;
+ gdev_unlock(&phys->sched_mem_lock);
+ }
+}
+
+static struct gdev_device *gdev_vsched_fifo_select_next_memory(struct gdev_device *gdev)
+{
+ struct gdev_device *phys = gdev->parent;
+ struct gdev_device *next;
+
+ if (!phys)
+ return gdev;
+
+ gdev_lock(&phys->sched_mem_lock);
+
+ gdev_list_del(&gdev->list_entry_mem);
+ gdev_list_add_tail(&gdev->list_entry_mem, &phys->sched_mem_list);
+
+ gdev_list_for_each(next, &phys->sched_mem_list, list_entry_mem) {
+ gdev_lock_nested(&next->sched_mem_lock);
+ if (!gdev_list_empty(&next->sched_mem_list)) {
+ gdev_unlock_nested(&next->sched_mem_lock);
+ goto device_switched;
+ }
+ gdev_unlock_nested(&next->sched_mem_lock);
+ }
+ next = NULL;
+device_switched:
+ phys->current_mem = (void*)next; /* could be null */
+ gdev_unlock(&phys->sched_mem_lock);
+
+ return next;
+}
+
+static void gdev_vsched_fifo_replenish_memory(struct gdev_device *gdev)
+{
+}
+
+/**
+ * the FIFO (non-preemptive, first-come first-served) scheduler implementation.
+ */
+struct gdev_vsched_policy gdev_vsched_fifo = {
+ .schedule_compute = gdev_vsched_fifo_schedule_compute,
+ .select_next_compute = gdev_vsched_fifo_select_next_compute,
+ .replenish_compute = gdev_vsched_fifo_replenish_compute,
+ .schedule_memory = gdev_vsched_fifo_schedule_memory,
+ .select_next_memory = gdev_vsched_fifo_select_next_memory,
+ .replenish_memory = gdev_vsched_fifo_replenish_memory,
+};
3  cuda/driver_api/context.c
@@ -340,6 +340,9 @@ CUresult cuCtxSynchronize(void)
FREE(l);
}
+ if (gbarrier(handle))
+ return CUDA_ERROR_UNKNOWN;
+
return CUDA_SUCCESS;
}
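On the CUDA side, cuCtxSynchronize() now also issues the barrier and reports CUDA_ERROR_UNKNOWN if it fails, so callers should keep checking its result. A sketch using the same driver-API calls as the tests below:

	/* cuCtxSynchronize() now waits for the kernel and barriers memory. */
	res = cuLaunchGrid(function, grid_x, grid_y);
	if (res != CUDA_SUCCESS)
		return -1;
	res = cuCtxSynchronize();
	if (res != CUDA_SUCCESS) {
		printf("cuCtxSynchronize failed: res = %lu\n", (unsigned long)res);
		return -1;
	}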
44 driver/gdev/gdev_drv.c
@@ -124,26 +124,36 @@ static void __gdev_credit_handler(unsigned long __data)
static int __gdev_credit_com_thread(void *__data)
{
struct gdev_device *gdev = (struct gdev_device*)__data;
+ struct gdev_time now, last, elapse, interval;
struct timer_list timer;
- unsigned long elapsed = 0;
+ unsigned long effective_jiffies;
GDEV_PRINT("Gdev#%d compute reserve running\n", gdev->id);
setup_timer_on_stack(&timer, __gdev_credit_handler, (unsigned long)current);
+ gdev_time_us(&interval, GDEV_UPDATE_INTERVAL);
+ gdev_time_stamp(&last);
+ effective_jiffies = jiffies;
+
while (!kthread_should_stop()) {
gdev_replenish_credit_compute(gdev);
- mod_timer(&timer, jiffies + usecs_to_jiffies(gdev->period));
+ mod_timer(&timer, effective_jiffies + usecs_to_jiffies(gdev->period));
set_current_state(TASK_UNINTERRUPTIBLE);
schedule();
- elapsed += gdev->period;
- if (elapsed >= GDEV_UPDATE_INTERVAL) {
- gdev->com_bw_used = gdev->com_time * 100 / GDEV_UPDATE_INTERVAL;
+ effective_jiffies = jiffies;
+
+ gdev_lock(&gdev->sched_com_lock);
+ gdev_time_stamp(&now);
+ gdev_time_sub(&elapse, &now, &last);
+ if (gdev_time_ge(&elapse, &interval)) {
+ gdev->com_bw_used = gdev->com_time * 100 / gdev_time_to_us(&elapse);
if (gdev->com_bw_used > 100)
gdev->com_bw_used = 100;
gdev->com_time = 0;
- elapsed = 0;
+ gdev_time_stamp(&last);
}
+ gdev_unlock(&gdev->sched_com_lock);
}
local_irq_enable();
@@ -159,26 +169,36 @@ static int __gdev_credit_com_thread(void *__data)
static int __gdev_credit_mem_thread(void *__data)
{
struct gdev_device *gdev = (struct gdev_device*)__data;
+ struct gdev_time now, last, elapse, interval;
struct timer_list timer;
- unsigned long elapsed = 0;
+ unsigned long effective_jiffies;
GDEV_PRINT("Gdev#%d memory reserve running\n", gdev->id);
setup_timer_on_stack(&timer, __gdev_credit_handler, (unsigned long)current);
+ gdev_time_us(&interval, GDEV_UPDATE_INTERVAL);
+ gdev_time_stamp(&last);
+ effective_jiffies = jiffies;
+
while (!kthread_should_stop()) {
gdev_replenish_credit_memory(gdev);
- mod_timer(&timer, jiffies + usecs_to_jiffies(gdev->period));
+ mod_timer(&timer, effective_jiffies + usecs_to_jiffies(gdev->period));
set_current_state(TASK_UNINTERRUPTIBLE);
schedule();
- elapsed += gdev->period;
- if (elapsed >= GDEV_UPDATE_INTERVAL) {
- gdev->mem_bw_used = gdev->mem_time * 100 / GDEV_UPDATE_INTERVAL;
+ effective_jiffies = jiffies;
+
+ gdev_lock(&gdev->sched_mem_lock);
+ gdev_time_stamp(&now);
+ gdev_time_sub(&elapse, &now, &last);
+ if (gdev_time_ge(&elapse, &interval)) {
+ gdev->mem_bw_used = gdev->mem_time * 100 / gdev_time_to_us(&elapse);
if (gdev->mem_bw_used > 100)
gdev->mem_bw_used = 100;
gdev->mem_time = 0;
- elapsed = 0;
+ gdev_time_stamp(&last);
}
+ gdev_unlock(&gdev->sched_mem_lock);
}
local_irq_enable();
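The utilization update now divides by the time that actually elapsed rather than by the nominal interval, which keeps the percentage honest when the timer drifts. For example, if com_time accumulated 400000 microseconds of compute use over a measured window of 1000000 microseconds (the new GDEV_UPDATE_INTERVAL), the device reads as 400000 * 100 / 1000000 = 40% used, capped at 100. Illustrative values:

	/* recomputation of com_bw_used with assumed values (sketch only). */
	unsigned long com_time = 400000;	/* us of compute use in the window */
	unsigned long elapsed_us = 1000000;	/* measured window length, in us */
	unsigned long bw_used = com_time * 100 / elapsed_us;	/* = 40 (%) */
	if (bw_used > 100)
		bw_used = 100;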
2  driver/gdev/gdev_fops.c
@@ -106,6 +106,8 @@ static int gdev_ioctl
return gdev_ioctl_glaunch(handle, arg);
case GDEV_IOCTL_GSYNC:
return gdev_ioctl_gsync(handle, arg);
+ case GDEV_IOCTL_GBARRIER:
+ return gdev_ioctl_gbarrier(handle, arg);
case GDEV_IOCTL_GQUERY:
return gdev_ioctl_gquery(handle, arg);
case GDEV_IOCTL_GTUNE:
5 driver/gdev/gdev_ioctl.c
@@ -312,6 +312,11 @@ int gdev_ioctl_gsync(Ghandle handle, unsigned long arg)
return gsync(handle, sync.id, &timeout);
}
+int gdev_ioctl_gbarrier(Ghandle handle, unsigned long arg)
+{
+ return gbarrier(handle);
+}
+
int gdev_ioctl_gquery(Ghandle handle, unsigned long arg)
{
struct gdev_ioctl_query q;
1  driver/gdev/gdev_ioctl.h
@@ -43,6 +43,7 @@ int gdev_ioctl_gmemcpy_from_device_async(Ghandle h, unsigned long arg);
int gdev_ioctl_gmemcpy_in_device(Ghandle h, unsigned long arg);
int gdev_ioctl_glaunch(Ghandle h, unsigned long arg);
int gdev_ioctl_gsync(Ghandle h, unsigned long arg);
+int gdev_ioctl_gbarrier(Ghandle h, unsigned long arg);
int gdev_ioctl_gquery(Ghandle h, unsigned long arg);
int gdev_ioctl_gtune(Ghandle h, unsigned long arg);
int gdev_ioctl_gshmget(Ghandle h, unsigned long arg);
6 runtime/kernel/gdev_lib.c
@@ -252,6 +252,12 @@ int gsync(struct gdev_handle *h, uint32_t id, struct gdev_time *timeout)
return ioctl(fd, GDEV_IOCTL_GSYNC, &sync);
}
+int gbarrier(struct gdev_handle *h)
+{
+ int fd = h->fd;
+ return ioctl(fd, GDEV_IOCTL_GBARRIER, NULL);
+}
+
int gquery(struct gdev_handle *h, uint32_t type, uint64_t *result)
{
struct gdev_ioctl_query q;
4 test/cuda/common/loop_gpu.cu
@@ -5,7 +5,9 @@ __global__
void loop(uint32_t *data, uint32_t size, uint32_t n)
{
int i;
- for (i = 0; i < n/40; i++) {
+ //for (i = 0; i < n/40; i++) {
+ for (i = 0; i < n/5; i++) {
+
if (i * 4 < size)
data[i] = i + n;
}
4 test/cuda/common/loop_repeated.c
@@ -93,7 +93,7 @@ int cuda_test_loop_repeated(unsigned int n, int sec, int id, char *path)
}
repeat:
- usleep((tv.tv_usec % 10) * 100);
+ usleep(100);
res = cuMemcpyHtoD(d_data, data, n * sizeof(unsigned int));
if (res != CUDA_SUCCESS) {
printf("cuMemcpyHtoD failed: res = %lu\n", (unsigned long)res);
@@ -126,7 +126,7 @@ int cuda_test_loop_repeated(unsigned int n, int sec, int id, char *path)
printf("cuParamSetSize failed: res = %lu\n", (unsigned long)res);
return -1;
}
-
+
res = cuLaunchGrid(function, grid_x, grid_y);
if (res != CUDA_SUCCESS) {
printf("cuLaunchGrid failed: res = %lu\n", (unsigned long)res);