Skip to content
This repository

HTTPS clone URL

Subversion checkout URL

You can clone with HTTPS or Subversion.

Download ZIP
Browse code

fpvm: new scheduler from Werner (NEEDS REPLACEMENT OF alloca())

  • Loading branch information...
commit 0c4c26d100804c8a6e7e625cecc6e5805ed8c67b 1 parent 7414dca
Sébastien Bourdeauducq authored October 01, 2011
11  software/bios/Makefile
@@ -44,7 +44,7 @@ boot.o: ../../software/include/net/microudp.h
44 44
 boot.o: ../../software/include/net/tftp.h ../../software/include/hal/vga.h
45 45
 boot.o: ../../software/include/hal/usb.h ../../software/include/hw/hpdmc.h
46 46
 boot.o: ../../software/include/hw/common.h ../../software/include/hw/flash.h
47  
-boot.o: boot.h
  47
+boot.o: unlzma.h boot.h
48 48
 isr.o: ../../software/include/hw/interrupts.h
49 49
 isr.o: ../../software/include/base/irq.h ../../software/include/base/uart.h
50 50
 isr.o: ../../software/include/hal/usb.h ../../software/include/hal/tmu.h
@@ -62,11 +62,12 @@ main.o: ../../software/include/base/version.h
62 62
 main.o: ../../software/include/net/mdio.h ../../software/include/hw/fmlbrg.h
63 63
 main.o: ../../software/include/hw/sysctl.h ../../software/include/hw/common.h
64 64
 main.o: ../../software/include/hw/gpio.h ../../software/include/hw/flash.h
65  
-main.o: ../../software/include/hal/vga.h ../../software/include/hal/tmu.h
66  
-main.o: ../../software/include/hw/tmu.h ../../software/include/hal/brd.h
67  
-main.o: ../../software/include/hal/usb.h ../../software/include/hal/ukb.h
68  
-main.o: boot.h splash.h
  65
+main.o: ../../software/include/hw/minimac.h ../../software/include/hal/vga.h
  66
+main.o: ../../software/include/hal/tmu.h ../../software/include/hw/tmu.h
  67
+main.o: ../../software/include/hal/brd.h ../../software/include/hal/usb.h
  68
+main.o: ../../software/include/hal/ukb.h boot.h splash.h
69 69
 splash.o: ../../software/include/base/stdio.h
70 70
 splash.o: ../../software/include/base/stdlib.h
71 71
 splash.o: ../../software/include/hal/vga.h ../../software/include/hw/flash.h
72 72
 splash.o: splash.h
  73
+unlzma.o: unlzma.h
3  software/demo/Makefile
@@ -62,7 +62,8 @@ eval.o: ../../software/include/base/stdlib.h
62 62
 eval.o: ../../software/include/base/string.h ../../software/include/hw/pfpu.h
63 63
 eval.o: ../../software/include/hw/common.h ../../software/include/hw/tmu.h
64 64
 eval.o: ../../software/include/hal/pfpu.h ../../software/include/fpvm/fpvm.h
65  
-eval.o: ../../software/include/fpvm/is.h ../../software/include/fpvm/gfpus.h
  65
+eval.o: ../../software/include/fpvm/is.h
  66
+eval.o: ../../software/include/fpvm/schedulers.h
66 67
 eval.o: ../../software/include/fpvm/pfpu.h eval.h renderer.h
67 68
 font.o: color.h font.h
68 69
 isr.o: ../../software/include/hw/interrupts.h
8  software/demo/eval.c
... ...
@@ -1,6 +1,6 @@
1 1
 /*
2 2
  * Milkymist SoC (Software)
3  
- * Copyright (C) 2007, 2008, 2009, 2010 Sebastien Bourdeauducq
  3
+ * Copyright (C) 2007, 2008, 2009, 2010, 2011 Sebastien Bourdeauducq
4 4
  *
5 5
  * This program is free software: you can redistribute it and/or modify
6 6
  * it under the terms of the GNU General Public License as published by
@@ -22,7 +22,7 @@
22 22
 
23 23
 #include <hal/pfpu.h>
24 24
 #include <fpvm/fpvm.h>
25  
-#include <fpvm/gfpus.h>
  25
+#include <fpvm/schedulers.h>
26 26
 #include <fpvm/pfpu.h>
27 27
 
28 28
 #include "eval.h"
@@ -281,7 +281,7 @@ static int finalize_pfv()
281 281
 
282 282
 static int schedule_pfv()
283 283
 {
284  
-	perframe_prog_length = gfpus_schedule(&pfv_fragment, (unsigned int *)perframe_prog, (unsigned int *)perframe_regs);
  284
+	perframe_prog_length = fpvm_default_schedule(&pfv_fragment, (unsigned int *)perframe_prog, (unsigned int *)perframe_regs);
285 285
 	eval_reinit_all_pfv();
286 286
 	if(perframe_prog_length < 0) {
287 287
 		printf("EVL: per-frame VLIW scheduling failed\n");
@@ -527,7 +527,7 @@ static int finalize_pvv()
527 527
 
528 528
 static int schedule_pvv()
529 529
 {
530  
-	pervertex_prog_length = gfpus_schedule(&pvv_fragment, (unsigned int *)pervertex_prog, (unsigned int *)pervertex_regs);
  530
+	pervertex_prog_length = fpvm_default_schedule(&pvv_fragment, (unsigned int *)pervertex_prog, (unsigned int *)pervertex_regs);
531 531
 	if(pervertex_prog_length < 0) {
532 532
 		printf("EVL: per-vertex VLIW scheduling failed\n");
533 533
 		return 0;
28  software/include/fpvm/gfpus.h → software/include/fpvm/schedulers.h
... ...
@@ -1,6 +1,6 @@
1 1
 /*
2 2
  * Milkymist SoC (Software)
3  
- * Copyright (C) 2007, 2008, 2009, 2010 Sebastien Bourdeauducq
  3
+ * Copyright (C) 2007, 2008, 2009, 2010, 2011 Sebastien Bourdeauducq
4 4
  *
5 5
  * This program is free software: you can redistribute it and/or modify
6 6
  * it under the terms of the GNU General Public License as published by
@@ -15,6 +15,17 @@
15 15
  * along with this program.  If not, see <http://www.gnu.org/licenses/>.
16 16
  */
17 17
 
  18
+
  19
+#ifndef __FPVM_SCHEDULERS_H
  20
+#define __FPVM_SCHEDULERS_H
  21
+
  22
+#include <fpvm/fpvm.h>
  23
+
  24
+/*
  25
+ * code must be able to hold PFPU_PROGSIZE 32-bit instructions.
  26
+ * registers must be able to hold PFPU_REG_COUNT 32-bit values.
  27
+ */
  28
+
18 29
 /*
19 30
  * Greedy Floating Point Unit Scheduler
20 31
  * This program takes FPVM code and performs greedy VLIW scheduling
@@ -23,17 +34,14 @@
23 34
  * program and takes the first schedulable instruction, without trying to
24 35
  * optimize the order of instructions.
25 36
  */
26  
-
27  
-#ifndef __FPVM_GFPUS_H
28  
-#define __FPVM_GFPUS_H
29  
-
30  
-#include <fpvm/fpvm.h>
  37
+int gfpus_schedule(struct fpvm_fragment *fragment, unsigned int *code, unsigned int *registers);
31 38
 
32 39
 /*
33  
- * code must be able to hold PFPU_PROGSIZE 32-bit instructions.
34  
- * registers must be able to hold PFPU_REG_COUNT 32-bit values.
  40
+ * Lean New / Lamely Named Floating Point Unit Scheduler
  41
+ * A smarter, faster, optimizing scheduler by Werner Almesberger.
35 42
  */
  43
+int lnfpus_schedule(struct fpvm_fragment *fragment, unsigned int *code, unsigned int *registers);
36 44
 
37  
-int gfpus_schedule(struct fpvm_fragment *fragment, unsigned int *code, unsigned int *registers);
  45
+#define fpvm_default_schedule lnfpus_schedule
38 46
 
39  
-#endif /* __FPVM_GFPUS_H */
  47
+#endif /* __FPVM_SCHEDULERS_H */
14  software/libfpvm/Makefile
... ...
@@ -1,7 +1,7 @@
1 1
 MMDIR=../..
2 2
 include $(MMDIR)/software/include.mak
3 3
 
4  
-OBJECTS=fpvm.o parser_helper.o scanner.o parser.o gfpus.o pfpu.o
  4
+OBJECTS=fpvm.o parser_helper.o scanner.o parser.o gfpus.o lnfpus.o pfpu.o
5 5
 
6 6
 all: libfpvm.a
7 7
 
@@ -39,8 +39,16 @@ gfpus.o: ../../software/include/base/stdio.h
39 39
 gfpus.o: ../../software/include/base/stdlib.h
40 40
 gfpus.o: ../../software/include/fpvm/is.h ../../software/include/fpvm/fpvm.h
41 41
 gfpus.o: ../../software/include/fpvm/pfpu.h
42  
-gfpus.o: ../../software/include/fpvm/gfpus.h ../../software/include/hw/pfpu.h
43  
-gfpus.o: ../../software/include/hw/common.h
  42
+gfpus.o: ../../software/include/fpvm/schedulers.h
  43
+gfpus.o: ../../software/include/hw/pfpu.h ../../software/include/hw/common.h
  44
+lnfpus.o: ../../software/include/base/stdlib.h
  45
+lnfpus.o: ../../software/include/base/stdio.h
  46
+lnfpus.o: ../../software/include/base/string.h
  47
+lnfpus.o: ../../software/include/base/assert.h
  48
+lnfpus.o: ../../software/include/fpvm/is.h ../../software/include/fpvm/fpvm.h
  49
+lnfpus.o: ../../software/include/fpvm/pfpu.h
  50
+lnfpus.o: ../../software/include/fpvm/schedulers.h
  51
+lnfpus.o: ../../software/include/hw/pfpu.h ../../software/include/hw/common.h
44 52
 parser.o: ../../software/include/base/stdio.h
45 53
 parser.o: ../../software/include/base/stdlib.h
46 54
 parser.o: ../../software/include/base/assert.h
4  software/libfpvm/gfpus.c
... ...
@@ -1,6 +1,6 @@
1 1
 /*
2 2
  * Milkymist SoC (Software)
3  
- * Copyright (C) 2007, 2008, 2009, 2010 Sebastien Bourdeauducq
  3
+ * Copyright (C) 2007, 2008, 2009, 2010, 2011 Sebastien Bourdeauducq
4 4
  *
5 5
  * This program is free software: you can redistribute it and/or modify
6 6
  * it under the terms of the GNU General Public License as published by
@@ -20,7 +20,7 @@
20 20
 #include <fpvm/is.h>
21 21
 #include <fpvm/fpvm.h>
22 22
 #include <fpvm/pfpu.h>
23  
-#include <fpvm/gfpus.h>
  23
+#include <fpvm/schedulers.h>
24 24
 
25 25
 #include <hw/pfpu.h>
26 26
 
666  software/libfpvm/lnfpus.c
... ...
@@ -0,0 +1,666 @@
  1
+/*
  2
+ * lnfpus.c - O(n) ... O(n^2) scheduler
  3
+ *
  4
+ * Copyright (C) 2011 Werner Almesberger
  5
+ *
  6
+ * Based on gfpus.c
  7
+ * Copyright (C) 2007, 2008, 2009, 2010 Sebastien Bourdeauducq
  8
+ *
  9
+ * This program is free software: you can redistribute it and/or modify
  10
+ * it under the terms of the GNU General Public License as published by
  11
+ * the Free Software Foundation, version 3 of the License.
  12
+ *
  13
+ * This program is distributed in the hope that it will be useful,
  14
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
  15
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  16
+ * GNU General Public License for more details.
  17
+ *
  18
+ * You should have received a copy of the GNU General Public License
  19
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
  20
+ */
  21
+
  22
+#include <stdlib.h>
  23
+#include <stdio.h>
  24
+#include <string.h>
  25
+#include <assert.h>
  26
+
  27
+#include <fpvm/is.h>
  28
+#include <fpvm/fpvm.h>
  29
+#include <fpvm/pfpu.h>
  30
+#include <fpvm/schedulers.h>
  31
+
  32
+#include <hw/pfpu.h>
  33
+
  34
+
  35
+//#define	REG_STATS
  36
+#define	LCPF	/* longest critical path first */
  37
+
  38
+//#define DEBUG
  39
+#ifdef DEBUG
  40
+#define	Dprintf printf
  41
+#else
  42
+#define	Dprintf(...)
  43
+#endif
  44
+
  45
+
  46
+#define	MAX_LATENCY	8	/* maximum latency; okay to make this bigger */
  47
+
  48
+#define	CODE(n)		(((pfpu_instruction *) (code+(n)))->i)
  49
+
  50
+
  51
+struct list {
  52
+	struct list *next, *prev;
  53
+};
  54
+
  55
+
  56
+struct insn {
  57
+	struct list more;		/* more insns on same schedule */
  58
+	struct fpvm_instruction *vm_insn;
  59
+	struct data_ref {
  60
+		struct list more;	/* more refs sharing the data */
  61
+		struct insn *insn;	/* insn this is part of */
  62
+		struct insn *dep;	/* insn we depend on */
  63
+	} opa, opb, dest, cond;
  64
+	int arity;
  65
+	int latency;
  66
+	int rmw;	/* non-zero if instruction is read-modify-write */
  67
+	int unresolved;	/* number of data refs we need before we can sched */
  68
+	int earliest;	/* earliest cycle dependencies seen so far are met */
  69
+	struct list dependants;	/* data refs that depend on this insn (constant) */
  70
+	int num_dependants;	/* number of dependants */
  71
+	struct insn *next_setter; /* next setter of the same register */
  72
+#ifdef LCPF
  73
+	int distance;		/* minimum cycles on this path until the end */
  74
+#endif
  75
+};
  76
+
  77
+
  78
+struct vm_reg {
  79
+	struct insn *setter;	/* instruction setting it; NULL if none */
  80
+	struct insn *first_setter; /* first setter */
  81
+	int pfpu_reg;		/* underlying PFPU register */
  82
+	int refs;		/* usage count */
  83
+};
  84
+
  85
+
  86
+struct pfpu_reg {
  87
+	struct list more;	/* list of unallocated PFPU registers */
  88
+	int vm_reg;		/* corresponding FPVM register if allocated */
  89
+	int used;		/* used somewhere in the program */
  90
+};
  91
+
  92
+
  93
+static struct sched_ctx {
  94
+	struct fpvm_fragment *frag;
  95
+	struct insn insns[FPVM_MAXCODELEN];
  96
+	struct vm_reg *regs;	/* dynamically allocated */
  97
+	struct pfpu_reg pfpu_regs[PFPU_REG_COUNT];
  98
+	struct list unallocated; /* unallocated registers */
  99
+	struct list unscheduled; /* unscheduled insns */
  100
+	struct list waiting;	/* insns waiting to be scheduled */
  101
+	struct list ready[PFPU_PROGSIZE]; /* insns ready at nth cycle */
  102
+	int cycle;		/* the current cycle */
  103
+#ifdef REG_STATS
  104
+	int max_regs, curr_regs;	/* allocation statistics */
  105
+#endif
  106
+} *sc;
  107
+
  108
+
  109
+/* ----- Register initialization ------------------------------------------- */
  110
+
  111
+
  112
+/*
  113
+ * Straight from gfpus.c, only with some whitespace changes.
  114
+ */
  115
+
  116
+static void get_registers(struct fpvm_fragment *fragment,
  117
+    unsigned int *registers)
  118
+{
  119
+	int i;
  120
+	union {
  121
+		float f;
  122
+		unsigned int n;
  123
+	} fconv;
  124
+
  125
+	for(i = 0; i < fragment->nbindings; i++)
  126
+		if(fragment->bindings[i].isvar)
  127
+			registers[i] = 0;
  128
+		else {
  129
+			fconv.f = fragment->bindings[i].b.c;
  130
+			registers[i] = fconv.n;
  131
+		}
  132
+	for(; i < PFPU_REG_COUNT; i++)
  133
+		registers[i] = 0;
  134
+}
  135
+
  136
+
  137
+/* ----- Doubly-linked list ------------------------------------------------ */
  138
+
  139
+
  140
+/*
  141
+ * Use the naming conventions of include/linux/list.h
  142
+ */
  143
+
  144
+
  145
+#ifdef DEBUG
  146
+
  147
+static void list_poison(struct list *list)
  148
+{
  149
+	list->next = list->prev = NULL;
  150
+}
  151
+
  152
+#else /* DEBUG */
  153
+
  154
+#define list_poison(list)
  155
+
  156
+#endif /* !DEBUG */
  157
+
  158
+
  159
+static void list_init(struct list *list)
  160
+{
  161
+	list->next = list->prev = list;
  162
+}
  163
+
  164
+
  165
+static void list_del(struct list *item)
  166
+{
  167
+	assert(item->next != item);
  168
+	item->prev->next = item->next;
  169
+	item->next->prev = item->prev;
  170
+	list_poison(item);
  171
+}
  172
+
  173
+
  174
+static void *list_pop(struct list *list)
  175
+{
  176
+	struct list *first;
  177
+
  178
+	first = list->next;
  179
+	if(first == list)
  180
+		return NULL;
  181
+	list_del(first);
  182
+	return first;
  183
+}
  184
+
  185
+
  186
+static void list_add_tail(struct list *list, struct list *item)
  187
+{
  188
+	item->next = list;
  189
+	item->prev = list->prev;
  190
+	list->prev->next = item;
  191
+	list->prev = item;
  192
+}
  193
+
  194
+
  195
+static void list_add(struct list *list, struct list *item)
  196
+{
  197
+	item->next = list->next;
  198
+	item->prev = list;
  199
+	list->next->prev = item;
  200
+	list->next = item;
  201
+}
  202
+
  203
+
  204
+static void list_concat(struct list *a, struct list *b)
  205
+{
  206
+	if(b->next != b) {
  207
+		a->prev->next = b->next;
  208
+		b->next->prev = a->prev;
  209
+		b->prev->next = a;
  210
+		a->prev = b->prev;
  211
+	}
  212
+	list_poison(b);
  213
+}
  214
+
  215
+
  216
+/*
  217
+ * Do not delete elements from the list while traversing it with foreach !
  218
+ */
  219
+
  220
+#define	foreach(var, head) \
  221
+	for(var = (void *) ((head))->next; \
  222
+	    (var) != (void *) (head); \
  223
+	    var = (void *) ((struct list *) (var))->next)
  224
+
  225
+
  226
+/* ----- Register management ----------------------------------------------- */
  227
+
  228
+
  229
+static int vm_reg2idx(int reg)
  230
+{
  231
+	return reg >= 0 ? reg : sc->frag->nbindings-reg;
  232
+}
  233
+
  234
+
  235
+static int alloc_reg(struct insn *setter)
  236
+{
  237
+	struct pfpu_reg *reg;
  238
+	int vm_reg, pfpu_reg, vm_idx;
  239
+
  240
+	vm_reg = setter->vm_insn->dest;
  241
+	if(vm_reg >= 0) {
  242
+		pfpu_reg = vm_reg;
  243
+		sc->pfpu_regs[vm_reg].vm_reg = vm_reg; /* @@@ global init */
  244
+	} else {
  245
+		reg = list_pop(&sc->unallocated);
  246
+		if(!reg)
  247
+			return -1;
  248
+
  249
+		#ifdef REG_STATS
  250
+		sc->curr_regs++;
  251
+		if(sc->curr_regs > sc->max_regs)
  252
+			sc->max_regs = sc->curr_regs;
  253
+		#endif
  254
+
  255
+		reg->vm_reg = vm_reg;
  256
+		pfpu_reg = reg-sc->pfpu_regs;
  257
+	}
  258
+
  259
+	Dprintf("  alloc reg %d -> %d\n", vm_reg, pfpu_reg);
  260
+
  261
+	vm_idx = vm_reg2idx(vm_reg);
  262
+	sc->regs[vm_idx].setter = setter;
  263
+	sc->regs[vm_idx].pfpu_reg = pfpu_reg;
  264
+	sc->regs[vm_idx].refs = setter->num_dependants+1;
  265
+
  266
+	return pfpu_reg;
  267
+}
  268
+
  269
+
  270
+static void put_reg(int vm_reg)
  271
+{
  272
+	int vm_idx;
  273
+	struct vm_reg *reg;
  274
+
  275
+	if(vm_reg >= 0)
  276
+		return;
  277
+
  278
+	vm_idx = vm_reg2idx(vm_reg);
  279
+	reg = sc->regs+vm_idx;
  280
+
  281
+	assert(reg->refs);
  282
+	if(--reg->refs)
  283
+		return;
  284
+
  285
+	Dprintf("  free reg %d\n", reg->pfpu_reg);
  286
+
  287
+#ifdef REG_STATS
  288
+	assert(sc->curr_regs);
  289
+	sc->curr_regs--;
  290
+#endif
  291
+
  292
+	/*
  293
+	 * Prepend so that register numbers stay small and bugs reveal
  294
+	 * themselves more rapidly.
  295
+	 */
  296
+	list_add(&sc->unallocated, &sc->pfpu_regs[reg->pfpu_reg].more);
  297
+
  298
+	/* clear it for style only */
  299
+	reg->setter = NULL;
  300
+	reg->pfpu_reg = 0;
  301
+}
  302
+
  303
+
  304
+static int lookup_pfpu_reg(int vm_reg)
  305
+{
  306
+	return vm_reg >= 0 ? vm_reg : sc->regs[vm_reg2idx(vm_reg)].pfpu_reg;
  307
+}
  308
+
  309
+
  310
+static void mark(int vm_reg)
  311
+{
  312
+	if(vm_reg > 0)
  313
+		sc->pfpu_regs[vm_reg].used = 1;
  314
+}
  315
+
  316
+
  317
+static int init_registers(struct fpvm_fragment *frag,
  318
+    unsigned int *registers)
  319
+{
  320
+	int i;
  321
+
  322
+	sc->regs =
  323
+	    calloc(frag->nbindings-frag->next_sur, sizeof(struct vm_reg));
  324
+	if(!sc->regs)
  325
+		return -1;
  326
+
  327
+	get_registers(frag, registers);
  328
+
  329
+	for(i = 0; i != frag->ninstructions; i++) {
  330
+		mark(frag->code[i].opa);
  331
+		mark(frag->code[i].opb);
  332
+		mark(frag->code[i].dest);
  333
+	}
  334
+
  335
+	list_init(&sc->unallocated);
  336
+	for(i = PFPU_SPREG_COUNT; i != PFPU_REG_COUNT; i++)
  337
+		if(!sc->pfpu_regs[i].used)
  338
+			list_add_tail(&sc->unallocated, &sc->pfpu_regs[i].more);
  339
+
  340
+	return 0;
  341
+}
  342
+
  343
+
  344
+/* ----- Instruction scheduler --------------------------------------------- */
  345
+
  346
+
  347
+static struct vm_reg *add_data_ref(struct insn *insn, struct data_ref *ref,
  348
+    int reg_num)
  349
+{
  350
+	struct vm_reg *reg;
  351
+
  352
+	reg = sc->regs+vm_reg2idx(reg_num);
  353
+	ref->insn = insn;
  354
+	ref->dep = reg->setter;
  355
+	if(insn->vm_insn->dest == reg_num)
  356
+		insn->rmw = 1;
  357
+	if(!ref->dep)
  358
+		reg->refs++;
  359
+	else {
  360
+		list_add_tail(&ref->dep->dependants, &ref->more);
  361
+		ref->dep->num_dependants++;
  362
+		insn->unresolved++;
  363
+
  364
+		Dprintf("insn %lu: reg %d setter %lu unresolved %d\n",
  365
+		    insn-sc->insns, reg_num, reg->setter-sc->insns,
  366
+		    insn->unresolved);
  367
+	}
  368
+	return reg;
  369
+}
  370
+
  371
+
  372
+static void init_scheduler(struct fpvm_fragment *frag)
  373
+{
  374
+	int i;
  375
+	struct insn *insn;
  376
+	struct vm_reg *reg;
  377
+	struct data_ref *ref;
  378
+
  379
+	list_init(&sc->unscheduled);
  380
+	list_init(&sc->waiting);
  381
+	for(i = 0; i != PFPU_PROGSIZE; i++)
  382
+		list_init(sc->ready+i);
  383
+
  384
+	for(i = 0; i != frag->ninstructions; i++) {
  385
+		insn = sc->insns+i;
  386
+		insn->vm_insn = frag->code+i;
  387
+		insn->arity = fpvm_get_arity(frag->code[i].opcode);
  388
+		insn->latency = pfpu_get_latency(frag->code[i].opcode);
  389
+		list_init(&insn->dependants);
  390
+		switch (insn->arity) {
  391
+			case 3:
  392
+				add_data_ref(insn, &insn->cond, FPVM_REG_IFB);
  393
+				/* fall through */
  394
+			case 2:
  395
+				add_data_ref(insn, &insn->opb, frag->code[i].opb);
  396
+				/* fall through */
  397
+			case 1:
  398
+				add_data_ref(insn, &insn->opa, frag->code[i].opa);
  399
+				/* fall through */
  400
+			case 0:
  401
+				reg = sc->regs+vm_reg2idx(frag->code[i].dest);
  402
+				if(reg->setter) {
  403
+					reg->setter->next_setter = insn;
  404
+					foreach(ref, &reg->setter->dependants)
  405
+						if(ref->insn != insn)
  406
+							insn->unresolved++;
  407
+					if(!insn->rmw)
  408
+						insn->unresolved++;
  409
+				} else {
  410
+					if(!insn->rmw)
  411
+						insn->unresolved += reg->refs;
  412
+					reg->first_setter = insn;
  413
+				}
  414
+				reg->setter = insn;
  415
+				break;
  416
+			default:
  417
+				abort();
  418
+		}
  419
+		if(insn->unresolved)
  420
+			list_add_tail(&sc->unscheduled, &insn->more);
  421
+		else
  422
+			list_add_tail(&sc->ready[0], &insn->more);
  423
+	}
  424
+
  425
+#ifdef LCPF
  426
+	struct data_ref *dep;
  427
+
  428
+	for(i = frag->ninstructions-1; i >= 0; i--) {
  429
+		insn = sc->insns+i;
  430
+#if 0
  431
+		/*
  432
+		 * Theoretically, we should consider the distance through
  433
+		 * write-write dependencies too. In practice, this would
  434
+		 * mainly matter if we had operations whose result is ignored.
  435
+		 * This is a degenerate case that's probably not worth
  436
+		 * spending much effort on.
  437
+		 */
  438
+		if(insn->next_setter) {
  439
+			insn->distance =
  440
+			    insn->next_setter->distance-insn->distance+1;
  441
+			if(insn->distance < 1)
  442
+				insn->distance = 1;
  443
+		}
  444
+#endif
  445
+		foreach(dep, &insn->dependants)
  446
+			if(dep->insn->distance > insn->distance)
  447
+				insn->distance = dep->insn->distance;
  448
+		/*
  449
+		 * While it would be more correct to add one for the cycle
  450
+		 * following the write cycle, this also has the effect of
  451
+		 * producing slightly worse results on the example set of
  452
+		 * patches. Let's thus keep this "bug" for now.
  453
+		 */
  454
+//		insn->distance += insn->latency+1;
  455
+		insn->distance += insn->latency;
  456
+	}
  457
+#endif
  458
+}
  459
+
  460
+
  461
+static void unblock(struct insn *insn)
  462
+{
  463
+	int slot;
  464
+
  465
+	assert(insn->unresolved);
  466
+	if(--insn->unresolved)
  467
+		return;
  468
+	Dprintf("  unblocked %lu -> %u\n", insn-sc->insns, insn->earliest);
  469
+	list_del(&insn->more);
  470
+	slot = insn->earliest;
  471
+	if(slot <= sc->cycle)
  472
+		slot = sc->cycle+1;
  473
+	list_add_tail(sc->ready+slot, &insn->more);
  474
+}
  475
+
  476
+
  477
+static void put_reg_by_ref(struct data_ref *ref, int vm_reg)
  478
+{
  479
+	struct insn *setter = ref->dep;
  480
+	struct vm_reg *reg;
  481
+
  482
+	if(setter) {
  483
+		put_reg(setter->vm_insn->dest);
  484
+		if(setter->next_setter && setter->next_setter != ref->insn)
  485
+			unblock(setter->next_setter);
  486
+	} else {
  487
+		reg = sc->regs+vm_reg2idx(vm_reg);
  488
+		if(reg->first_setter && !reg->first_setter->rmw)
  489
+			unblock(reg->first_setter);
  490
+	}
  491
+}
  492
+
  493
+
  494
+static void unblock_after(struct insn *insn, int cycle)
  495
+{
  496
+	if(insn->earliest <= cycle)
  497
+		insn->earliest = cycle+1;
  498
+	unblock(insn);
  499
+}
  500
+
  501
+
  502
+static int issue(struct insn *insn, unsigned *code)
  503
+{
  504
+	struct data_ref *ref;
  505
+	int end, reg;
  506
+
  507
+	end = sc->cycle+insn->latency;
  508
+
  509
+	Dprintf("cycle %d: insn %lu L %d (A %d B %d)\n", sc->cycle,
  510
+	    insn-sc->insns, insn->latency, insn->vm_insn->opa,
  511
+	    insn->vm_insn->opb);
  512
+
  513
+	switch (insn->arity) {
  514
+		case 3:
  515
+			put_reg_by_ref(&insn->cond, FPVM_REG_IFB);
  516
+			/* fall through */
  517
+		case 2:
  518
+			CODE(sc->cycle).opb = lookup_pfpu_reg(insn->vm_insn->opb);
  519
+			put_reg_by_ref(&insn->opb, insn->vm_insn->opb);
  520
+			/* fall through */
  521
+		case 1:
  522
+			CODE(sc->cycle).opa = lookup_pfpu_reg(insn->vm_insn->opa);
  523
+			put_reg_by_ref(&insn->opa, insn->vm_insn->opa);
  524
+			break;
  525
+		case 0:
  526
+			break;
  527
+		default:
  528
+			abort();
  529
+	}
  530
+
  531
+	reg = alloc_reg(insn);
  532
+	if(reg < 0)
  533
+		return -1;
  534
+	CODE(end).dest = reg;
  535
+	CODE(sc->cycle).opcode = fpvm_to_pfpu(insn->vm_insn->opcode);
  536
+
  537
+	foreach(ref, &insn->dependants)
  538
+		unblock_after(ref->insn, end);
  539
+	if(insn->next_setter && !insn->next_setter->rmw)
  540
+		unblock_after(insn->next_setter,
  541
+		    end-insn->next_setter->latency);
  542
+
  543
+	return 0;
  544
+}
  545
+
  546
+
  547
+#ifdef DEBUG
  548
+static int count(const struct list *list)
  549
+{
  550
+	int n = 0;
  551
+	const struct list *p;
  552
+
  553
+	for(p = list->next; p != list; p = p->next)
  554
+		n++;
  555
+	return n;
  556
+}
  557
+#endif
  558
+
  559
+
  560
+static int schedule(unsigned int *code)
  561
+{
  562
+	int remaining;
  563
+	int i, last, end;
  564
+	struct insn *insn;
  565
+	struct insn *best;
  566
+
  567
+	remaining = sc->frag->ninstructions;
  568
+	for(i = 0; remaining; i++) {
  569
+		if(i == PFPU_PROGSIZE)
  570
+			return -1;
  571
+
  572
+		sc->cycle = i;
  573
+		Dprintf("@%d --- remaining %d, waiting %d + ready %d\n",
  574
+		    i, remaining, count(&sc->waiting), count(&sc->ready[i]));
  575
+
  576
+		list_concat(&sc->waiting, sc->ready+i);
  577
+		best = NULL;
  578
+		foreach(insn, &sc->waiting) {
  579
+			end = i+insn->latency;
  580
+			if(end >= PFPU_PROGSIZE)
  581
+				return -1;
  582
+			if(!CODE(end).dest) {
  583
+#ifdef LCPF
  584
+				if(!best || best->distance < insn->distance)
  585
+					best = insn;
  586
+#else
  587
+				best = insn;
  588
+				break;
  589
+#endif
  590
+			}
  591
+		}
  592
+		if(best) {
  593
+			if(issue(best, code) < 0)
  594
+				return -1;
  595
+			list_del(&best->more);
  596
+			remaining--;
  597
+		}
  598
+		if(CODE(i).dest)
  599
+			put_reg(sc->pfpu_regs[CODE(i).dest].vm_reg);
  600
+	}
  601
+
  602
+	/*
  603
+	 * Add NOPs to cover unfinished instructions.
  604
+	 */
  605
+	last = i;
  606
+	end = i+MAX_LATENCY;
  607
+	if(end > PFPU_PROGSIZE)
  608
+		end = PFPU_PROGSIZE;
  609
+	while(i != end) {
  610
+		if(CODE(i).dest)
  611
+			last = i+1;
  612
+		i++;
  613
+	}
  614
+	return last;
  615
+}
  616
+
  617
+
  618
+static int init_scheduler_context(struct fpvm_fragment *frag,
  619
+    unsigned int *reg)
  620
+{
  621
+	sc = calloc(1, sizeof(*sc));
  622
+	if(!sc)
  623
+		return -1;
  624
+
  625
+	sc->frag = frag;
  626
+
  627
+	if(init_registers(frag, reg) < 0) {
  628
+		free(sc);
  629
+		return -1;
  630
+	}
  631
+
  632
+	init_scheduler(frag);
  633
+	return 0;
  634
+}
  635
+
  636
+
  637
+int lnfpus_schedule(struct fpvm_fragment *frag, unsigned int *code,
  638
+    unsigned int *reg)
  639
+{
  640
+	pfpu_instruction vecout;
  641
+	int res;
  642
+
  643
+	if(init_scheduler_context(frag, reg) < 0)
  644
+		return -1;
  645
+	memset(code, 0, PFPU_PROGSIZE*sizeof(*code));
  646
+	res = schedule(code);
  647
+
  648
+#ifdef REG_STATS
  649
+	printf("regs: %d/%d\n", sc->curr_regs, sc->max_regs);
  650
+#endif
  651
+
  652
+	free(sc->regs);
  653
+	free(sc);
  654
+	if(res < 0)
  655
+		return res;
  656
+	if(frag->vector_mode)
  657
+		return res;
  658
+	if(res == PFPU_PROGSIZE)
  659
+		return -1;
  660
+
  661
+	vecout.w = 0;
  662
+	vecout.i.opcode = FPVM_OPCODE_VECTOUT;
  663
+	code[res] = vecout.w;
  664
+
  665
+	return res+1;
  666
+}
2  software/libfpvm/subdir.mak
... ...
@@ -1,4 +1,4 @@
1  
-OBJECTS=fpvm.o parser_helper.o scanner.o parser.o gfpus.o pfpu.o
  1
+OBJECTS=fpvm.o parser_helper.o scanner.o parser.o gfpus.o lnfpus.o pfpu.o
2 2
 
3 3
 all: libfpvm.a
4 4
 
10  software/libfpvm/x86-linux/test.c
... ...
@@ -1,6 +1,6 @@
1 1
 /*
2 2
  * Milkymist SoC (Software)
3  
- * Copyright (C) 2007, 2008, 2009, 2010 Sebastien Bourdeauducq
  3
+ * Copyright (C) 2007, 2008, 2009, 2010, 2011 Sebastien Bourdeauducq
4 4
  *
5 5
  * This program is free software: you can redistribute it and/or modify
6 6
  * it under the terms of the GNU General Public License as published by
@@ -18,7 +18,7 @@
18 18
 #include <stdio.h>
19 19
 
20 20
 #include <fpvm/fpvm.h>
21  
-#include <fpvm/gfpus.h>
  21
+#include <fpvm/schedulers.h>
22 22
 #include <fpvm/pfpu.h>
23 23
 #include <hw/pfpu.h>
24 24
 
@@ -40,7 +40,7 @@ int main(int argc, char *argv[])
40 40
 	fpvm_finalize(&frag);
41 41
 	fpvm_dump(&frag);
42 42
 	printf("== PFPU:\n");
43  
-	len = gfpus_schedule(&frag, code, registers);
  43
+	len = fpvm_default_schedule(&frag, code, registers);
44 44
 	if(len > 0)
45 45
 		pfpu_dump(code, len);
46 46
 
@@ -60,7 +60,7 @@ int main(int argc, char *argv[])
60 60
 	fpvm_finalize(&frag);
61 61
 	fpvm_dump(&frag);
62 62
 	printf("== PFPU:\n");
63  
-	len = gfpus_schedule(&frag, code, registers);
  63
+	len = fpvm_default_schedule(&frag, code, registers);
64 64
 	if(len > 0)
65 65
 		pfpu_dump(code, len);
66 66
 
@@ -73,7 +73,7 @@ int main(int argc, char *argv[])
73 73
 	fpvm_finalize(&frag);
74 74
 	fpvm_dump(&frag);
75 75
 	printf("== PFPU:\n");
76  
-	len = gfpus_schedule(&frag, code, registers);
  76
+	len = fpvm_default_schedule(&frag, code, registers);
77 77
 	if(len > 0)
78 78
 		pfpu_dump(code, len);
79 79
 	

0 notes on commit 0c4c26d

Please sign in to comment.
Something went wrong with that request. Please try again.