Permalink
Browse files

Revision 3: Used generic multipliers in unrolled versions

  • Loading branch information...
pzemtsov committed May 6, 2014
1 parent 9c85cf3 commit f8f03136427227494847db0d52cc726fc0e629d8
Showing with 46 additions and 64 deletions.
  1. +11 −64 e1.cpp
  2. +35 −0 mymacros.h
View
75 e1.cpp
@@ -1,5 +1,6 @@
/** Revision 1: Created. Converted all non-inlined versions from Java to C++
Revision 2: Converted all unrolled versions from Java to C++
+ Revision 3: Used generic multipliers in unrolled versions
*/
#include <cassert>
@@ -9,6 +10,7 @@
#include <typeinfo>
#include "timer.h"
+#include "mymacros.h"
typedef unsigned char byte;
@@ -142,24 +144,7 @@ class Dst_First_3 : public Demux
#define MOVE_BYTE(i,j) d[i] = src [(j)+(i)*32]
-#define MOVE_BYTES_64(j) do {\
- MOVE_BYTE ( 0, j); MOVE_BYTE ( 1, j); MOVE_BYTE ( 2, j); MOVE_BYTE ( 3, j);\
- MOVE_BYTE ( 4, j); MOVE_BYTE ( 5, j); MOVE_BYTE ( 6, j); MOVE_BYTE ( 7, j);\
- MOVE_BYTE ( 8, j); MOVE_BYTE ( 9, j); MOVE_BYTE (10, j); MOVE_BYTE (11, j);\
- MOVE_BYTE (12, j); MOVE_BYTE (13, j); MOVE_BYTE (14, j); MOVE_BYTE (15, j);\
- MOVE_BYTE (16, j); MOVE_BYTE (17, j); MOVE_BYTE (18, j); MOVE_BYTE (19, j);\
- MOVE_BYTE (20, j); MOVE_BYTE (21, j); MOVE_BYTE (22, j); MOVE_BYTE (23, j);\
- MOVE_BYTE (24, j); MOVE_BYTE (25, j); MOVE_BYTE (26, j); MOVE_BYTE (27, j);\
- MOVE_BYTE (28, j); MOVE_BYTE (29, j); MOVE_BYTE (30, j); MOVE_BYTE (31, j);\
- MOVE_BYTE (32, j); MOVE_BYTE (33, j); MOVE_BYTE (34, j); MOVE_BYTE (35, j);\
- MOVE_BYTE (36, j); MOVE_BYTE (37, j); MOVE_BYTE (38, j); MOVE_BYTE (39, j);\
- MOVE_BYTE (40, j); MOVE_BYTE (41, j); MOVE_BYTE (42, j); MOVE_BYTE (43, j);\
- MOVE_BYTE (44, j); MOVE_BYTE (45, j); MOVE_BYTE (46, j); MOVE_BYTE (47, j);\
- MOVE_BYTE (48, j); MOVE_BYTE (49, j); MOVE_BYTE (50, j); MOVE_BYTE (51, j);\
- MOVE_BYTE (52, j); MOVE_BYTE (53, j); MOVE_BYTE (54, j); MOVE_BYTE (55, j);\
- MOVE_BYTE (56, j); MOVE_BYTE (57, j); MOVE_BYTE (58, j); MOVE_BYTE (59, j);\
- MOVE_BYTE (60, j); MOVE_BYTE (61, j); MOVE_BYTE (62, j); MOVE_BYTE (63, j);\
- } while (0)
+#define MOVE_BYTES_64(j) DUP2_64 (MOVE_BYTE,j)
#define MOVE_TIMESLOT(j) do {\
byte * const d = dst[j];\
@@ -191,8 +176,7 @@ class Unrolled_1_2 : public Demux
assert (src_length == NUM_TIMESLOTS * DST_SIZE);
for (unsigned j = 0; j < NUM_TIMESLOTS; j+=2) {
- MOVE_TIMESLOT (j);
- MOVE_TIMESLOT (j+1);
+ DUP_2_ (MOVE_TIMESLOT, j);
}
}
};
@@ -206,10 +190,7 @@ class Unrolled_1_4 : public Demux
assert (DST_SIZE == 64);
assert (src_length == NUM_TIMESLOTS * DST_SIZE);
for (unsigned j = 0; j < NUM_TIMESLOTS; j+=4) {
- MOVE_TIMESLOT (j);
- MOVE_TIMESLOT (j+1);
- MOVE_TIMESLOT (j+2);
- MOVE_TIMESLOT (j+3);
+ DUP_4_ (MOVE_TIMESLOT, j);
}
}
};
@@ -223,14 +204,7 @@ class Unrolled_1_8 : public Demux
assert (DST_SIZE == 64);
assert (src_length == NUM_TIMESLOTS * DST_SIZE);
for (unsigned j = 0; j < NUM_TIMESLOTS; j+=8) {
- MOVE_TIMESLOT (j);
- MOVE_TIMESLOT (j+1);
- MOVE_TIMESLOT (j+2);
- MOVE_TIMESLOT (j+3);
- MOVE_TIMESLOT (j+4);
- MOVE_TIMESLOT (j+5);
- MOVE_TIMESLOT (j+6);
- MOVE_TIMESLOT (j+7);
+ DUP_8_ (MOVE_TIMESLOT, j);
}
}
};
@@ -244,22 +218,7 @@ class Unrolled_1_16 : public Demux
assert (DST_SIZE == 64);
assert (src_length == NUM_TIMESLOTS * DST_SIZE);
for (unsigned j = 0; j < NUM_TIMESLOTS; j+=16) {
- MOVE_TIMESLOT (j);
- MOVE_TIMESLOT (j+1);
- MOVE_TIMESLOT (j+2);
- MOVE_TIMESLOT (j+3);
- MOVE_TIMESLOT (j+4);
- MOVE_TIMESLOT (j+5);
- MOVE_TIMESLOT (j+6);
- MOVE_TIMESLOT (j+7);
- MOVE_TIMESLOT (j+8);
- MOVE_TIMESLOT (j+9);
- MOVE_TIMESLOT (j+10);
- MOVE_TIMESLOT (j+11);
- MOVE_TIMESLOT (j+12);
- MOVE_TIMESLOT (j+13);
- MOVE_TIMESLOT (j+14);
- MOVE_TIMESLOT (j+15);
+ DUP_16_ (MOVE_TIMESLOT, j);
}
}
};
@@ -273,14 +232,7 @@ class Unrolled_2_Full : public Demux
assert (DST_SIZE == 64);
assert (src_length == NUM_TIMESLOTS * DST_SIZE);
- MOVE_TIMESLOT ( 0); MOVE_TIMESLOT ( 1); MOVE_TIMESLOT ( 2); MOVE_TIMESLOT ( 3);
- MOVE_TIMESLOT ( 4); MOVE_TIMESLOT ( 5); MOVE_TIMESLOT ( 6); MOVE_TIMESLOT ( 7);
- MOVE_TIMESLOT ( 8); MOVE_TIMESLOT ( 9); MOVE_TIMESLOT (10); MOVE_TIMESLOT (11);
- MOVE_TIMESLOT (12); MOVE_TIMESLOT (13); MOVE_TIMESLOT (14); MOVE_TIMESLOT (15);
- MOVE_TIMESLOT (16); MOVE_TIMESLOT (17); MOVE_TIMESLOT (18); MOVE_TIMESLOT (19);
- MOVE_TIMESLOT (20); MOVE_TIMESLOT (21); MOVE_TIMESLOT (22); MOVE_TIMESLOT (23);
- MOVE_TIMESLOT (24); MOVE_TIMESLOT (25); MOVE_TIMESLOT (26); MOVE_TIMESLOT (27);
- MOVE_TIMESLOT (28); MOVE_TIMESLOT (29); MOVE_TIMESLOT (30); MOVE_TIMESLOT (31);
+ DUP_32 (MOVE_TIMESLOT);
}
};
@@ -333,14 +285,9 @@ class Unrolled_4 : public Demux
assert (src_length == NUM_TIMESLOTS * DST_SIZE);
#define DEMUX(i) demux_0 (src, dst[i], i)
- DEMUX ( 0); DEMUX ( 1); DEMUX ( 2); DEMUX ( 3);
- DEMUX ( 4); DEMUX ( 5); DEMUX ( 6); DEMUX ( 7);
- DEMUX ( 8); DEMUX ( 9); DEMUX (10); DEMUX (11);
- DEMUX (12); DEMUX (13); DEMUX (14); DEMUX (15);
- DEMUX (16); DEMUX (17); DEMUX (18); DEMUX (19);
- DEMUX (20); DEMUX (21); DEMUX (22); DEMUX (23);
- DEMUX (24); DEMUX (25); DEMUX (26); DEMUX (27);
- DEMUX (28); DEMUX (29); DEMUX (30); DEMUX (31);
+
+ DUP_32 (DEMUX);
+
#undef DEMUX
}
View
@@ -0,0 +1,35 @@
+// Duplicators for macros that take one parameter.
+// DUP_4 (p) expands to the equivalent of p(0); p(1); p(2); p(3);
+// DUP_4_(p, 10) expands to the equivalent of p(10); p(11); p(12); p(13);
+// 'index' is parenthesized before an offset is added, so an argument such as a|b still advances by one.
+#define DUP_2_(macro, index) do { macro (index); macro ((index)+1); } while (0)
+#define DUP_4_(macro, index) do { DUP_2_ (macro, index); DUP_2_ (macro, (index)+2); } while (0)
+#define DUP_8_(macro, index) do { DUP_4_ (macro, index); DUP_4_ (macro, (index)+4); } while (0)
+#define DUP_16_(macro, index) do { DUP_8_ (macro, index); DUP_8_ (macro, (index)+8); } while (0)
+#define DUP_32_(macro, index) do { DUP_16_(macro, index); DUP_16_(macro, (index)+16);} while (0)
+#define DUP_64_(macro, index) do { DUP_32_(macro, index); DUP_32_(macro, (index)+32);} while (0)
+
+#define DUP_2(macro) DUP_2_ (macro, 0)
+#define DUP_4(macro) DUP_4_ (macro, 0)
+#define DUP_8(macro) DUP_8_ (macro, 0)
+#define DUP_16(macro) DUP_16_(macro, 0)
+#define DUP_32(macro) DUP_32_(macro, 0)
+#define DUP_64(macro) DUP_64_(macro, 0)
+
+// Duplicators for macros that take two parameters; 'param' is passed unchanged to every call.
+// DUP2_4 (p, j) expands to the equivalent of p(0,j); p(1,j); p(2,j); p(3,j);
+// DUP2_4_(p, 10, j) expands to the equivalent of p(10,j); p(11,j); p(12,j); p(13,j);
+// 'index' is parenthesized before an offset is added, so an argument such as a|b still advances by one.
+#define DUP2_2_(macro, index, param) do { macro (index, param); macro ((index)+1, param); } while (0)
+#define DUP2_4_(macro, index, param) do { DUP2_2_ (macro, index, param); DUP2_2_ (macro, (index)+2, param); } while (0)
+#define DUP2_8_(macro, index, param) do { DUP2_4_ (macro, index, param); DUP2_4_ (macro, (index)+4, param); } while (0)
+#define DUP2_16_(macro, index, param) do { DUP2_8_ (macro, index, param); DUP2_8_ (macro, (index)+8, param); } while (0)
+#define DUP2_32_(macro, index, param) do { DUP2_16_(macro, index, param); DUP2_16_(macro, (index)+16, param);} while (0)
+#define DUP2_64_(macro, index, param) do { DUP2_32_(macro, index, param); DUP2_32_(macro, (index)+32, param);} while (0)
+
+#define DUP2_2(macro, param) DUP2_2_ (macro, 0, param)
+#define DUP2_4(macro, param) DUP2_4_ (macro, 0, param)
+#define DUP2_8(macro, param) DUP2_8_ (macro, 0, param)
+#define DUP2_16(macro, param) DUP2_16_(macro, 0, param)
+#define DUP2_32(macro, param) DUP2_32_(macro, 0, param)
+#define DUP2_64(macro, param) DUP2_64_(macro, 0, param)

0 comments on commit f8f0313

Please sign in to comment.