From 886e70312fbb3baa977984812d3b4d0c31ffc3c6 Mon Sep 17 00:00:00 2001 From: Justin Rosner Date: Fri, 6 Jan 2023 07:03:03 -0800 Subject: [PATCH] Revert "FPGA: Remove db query 9 in 2023.0 (#1257)" This reverts commit 86c4aa82aecc328310753a8de247b95ffa5f194d and reenables the db9 code sample in 2023.1 --- .../ReferenceDesigns/db/README.md | 24 +- .../ReferenceDesigns/db/assets/q9.png | Bin 0 -> 38157 bytes .../ReferenceDesigns/db/sample.json | 24 + .../ReferenceDesigns/db/src/CMakeLists.txt | 24 +- .../ReferenceDesigns/db/src/db.cpp | 61 +- .../ReferenceDesigns/db/src/dbdata.cpp | 103 +++ .../ReferenceDesigns/db/src/dbdata.hpp | 5 + .../db/src/query9/pipe_types.hpp | 272 ++++++++ .../db/src/query9/query9_kernel.cpp | 644 ++++++++++++++++++ .../db/src/query9/query9_kernel.hpp | 17 + 10 files changed, 1160 insertions(+), 14 deletions(-) create mode 100644 DirectProgramming/C++SYCL_FPGA/ReferenceDesigns/db/assets/q9.png create mode 100644 DirectProgramming/C++SYCL_FPGA/ReferenceDesigns/db/src/query9/pipe_types.hpp create mode 100644 DirectProgramming/C++SYCL_FPGA/ReferenceDesigns/db/src/query9/query9_kernel.cpp create mode 100644 DirectProgramming/C++SYCL_FPGA/ReferenceDesigns/db/src/query9/query9_kernel.hpp diff --git a/DirectProgramming/C++SYCL_FPGA/ReferenceDesigns/db/README.md b/DirectProgramming/C++SYCL_FPGA/ReferenceDesigns/db/README.md index f74e919b50..c66454b764 100644 --- a/DirectProgramming/C++SYCL_FPGA/ReferenceDesigns/db/README.md +++ b/DirectProgramming/C++SYCL_FPGA/ReferenceDesigns/db/README.md @@ -76,12 +76,18 @@ This design leverages concepts discussed in the [FPGA tutorials](/DirectProgramm ### Query Implementations -The following sections describe at a high level how queries 1, 11 and 12 are implemented on the FPGA using a set of generalized database operators (found in `db_utils/`). In the block diagrams below, the blocks are oneAPI kernels, and the arrows represent `pipes` that shows the flow of data from one kernel to another. +The following sections describe at a high level how queries 1, 9, 11 and 12 are implemented on the FPGA using a set of generalized database operators (found in `db_utils/`). In the block diagrams below, the blocks are oneAPI kernels, and the arrows represent `pipes` that shows the flow of data from one kernel to another. #### Query 1 Query 1 is the simplest of the four queries and only uses the `Accumulator` database operator. The query streams in each row of the LINEITEM table and performs computation on each row. +#### Query 9 + +Query 9 is the most complicated of the four queries and utilizes all database operators (`LikeRegex`, `Accumulator`, `MapJoin`, `MergeJoin`, `DuplicateMergeJoin`, and `FifoSort`). The block diagram of the design is shown below. + +![](assets/q9.png) + #### Query 11 Query 11 showcases the `MapJoin` and `FifoSort` database operators. The block diagram of the design is shown below. @@ -101,6 +107,8 @@ Query 12 showcases the `MergeJoin` database operator. The block diagram of the d |`dbdata.cpp` | Contains code to parse the database input files and validate the query output |`dbdata.hpp` | Definitions of database related data structures and parsing functions |`query1/query1_kernel.cpp` | Contains the kernel for Query 1 +|`query9/query9_kernel.cpp` | Contains the kernel for Query 9 +|`query9/pipe_types.cpp` | All data types and instantiations for pipes used in query 9 |`query11/query11_kernel.cpp` | Contains the kernel for Query 11 |`query11/pipe_types.cpp` | All data types and instantiations for pipes used in query 11 |`query12/query12_kernel.cpp` | Contains the kernel for Query 12 @@ -142,7 +150,7 @@ Query 12 showcases the `MergeJoin` database operator. The block diagram of the d cd build cmake .. -DQUERY=1 ``` - `-DQUERY=` can be any of the following query numbers: `1`, `11` or `12`. + `-DQUERY=` can be any of the following query numbers: `1`, `9`, `11` or `12`. 3. Compile the design. (The provided targets match the recommended development flow.) @@ -156,12 +164,14 @@ Query 12 showcases the `MergeJoin` database operator. The block diagram of the d ``` The report resides at `db_report.prj/reports/report.html`. + >**Note**: If you are compiling Query 9 (`-DQUERY=9`), expect a long report generation time. You can download pre-generated reports from [https://iotdk.intel.com/fpga-precompiled-binaries/latest/db.fpga.tar.gz](https://iotdk.intel.com/fpga-precompiled-binaries/latest/db.fpga.tar.gz). + 3. Compile for FPGA hardware (longer compile time, targets FPGA device). ``` make fpga ``` - When building for hardware, the default scale factor is **1**. To use the smaller scale factor of 0.01, add the flag `-DSF_SMALL=1` to the original `cmake` command. For example: `cmake .. -DQUERY=11 -DSF_SMALL=1`. See the [Database files](#database-files) for more information. + When building for hardware, the default scale factor is **1**. To use the smaller scale factor of 0.01, add the flag `-DSF_SMALL=1` to the original `cmake` command. For example: `cmake .. -DQUERY=9 -DSF_SMALL=1`. See the [Database files](#database-files) for more information. (Optional) The hardware compile may take several hours to complete. You can download a pre-compiled binary (compatible with Linux* Ubuntu* 18.04) for an Intel® FPGA PAC D5005 (with Intel Stratix® 10 SX) from [https://iotdk.intel.com/fpga-precompiled-binaries/latest/db.fpga.tar.gz](https://iotdk.intel.com/fpga-precompiled-binaries/latest/db.fpga.tar.gz). @@ -176,7 +186,7 @@ Query 12 showcases the `MergeJoin` database operator. The block diagram of the d cd build cmake -G "NMake Makefiles" -DQUERY=1 ``` - `-DQUERY=` can be any of the following query numbers: `1`, `11` or `12`. + `-DQUERY=` can be any of the following query numbers: `1`, `9`, `11` or `12`. 3. Compile the design. (The provided targets match the recommended development flow.) @@ -191,6 +201,8 @@ Query 12 showcases the `MergeJoin` database operator. The block diagram of the d ``` The report resides at `db_report.prj/reports/report.html` directory. + >**Note**: If you are compiling Query 9 (`-DQUERY=9`), expect a long report generation time. + 3. Compile for FPGA hardware (longer compile time, targets FPGA device): ``` nmake fpga @@ -216,7 +228,7 @@ Query 12 showcases the `MergeJoin` database operator. The block diagram of the d ``` ./db.fpga_emu --dbroot=../data/sf0.01 --test ``` - (Optional) Run the design for queries `11` and `12`. + (Optional) Run the design for queries `9`, `11` and `12`. 2. Run the design on an FPGA device. ``` @@ -229,7 +241,7 @@ Query 12 showcases the `MergeJoin` database operator. The block diagram of the d ``` db.fpga_emu.exe --dbroot=../data/sf0.01 --test ``` - (Optional) Run the design for queries `11` and `12`. + (Optional) Run the design for queries `9`, `11` and `12`. 2. Run the sample on an FPGA device. ``` diff --git a/DirectProgramming/C++SYCL_FPGA/ReferenceDesigns/db/assets/q9.png b/DirectProgramming/C++SYCL_FPGA/ReferenceDesigns/db/assets/q9.png new file mode 100644 index 0000000000000000000000000000000000000000..67c479f2f010d6dfb73928223b9817fb642ab112 GIT binary patch literal 38157 zcmeFadsxzU|3BW^-Q9iHHt&0PRx1zdxU|$+nP=Q;O`D}lJRhJUa!N>wCq$&JwYExG zW>OxvQpr%62gDQ7)|8Tzc_cxhBqT*NMIb@o`$D(fTeCe!Rn!wLJNUyU(uLd)~L-wCV3>>h*&Y;Qe!d9&)$k zz5eU(?|*mdtzUn=eflToUw>Wl`U>f9zrOLmhyTB(hFKm_J)t+hw;cPipV{VXe|x$s zs^VMH)>FHWyDu@zi{JOrRC9;d`7@g@FG<_lZaUTZw~VjKmY+UzIp%9GA@7|7vELql z`!=_zelH~FcIUUsC8thxz4`BZT9-{$Z)F}_`s~{7pu3*`aavbpvE(hEh7B8W@W{hs?xM_Rvmj6R4x1-5Gi%Vh72SifZUV9xd^pG){_ac}?rs!tq!He(PuXLe-{c}-@z zWsmTeJdErLKY%?yz{$G&`(6(>#BRi`?>5W3w94~5`Elhz-rtRT`y>bY7*>+^-@|sL z|L30I>hQyhMNWQAn@PxqJxh#->&n}x-YMDqJGHW6^1OttyO)DgUK-kh+tE#b%}AZh z;Z^lytM@*?u|f9Q!-qxdug8l&n%Yz1?6k=$W0l=K#I=eyjpV(un{tSIgHD&4(*H-g z|B>#0Qumv6`k%u7&({6VP5jSI{LjPw|0XzzI$^j2*Y#2_Ua-@`_4*0JTCK*})-Yk1 zVcM5C zYep7vBnjWo#k32=O!?L4*O%rK4Qq&#TB0VGHKQZWa#%m>R8yTRrG`nGK|{d?1+ok> zkJ)_#k-i3jhhALBJsKj`j7;dqkDlKtElUe5lttRDS1A9z0y{|VWS0Jfv()mJY91K& zwBWy?PgJd~>$73^n=$j%1ve5hMzOa#LM*FW8&`0nw4T*kH(Ob1d7*d;-%l`G_7{;| ze)Wns=y219Tglytc=~L?vbJ&WQ1w3&4f-iN!xUEgWX9pU3#$|P59tOEcUs;xx!I?m+V^{q}92?(Zn-|o1G#s}78{54*c=9N7 zZBCu4O3O&SQz<-NE{PfpELhiA)k-6=cD&omv%Vjih2O0;BaI)j&SQ{Q|I*!TCG?){ z)iL^vM9RAnaRgRCvF_u|H1}SOi6Rk0)!Py+Jzx8a5MSyR^q_R<&vbbVat*Vq!SkvA z;fgZH5Y%A#`A?*+;{y#L{!Xlm*s}^%@zNCjdw?gb218%Y4ThosJ3Ua>aZJfND6?o8 z7jOHETw^59&J>d^R|T2)e_x~@fa87`UFX7IL%28!MF(NhG6VtP^ucEiOL_|>iIV!ELu%}pcZ)wd9d3G<{fSo1 zGwF%dI?+YXOxAhh%`|TQ>q|x6jXFs#4av4wRT8a^phZ%u9u^oz85&{iqqakq)7{ed zxe$?Pcm{_L6|G?oRA}M4vhZ?MS0${9w5!39ysAS0T{c~VKCgb8D`yQieA|jMW5k)s z+RA%;Fqh+YN*>(k=oIcLmmx?Ti-e56TEtsDg(#sd10R)zTfzrV;oyPbVaSKa!$+hjm&XC;$!LRqQ76X*1yj;p_j zZnWKYk$p>|d>_tXf6QR;)y-G2ubzVnkNJsS)HLIWFYkO?kB8{LygX*({{} zbq1|g<>z7KmND99KIoMj|At|jyp$an^1#nfhyQyct6d%sxGz{98?CwBf;DmL8 zb=qqgm1feI$nZdIZ&lXii12mnw0=XZ)**3yH6I9^Ezq$j14KlIt!cx`vk<~wuI;C` z(r2h^H<*V)dG;9`pM2_6F*fajGhblKedLsgh}og_81GIrL(&h6Ys?y_ietTG4TE&) z^>BPM8#$5}7?yB9?ph>oc(^#x^@gaKpcOZ~_KzG-fojwHoXswHGj(xe(fy?e<%|VSSrzZYjZN2 zymStqpH^SN?Y(iov*UAQIFbQH)P*OG4_tAOHp;776P-)!()We(wUg}#OaWC>(=uU4qLbE;ANC^46-_{hkERNnUWxB>52`5F=`CXr;!=k z|D?V$IJ70lC2-9fWW&2%vD0-I6UW32us{zsTHwE}oJAHVqSt)Y8th}fiQa8^0F_qN zS{{-zvMIGZKH*xwBS!eM1Q`ABabkPI_u;kBg7u6769=JqDh40l>YOqjY%Z=F8>qJi z-oVt;j*2EH8f09iuMSr~mS^h8fCduT3G}fZr=oNyEFJnix1}jxa5}@9Y;fzUrgl47Xt` zd?VKO0+C!B5jc5s<*9qosDWlOiSx0Yv&frGduOd-!=rFBLNhx-+WOr=d9R2Q+qkDC zZgg#FJ}j|gx^<>h4FQ<0MYo|K|?mrhlef(=!~ zy0uz)q|H7{4O(hO-tD-M1)lNy@aRaroca(t$9m4tkHsxt#rD?|8i?Ws>#KcLlW+d~ z*n9u%3r=8yt9NQ;skhD} z(vr%4I;Z04u_1VxV+SwqLXXNJ(bX=5cZCLLLEwi^i!TlDf3X7B?aW)92;(=4vJQ$? zx~}#e<Vj^Z6y0r^QNZOlD>5Ng7s(g(HU{!LQeK1S(p9UI!0_r zzx*PnfmCX^2SwAsi5nc@&8gq$f^$|3AL2t=w3%WvOEQgVn^l^R6(>eRnzEhw>`+y? zzFFmhBRQvx4irrJ3+zJV>Pc;|`&&wrlN`C$GSMep&d0{K&sdO)>wqSIf`%g)*ixlx z2E)>+QjxD^?8`A(pr9!Y4|{-%(%Np@!75|`kJZyqdD?yTs@Cg~rn@vYL%{0OBW$R9 z?+1S^8`|r<+PoFvZ@}Y1FmI`HpD-inuFYtEpGKlL?~cRmWX1-ooLvLz-XW(a{z&0R zR8aTE(OnA)t1w3B4f;g@1coxE*q8iz#ax!`WojDgn7sy)mB}@JK z1sNR8o4+81^n{Dvd>BfNjK{#N^9r|Lb&O)I!c=t}B}H=Ek+w+|5h)SdpdKCRVbad2 z2ZQ>nEK2HQXFl&vY;$ijrQ$N7B3nCv%1}9nTU5(sr|>alR7@{h0x=C+M-3Gc)_Fv> zVhPbFc?B2ZkQT zNQ1lSp2p7vGQ7b$I7=yw)mAY|_wJ>O1j;F7B@%a$Pg%KDTpMxPhI;p~tPLtpJjuDf zze-h(meutZYVb;J@lWk_C!^RA9wBgTSLBG9hm zP!INXzYq?aFe-gOseItCvd@+_F)Ybf%(3Q;T#RlEo26l$#FrZr2j>7MR?!h*kq+8) z&t-x$2`}KGDpoRM!V_Z#d?Io+PSxB_z2`TvS`AiNPO^^?RmSIX>~9s~J38%`-(t~y z`)8^L^!@_mnoLdx>K#GV?Qp^bp%pvmf$Fa#U|eH(L|fnd3&IkugKDG)P%!^IULFa> zxY{gRZsoT7LSXnzWy}u(a{~Ld81I%2cvZnPq$H*_+e^Df-8fp z>D5Q#-iegMwS9%;v$>&x8Jz6CW^*XowKZZgFxPxv2z$M#&aAw)Ro;lvZ8ensgrz&G zzHdc_yldS!Xwk^8)x)&5A~PFp9Y>Ki-n0WzA@{pa0rF=V8IQ?6t8iGRI5EIFG`SV! zrp@EcSYS`p4_-A$N}YpCVj+$(5l*y83o=iHnn7K?L7(lC9XF?B_i{b2-N=7^kX$uX zim+iz2ZG7H20rFKMT0f1fkLh%jAfM8$}4XoIhFD~c5{w6G$@19(ZU?AwxSc#33;7E z-4x&0K_eNr70voC&}m>K?s&Ucd5?GXhI#iBVf>@noG)xNUg zM+(n%4r|pDYgh!mUZOW;fzenP9C3)v%o z;NE$X=`DiTF$o#GPukQk>c@L=C{u}^0(LXk*vRw5PV>U?wiV8zT@R#3cUKO}6-g69 zoYst%ktJKeB0cVFp4Q#DSKf?OUkdMF#mzNkI&L^?P!)3&ddH0RPr36bkyS*>5*;) z>^9=tsogI|ozFMuV(uCJn)aND;~ zJSxWyU8lE7E*=KhxqZsi&)$|9EjxQUPvc6rL`)f7L0Y!^r;L@a}j zO|s|=iwth8jg{xC zssHF7J?kY_sv-vWaM6I*0*^rD;zPa!kO|XpA}VlFLH|~&oQ`glc<4t(R_TeXsusao?v%J z*rSSN0E?sf_JHNb&&5!u(K8_4yanPCa{ADum-x!O1&`d+^6fFnKuN6dcZz7#M(<9X?h z+vE3!RbU5towWQEZz%x55bhS4Y#5tYaIDPko+Sxs-a47vE6r+ZB;R!F(A2Qxvz0ViCE|F-qr{|iw8BXG=s zf>xuoQE$rs)!HnIN)PRCI<|E`+sq`S;k$6Z<{dyKyei{jx}7k;OE+Qdb1v(3i{&6f zdTS()G8dvQ?wBMXrxU&|y#EXP5kqo?HnZbb;{()u-}x)p+*aK0TC{mW2J#uNo!_ue z7%iv=F-^FV^@$6s^n;H;M|^>e?fVVomYmwxOn?c*JTK`U)31$atOaQHJMvBOP_EE< zNq4{cN$g?FW#Y4^8!{vyJ%szZ6ux>ZS1$uwVQh8c?7}~BAuad;Bakxrk|<3Jj_J5D z|BnB=Mm*XdwEg>EA(*zU0;%9~M}+mij4-)BXxXPu%jeLT)SC=0s3fjkxf(+8wyw4ZVe+gfv*Ykzu9*7+$VaWKh+35#$3$Qq+$u})6^qv#p*^RJC4#6Kw2ukhFhSAZEUeNZ{Q4O2*Y@Yj!- zV+sv+e)nIHiQi9_ZcO{(GJ({p?=NLy+?a!kSVV)CWa&|$PBkxb`pY*a|HBwuUU>H& zZ(4;rZ})=0{|e+ieeURd)QUGX9>DLf&YA3Fw8Rn)S;`#V+>-T|MBNAY$P1H{zF!qT z%-}-Z{_?14_AvzdH3F)^~kjn6pk0nq};pv1exUHB$Kf#y;H?%tm* z?zTKdz_t0KoF0<#Xe^_pfVkQYD&&gz+3EYUfPALY^7vI>ohFWHR}K@1>9Qxq^Z znY+3xGM~45QtD30dvlL^TY%s+{{Bms+YAI2wqF&-Bot(jU%F51Z&~xRgZ0y^0j0!6 zFQe4Mw~Z^qm2v+hpCo6zoCMy+0BznDnHk$YH}1~q(3WdGi7CTdp11rRLC7}WxdZl9 zwSWQ%B=wnMPryeWEmiG1UrG<2vD&0T-Z;cWHir|>gXY#tA7|~=?fqrCslG-8nV@|Z zQbF6BdaOR;+DnN9pG%>IOxJq1OSOkyCMCB6!!@cIt8Vr86!}ZV@`o)TnumwNA60Ee zznnv1PiGX;XJ(pAy2y*a_1CjuM%Rq>8J^Z<{~k@O>iAt$ZF42&F@v=cR+RPUbr+rShDmnmEzUmXve{m|+B?A6C!7nxwS`BS?sZq}k9c z2V+wO<)1qJSL_)@=O2#XrneLp6zCs#lzVfVq+>tL5T8+5=Vf+EmQL)U@ANT!ty|-e zxTBZycvEwfE>lT*jiNt%6#b}4I69%SkiTiLXzXMj005gz0I;!ZY3MU(Obkz@s^~G7 zLK!#AB679NQf57jZsn*j*ZUR$;^3lkKSrKZsf> zC7onDH#kaHNagilZAeQ)YvpGJ&6$+pM>IW+)L(}3$tU5NQ6jN@gXs$C@22PB>YkvQ zCZnCW02}4e;Nq$2`*@$T3WD33%RkN3hWj~Tps%^3$;X&IL_&&25hg)3v-Nc9Daf}j zmt77C@BC^&2EG7pu~qtuLxFV@B~plO_IIK=B4x6m2Aw8mSOXcB1bsQJjLhChWmKAY%G!rC*;6P(ooGe3>DPUuQ4sY4Kv-_TY#ADE9k157 zBq5r-$pRkNg+v&e>T$l+oF%S8btHta%~@FdbkB@CG8*xB_R^HlLa^U7K~qO;D6!4D z&W>F&MbGYSh!qqN=@qTGYx{Hw+c>kgW@2xZw^2fa1o4kp8o$Yl0&+eIbS6fj<A7w@PRJ}T1MvA2TOOA`v#6r*D0M;!B?(q7YD0msR(77_yLcrKX)aD#tR4&SNhU> zXF_a6c@xbxD@8~tl1440mq`yu!gWkDX`Cf3t*-&AP_Coq7yn5JRl}grK$`HN(QE}xBo89!$#I6iPh%ex*QUG#u9J`o4W%J=0YkjJnisZ3xdMgsrnEgcRHiFRe`s%Qil6m;vg%#Q(oQYt>kD{~GI*j?yG8^i}C@pdwMDw0F! zknESDa)J=r8xeJuhJVX4H$ICStS9t!@K9xYvSZHvd6(pRpZ?N54Hq3QN07gtBMOxY_t6#9dkeM^B=MkM6G*?N<sg!KTe-6^Me)TIr9ShTU4=ySVb|ZGCV1|5H&1wP}zUX_+wPzwlQG0hqBjZXWhGJv^& zk3%_avHezB`7_%rwV**f+3FaK@AodS9!I0=uauPM^x=@%l;SFVi z8HX`!SE-61C)?Wmz0i8p z#+(b);ZnN^HMLgV4dor3{KOQk^_PjSF^}D3!J)hDsOiOch2NuDf6(jH76seU4Y8D_ z$Ffk3WrdH#kK14QFQTuK8Lk0%VL>heC6ze@YQ6u= z(-lh22XN~g^2oKMk>IuEG^bKs_I^4PW5FwId>9~@p0cgAS(#M&KCggJF#=<-s<7cf zn*UQ-aw~*n%aUrt4`XV$@HG?K?TGA*Wh%-1-W$GiPV<`~Usr}^%gM`>(6f#HS5dz5ZuS#|ZA5c# zsO|}&&EJ2$xby+l18F+kaML{o&MrlA6je2%w_>12o5uDYF{kn?3Yp`-Dm~)gi8a{) zhZUKO8)RF7wfaPe6RYNk@S?WXs*j$r|8d8ej4AL!3$y`wkz8xQ^&)}QyV1* zF>u^3gqw-%@Q~q;4WtV~%tfj;LY|@Rag(bWbixyIV7efxKMN5aJEC@A%yHxW@IZ@Dm$O^&|Lwd))tyx*(-#c<^e=chQ+8|=NYA@$k%qfdHfx8ui9C&@CSY$cRMGx%k!BrPYcmX7A0 z71m}e?Yq*HB`NG6>CxG96bB=#+B^RgZ>f)?=Ms-S(m(O@TE8TJsu8nt7NAZQ0{nO@M zNF8&<2(RfwkJ|3JI*2CJT}>FF-IJ^(|JnIoKR*nzlCQZG6gbLXCyKXWCl4DX{3SW* z5AT6G>5R^pN57-=d+DR6OhD!|>G^LRXtGY?lzA`BhwI+sr4qbfnp}lkQId?LI-rQWwYTHuo`_l6=lhxx9se$ERq+IT!yfk^VNHyr)A(U)-#R1tXPVGZD z9u|Rsn{Btu9KZozw%;Qd zkc9rL@Gxp7vj#-`FG*QdMa?C}!X3Wco64=4S3dlhL$;fosX0Fq;1ADyMLt>3L^MDk zg=xmh>II@V|K-UoeiF#kvy-6^(D}zGF$V-CFFl`~u}oHYNW{den_9R<@|W`DhviD0 z1S4ur=avL~>am{>d1;!Be+mTlS$L@a*`ydpEj%E2sbm~6nWMBfYR0(>@^TS(P&(Kc z+h^QjKX<;g7q*o;)+(C#SKOG_*Rnc!(Qzy}(8f7I?G z-AmK`Q{Ho=_M9g*uB-WlowRHK*LXD3(^?);L-7FjSPat?58b1>i`d^vN?}u=K=jNl zbeYoIOZy^YX(gxhBEw7}Lc}*b;}LaIw2aF^A^~VlAb`fsYFW=Nhz#AL+4lKTz&P6w z<&rd4a2Yrc*xC}=Ib{S0gyp{{&wZg9(vtt5qJ>15;}mHQerPbFX0x-8pA68B&uC|Ag=Hg773K zoK;xwq$tVB)&=fjH&ahS`F*%v0&h(SSlm~mL!6Dymbqx-dX<`t&O*^PJ!_Ph})_t55vP{IUP0K*%otO4ZRq-nl0*+gf zd4gej)A{@2DIr7-EwIiDnrskFTM$d5Y}O;T%MK}QPbYOYj@5CjFOsx1gRD_@X#ygb zbb{?@oFua3c3-M~)){gJSZNjR0_TAIMQcv0IF;5mi3+I&Yrj^X9W+6VePrtwuvPVO_k+r8^E z(;i+X|3x1Ii+D5}ez5EnL`BT7J#Gz@Ut{s=dR(Hs# zVbOv~^4VI3Z5|`S%|q?56CX-&mVokBVf9+$%BN*F4V`%f&eW^M>ju*DzDwX%#EUW2 zISFN`)bSPymqKk|Z7mi)gH=!+Z_CQ3WBUPa*5w4hc7*%D52#{0`cvD3Y$uyPbkk+MREkaj6l$LcJ z*H3VzFy?!eyiY|(3kr5oufjxxG@H`)1+;lp$Gx2rX%~EEVy;cMk&JvdaN#`ZB{1%0B>HmajeTgpYbO{wYo=5 z=B*4KNe=kb5a8GNqA4BC_KWnJ>o_TT+7IcbvOPB|uRPWphsI#%#=r=SKd1VHUuYV* z@n!QKF~T?6@de2V9`qYMIJz5W0!=Gu{Bfoi{xEK(pT|OHvw8OP(LT388_^dFG(IXD zihHj0L`_+g%=W#FwQUMy>P{yzF$A68^*YZi6Yj@N9PD#jhjQN=|uoF4Z`4Kt1oVb(T z9l8n0oZO}5_oSVFu20zVUshuu8^I~LQ=lcs4I3o)Fjj59CqJl&ncFQSJ8EF=09$a^ zqI}xt=J~)-ZIw38$~Qt-o+$4wd%u@ZW0SHE$BZbbrA|=rfAbUhEr!;QdlU%M9>!*K zFgobZEx}_6-le-ryC1tBoT)DY`Gnv~m}oY9-)PYJD~}UPaE^2QvgGWJnWsY2{oR!L zz0({d4+2xQDw|sPe8i3RVuMLSf9@jwL1ed%vU-crvT{W^=5$}C=E|`x_4!f4xzlsl zJJSVo?AvU&tG#rN^7l}OoNG;u*{_^8Reped;AKk}6ch~AN^D}j==)ZawU~vMDIE%x z#{!}at67%Y&EI7?UhzZui9gQQyVTWObhAJlDD;=r94*RrxYLQf}tv*AVKJ z7%%v~MJ{d3%xVc#!}oa^SB9QT4*#ImZfs-M7=qMoyo&!;leq|jsHi=L4}9d=j1-@P z>`z`o>Vg|W7l1E8576{0h3;O21B=i{ZGbTo5tK)=!!BizU$XDiZ=j-xpuRV|g+*ya z!3Sa@bCTwyZmRO2c+dQx3eG`n^qdk+g+eoQ))ZJpgG6!E1@;nMRJ`56Oqh52stw9b zi_r72pqsQ~2hiC{Q(MMHbGw&G$3K02ePcyc&C3bCewK?-#k8|f)^gckV-h!P;H&ui zzniDqh&oeh*eCf1{&TVEoRx=NahhuipVcJEUFO|2xfpv|QJl6sP)8H@441Q7cbV_{ zeb)eSIcJ%={}pmJCu!qHt7IB&IBa&=H(cT2#MyyIXhc?{?Y<9W9Ho zO4xeneWNQ0zf)?+4(dD9V^M@y*h`I)Xwg_ zoH>Wp)R%>ri)r|`*4!LjMh&~1kv}KCV4zuGBq7Hs>WfRjk4PPw0Fni+Y8~ zM%)1*^s%rx(DG0%AXTyv(eY2S^xlxdtv)$NjCL6JCMC`4V2J2U8t?>*VSV2esKGq& znVFuRZt6uJVEfD|Fqrj5t%e-R+x_ER0C!P@3Ry%;!FJ3{9}cL?B8+?Jd_k;v&B!yW zxfpz@CnBMb^3CO~YmFfYj8hMP3%Xxv)agZ7nA)#2`Y9;9H)NTT>BaV4Xoo=SkU;Ag zn)cC=;KlF;<)n7VXXtf-n-<=peuYx?VRUsF=+#=5@xRkl>~SpQ-tFAx_ic zMz!N@##^`;y7`FHVm3+kv-^Yzy6v^GRKwBK^F8P0RsQoEl`gXGpMO<%#>1W@Veg^9 z!IS6%Esbz)RuAL1!)u6OE>R#|IBdg$_~Or1i?g;U4&SSF^y<5mIU;3l6lO- z&tu+o{Vk()w9NNE@69XuzpES&kH| zd)^Fi37nydz9I7uE@G>$Y_c7E2r?JgGrb7&(Zq9W2KHdWeA(NH68xkeela|fCL8pG z0cioe+ph>xya1PQ+l`V&3evY73W{D#Fh90RTJUIe%~!(9J(Pv$iMdw_w}$o|>USur zU5sP*ISi*>tEGvKCSd$n3khb^7NvRe$W;m55nfupn0x@*vn!OZ4Zt}aSj@XRO( zL9>b!22+IfLtPg`#&0y%*)?dK+^|}h*+X0K*oT-2X`}6DHSJ?4Pz_r|aIggeV9^YM zdVEl^TD?VSGZ%P(;hSY0n%%L#1|9A<9XZ{V9J)bnN58ZPC9f@lCEg}p;Nw>K%xhof>eIoXkw8Z%u0}`!&K|_eQpky&yq!hG7 z&uq%n5)Wr?E}O0i{B21j%a@vyg(TOJeT(vD!392YPeNm=E*chZjSEUc>kyj%V5ph2A`Ex*&y2{q-+rpiTh=g{`fI_FV zEcKvY;lc}zYVSi0w)Tr!3cwIwvee%?bVFS4#kgv3W97zqyXEB`>7N+8!LlE(0NI2$ zaJMBU_OH&8ya9s#Y2GE0_h{0eIQcDUjG3I0q7mb#{R;bXAU}J(GO4#Ir?scX`?3B+ zrE{srCt7H!4xFy(4IjU{PB#rY3dGce;TxAV1%J|m$D8oJAWqP87JIN8yHXE%UwUB` z?LSgypV_PAe%0- zPW)zzz+i9J$?27>&ASO-4|W?S0StMr4_i0BdYs@^q3#yx1}*DdWKK5H@OsC9ye9~F zSkYRos3bYE_L#@-p5ocvA zy|`}I>JRl`z~dcD-2^sB|KFCb<=OliF6}13&RlHyCJ}wF0Za?ia9j}FDyvot2)OZr15s9f>u^}3+104F#*S^|G~RbU}>kj(awa> zY;|sgk5H*(F-$rch@hQn&hV_;Q*%WsPH3Jg(!)5Bb$uQU3Hj?M%w^K@@%4;a?8X_R zIvh+*^)vh?GhoQ^_y@YAA2nMXM$T-z2Kl+7(sECl@~Dx@c81R=D*~c)#i~0pZxcFOB?06;E)e` zRS{nYYql+`A4Mo5!~14}`jprE)MpG9%M5Vl)_9SpdRRCU#H*+bh9VyD^ix5GAJuQm z>X4y41>zdsdqRmz7**7))RU%6;NAv>NH4e4ZO0F=XTky>*~;~Xt&kV`ylV#z+)?% zI-C8AfJaQe?2t$&NC=^ZZMNEA++d~c1!}-u;f*8DsJt)74Nrsvu7-xw4Q8~I?#fm? z3B{?0cxYywuACtaOz4+WtlNr-@wbQ%Zm_d)Rn4sI|R#dz>y zAf7b!L-Oc{hQn?N%@%Z#?w7-7K?SJ1cBQ}a+pIn~qVTfky>fb>h&OP=dE~-xB-oIy4&oja#ONfc`x|4ERI8{FZZX|f9iBXb?gJ8S&e;{Usw_T}O+*QVF{zE!h6_mdhyIU!sqjb5hILw?@cCJE_R2rz| zI0kSeGq(zv+%DuB${$2pazfLpNLFkRnyhQ}%{PojdTb8F1Mq8LA;tQl9WT9a=9avz z^0rV5K$&$g_PPt}5(-iptB#eW1rbLl2o)crBGk+>56a$q$+kYss9>T6d)z+^C0Cum zh7o635kC3DVNl zd06gUqKiDmnU%>TIf_6jvB{$Mfw74hClFTXo0^)Hm6w@BlG0mogS0EH1dPuNX-TAV zsnLz3PEJZ;F=uVI^kI**cW1`r07-YNP~5Pai-!fwil>gVJvz8CEPazi+Up>_t{)(n zana@L+2c7}sxWB}qiIjkBu)L}cq(u23QMo0b=Hk&y8jVy_YH4V^p_o2cmH(J*P#7+1c^3{SFFO1OeBHpV z-@b|9^X1o#?DHAfH?~guaRLrHV3Lm#cz(fE@xf3thz+-Lt1q1BVoxh^M^m0;BC7$P37nHUsKJzzGa1-kZ1EpEgx zA_^`T@fG484TPpt^J~XENb+z5-L*QR$V=webZ=cFznQCc-j0bDtTU{mtvi$UH4VvG z{){e2dMm59(4Le6246}2%Y9A8Zh)aqiaLRf?15O-%YzBE@i17*q2zlIRawj>qSCor z>;LcV=v{o?nJV@T#dmJX`d)Fc77V2eq@zjLenhZJPwpT=I-K;(2^$Yug8@;9FANn{ zW{c}b6u~kXm)9c&O4sXGSCciq81@4HeUJK zWH_<0<hqN1lCj*ll$z4;0&OM-!jX_&~~`Gu?MY* z_>gNBfhoFVm@HWL12jW=3lWyFPY#tf^reey-PVAnjSW1bGf))^zhNajE{%}{q$|7H z8u=Biv|1~&Nh_f}c=&FPoSk-2z?%vUl-=@RHkHX!A{dUt#{`5@^ymX|L!~ZOM4pNp zIFaXz8dDh;g7(eGb&q&i^~zF7k-D(~3pR`4TkpunIayHUds-9BF*8(bnV?}e1(nOMB^@Ssn8MYB zF+>P@+HAcP)h`c(v(st^MUpf!UrtH`x0q)}qAiT^)bHPLV)C)()F;5AUX6LE{W~M( z3ouc$`5OyN0o0XnC(KdBSwEfj3taeyI$d3gV1${`V#a{Y*%c#D*>|1MB@ax6v$Uxt z$39TkTLhm3pAp$AQA~{>umiCYJ+yHf~GX%+Y zZ|&pUvJ}^%Ej{RJYM`JRKbo`!7|r}b${2`bKGZRVu&~<0eGT@$eK8{JHi@njb-)?& z1A{(z)ozdsb?(DR16-M7#Z5c#xx15WD^>Y}@OD;hMLy#5n{kMcrkN|;ax9{UeNdK? zmd7~*rWC_g;zeO;u>7_ku{xL*mtdO#Nz?4`#=7KF=Jv@XuH&L0G%!hw0=e8#9)_Ur zYGlQspD6OC)LE>!eBYTcT$rpiTqcL=(+fC?7(_U4GRB_WHyj_vE*&UWB9%zc;vl5^ z#0IeNqRN{>P8S5Jj%p7kC2DU}gVw)sg1E7x3#xZj7Rm2-or7xr)z}*Uy2z2m$sO9` zZdQ~CYj{+ikWdyI675>wGCBI=_vGHm?2>b*;|Hp0X%Zzi9G3WFiA8X$ij(&BJKAs# zxQ8UmX4^qUqZ5+4K`XprvW_xjg9<5MuZ1*>$(}v*mt~iTd4*)o$J(O|izm^!lb??T z$(uVqGu;B(h_lAK*h89Cn5km>7`$0@U6_9wtjuDsE-- zsx5FK!M4gBFdh3=znv+cS-WC`n%T z%4rHFfiB=gyJ>fo@DH?%RQ2`lK~&IzX^I#X#6^rEMh*NtSSW9xxzE3hz$A=Vmb9%( z0vE6t>)qFAw*aEgH`#xu_`gQWzt-5^oqm)BLfFZi_5BS$Rw$L#M*)#rN|xAw=T6$sbh%^`D`% znRpA~Sh^lTPTY7ASmI~{Ic!MN__mwcs%z%v{{2df)b;6`2-*4L&C zPCY}8hPiL`X2T)B^k~wSluyDcJWMeHG|42l_k>5DjarV# zPf3o*xKljyKA-T3G$nn|V4Sd;S62jVfN{bqh#mDkdC?$tY>aPzqC{Af4W>11O?M7e zRA<|0gJ*)A_r!z+=7KrlB7zlphS&0W+(}txQ)=m_icG=)NRuM<0kEEMSjN5N_I2#k z`~>sd8KZOfxe8W(7^pvt=9_ZO9tr8B?-IDERh>T!%EyOo+6)l{Qt(m_Y21 zavkRQg#GM(eX~Vxv-`NRnud~GJtxiR1tuSXifibIFb7{W5F$evkb*(*S1FCA^x1fr zphBE&*SjZ`H~bt!jGqIW{2qTKJseP9d$0T4gDt8<*ukGwuZK0fV~Kev+gevUaxTmN z>XY0dZ;Xl)Lk#b8)buj=Ah2TSn_4DsF*B$jRwwJ;WuDw_&M>oV8%@13^7-|QfdALt zxra4*<$0X7Yn|0`y3e$Tqg-_Cv{Q>o?Nn|e*jigHDpfQHfuKfoFou9!BtS@SI(-z9 zrMASDn_C48;bPDv$R)9MC2b=?1d@`FpdzFY0zyavx$f^xg2mlo8e#w0XZjBhPcA3# z`+Lv%o%6ou`~Cc$+sUcP(J#-{zzx44t_JHZ2=3yYVbde@8G3#EijMKUg__A5-FS(n zE4jcVd1f!VR`Y5}oj9ES?T^;$DZ4e8l^ZaMUBc_x0gWxkjt(k)&puB$^ZV9%D)OWv zutr0O=`gnQxu2(5vvy=QG4S>2CL9h!&8*s}0%KYBs{}!`|A^o=osBLQDsrOKLJf<= z$?g`mGiv?~-EE?cv~JRpf~jf5%j;h;1?AU7CG@lr2vV5cceME zFJLm29X;E|lELBkn65HCRa?4K_`*8RHcf$a>J5obRi{0CyLe0%A?1#oG+eZmj6W)C zs!U_jD~!~HT@~7W`nn-?-R6)I6N8J+QUHF~Kp5U4Lv?HDj&ga^b$$*U#Ti4ihhY`d z+3@B_`p~nPrBT zPsq1W%ykc{{KEThT=$a?+0MVr2`Ba@`NK$P^+T!)3h&s!ruC|rw5vS>_Ho$R6PRXZ47~3pkX7}i`-t=6zZ7fla!Monm#GfH zz*SP=0SblqS9KF+gpQ9^)r-^e22bodo>n{xcK{77C%Wm;rQ_^^yudDbS#U5TQbFgW z48yItmUCO)c_d2JXsl}r>8eX@tw|RF8sLnejX-QXB2M*r=gA4^${I#?DfxBEr}>Bi zRA0_-7?i=x083OvAI9|`r)LLu6v>1u()Z#T0eNa{<^l`?kt9QtK(szc)V zcpWQ!r`3~<|GwrIwVe(< z-{X~;7qgbQ-6sSM>#|?oB5>%m9*^mFmCacmq1n6Ek>(Jr;kPM5vhm(4gxE2_E!Bt2 z;k0)lvdo-Eo$`S-359bBdC~|`YgB6UukD;mR0dc4EOOvG$@&WB$hsXr@Hq}r}RourrZq3@;0nxhion#BQF4`AyIf%XaXGrM$v!jxJfU(4)N!Za2FW%#qJ^eh+5exdDP-65 zj1Vn=T}8Xq&=oP)%t>peJQW9fv#Y&ruqWyvfNSRGikfM*w!Fc2w8p#9Y@X6;F^qiK zHWySfGv~x71U!$x^^-Bofjq%1c14OXW14$HFH5L=4YzPS_5Y-$fD#(^)`W2}gX5a) zYqmhG3p8*Mcv~qi8tif|?CKw%F%Qqw*(yJaAQdt|txZ)P^Kwl%Hd~xt9U^KG)eAlc z<_t1RcS%2=u24N`)DePl4%Km{Ycjt7nNg<2$(W`r1Il|826G!W=G|j|nNPevLafF3 zFE}HS+lccS&Di&$!~(tx`;*|kWB&6vpv2aWj)>r0QN3AKmTRia+4_{kK9E(gKoi&i zLePc-j z!5+Vw-SCZkV5@5){uy^tGsWv>ce?|ks^9MEal!33OF{N2A@d3s&SgHmilqlAoiwVx^cZ)1m*D00B`ImEuEidkC<=Su)tO*RObf^A~ zVH){n_8ZgQo`pLQXI!ul=Kr3Gs?kk?MNIVH@sOuOqIr06+5@)sz-Nf3ED#a?8fM(p zn?5scIYEU-V&4Q7=fAM%dm{3eOn?{O4EbtW8Nc8qyF2^=JUmJ^IWmDJeCWCFEy#ye z$iwUxd1=VcRU4P(k|R?nOAs;wuo3VV?TFX5KUKhupW5TJ9j}70 zjva(jPbRMwDwwY1nI7H$R*P|b$&Hrlz#$?#CM$Ta*xf|VFD4y!U}ysokU)Lj<3oGH z(cW3Rc@ z)(H>d6) zu-eRhW5*MU89|{5iZy`){r2cx{e$yUIY*>`v;`#ox%&Hv*kS zozS~!n13mae#pH=9jpsfr|h>)-Vs$^u-1Zo)<)3a%hKqFOFxbXAy0?o#*&`0B);zr zOg>DF{G-WSTJuXK&0YZZ^cOR3jRgsKL3D$6VVFjH4iI0Klx5Dq-AM5MIteF{(yNFh zf=_dj(!%~b2V1~fqmy#ljtlshykN*8yVP>8E0<=N>|BYT%8Db!yytm4(GN8{bU zC0O=cdSBp~Iv6Gw zq?%FpB%m<#$TE$FsRKm%0%C5uttCy-!d#Fl*YF>X=V`;lv4W)`iKe^Phk4p?UDkDy zkSvZM6+zKnR)(@SA14X*u3C4&z`y8h{>o|Q5|!oj_?qTq<*8l1C+uxDnIc(6?SBm|m}5S-l{WBkyZ0&rk}NFLbo?!G;~N0aWHT1!?!m z`y2t){MBeccyGT#lXt66X)PNK7#&?2s`FBPk9x7xwBmdsuWCUmS}+k)&TVW>?xk%i zW<*Hz@zIQ6L-9n#Fg|xW)GNwSMJEq(7XKjPiA{Mwtvzk1vz{vIpInxrk#E&QrB}<5 z3@^Oz+b8eWdoEG+t*^9!PR>f(YDINc+@!jXGf2^N;f{qPK{yX7W1(h)>O}5U;~h{T zq!$hL7>w~|4kZw^@u*?&s|RKWOU%u!>fq2#a(7(V9?V!HSMwHW?NIug&Aic%$nNZp zm8y7Bgl3MGc6-Fegkn8Vss31F)#NdOxAcwEs+p&nz7D`s*SLZSmOo0tUtEuG=Pl;%%kb(g z@~*yl+~XAMHLP5+QW<*%ml&sR_z$dvJ+yNe3XE=!L7DbaYc!SB$&EvJ&ZcF2f_&{N zG>^#o4Zo-2!DTDDcRUdt$nZMqp=DWU)n_L}l%b5SBclzXW+(RByO02*Mx2%xiEwB{_B?^BEJUg z*G}^PX zyWp(B1fTi9S?33ue#TiB2>$)8vw9Bv`%m@Z(kY9<7W(k>e_ga2Rho~4d;8X1Z`Zta H;G_Qrg+kw- literal 0 HcmV?d00001 diff --git a/DirectProgramming/C++SYCL_FPGA/ReferenceDesigns/db/sample.json b/DirectProgramming/C++SYCL_FPGA/ReferenceDesigns/db/sample.json index 85b4f220b1..b0a302aff6 100755 --- a/DirectProgramming/C++SYCL_FPGA/ReferenceDesigns/db/sample.json +++ b/DirectProgramming/C++SYCL_FPGA/ReferenceDesigns/db/sample.json @@ -30,6 +30,17 @@ "./db.fpga_emu --dbroot=../data/sf0.01 --test" ] }, + { + "id": "fpga_emu_q9", + "steps": [ + "icpx --version", + "mkdir build-q9", + "cd build-q9", + "cmake .. -DQUERY=9", + "make fpga_emu", + "./db.fpga_emu --dbroot=../data/sf0.01 --test" + ] + }, { "id": "fpga_emu_q11", "steps": [ @@ -97,6 +108,19 @@ "db.fpga_emu.exe --dbroot=../data/sf0.01 --test" ] }, + { + "id": "fpga_emu_q9", + "steps": [ + "icpx --version", + "cd ../..", + "mkdir build-q9", + "cd build-q9", + "xcopy /E ..\\ReferenceDesigns\\db\\data ..\\data\\", + "cmake -G \"NMake Makefiles\" ../ReferenceDesigns/db -DQUERY=9", + "nmake fpga_emu", + "db.fpga_emu.exe --dbroot=../data/sf0.01 --test" + ] + }, { "id": "fpga_emu_q11", "steps": [ diff --git a/DirectProgramming/C++SYCL_FPGA/ReferenceDesigns/db/src/CMakeLists.txt b/DirectProgramming/C++SYCL_FPGA/ReferenceDesigns/db/src/CMakeLists.txt index 1841854cbb..30849bf784 100755 --- a/DirectProgramming/C++SYCL_FPGA/ReferenceDesigns/db/src/CMakeLists.txt +++ b/DirectProgramming/C++SYCL_FPGA/ReferenceDesigns/db/src/CMakeLists.txt @@ -15,6 +15,9 @@ endif() if(${QUERY} EQUAL 1) set(DEFAULT_BOARD "intel_a10gx_pac:pac_a10") set(DEFAULT_BOARD_STR "Intel Arria(R) 10 GX") +elseif(${QUERY} EQUAL 9) + set(DEFAULT_BOARD "intel_s10sx_pac:pac_s10") + set(DEFAULT_BOARD_STR "Intel Stratix(R) 10 SX") elseif(${QUERY} EQUAL 11) set(DEFAULT_BOARD "intel_s10sx_pac:pac_s10") set(DEFAULT_BOARD_STR "Intel Stratix(R) 10 SX") @@ -42,8 +45,8 @@ else() endif() # ensure a supported query was requested -if(NOT ${QUERY} EQUAL 1 AND NOT ${QUERY} EQUAL 11 AND NOT ${QUERY} EQUAL 12) - message(FATAL_ERROR "\tQUERY ${QUERY} not supported (supported queries are 1, 11 and 12)") +if(NOT ${QUERY} EQUAL 1 AND NOT ${QUERY} EQUAL 9 AND NOT ${QUERY} EQUAL 11 AND NOT ${QUERY} EQUAL 12) + message(FATAL_ERROR "\tQUERY ${QUERY} not supported (supported queries are 1, 9, 11 and 12)") endif() # Pick the default seed if the user did not specify one to CMake. @@ -52,6 +55,8 @@ if(NOT DEFINED SEED) if(${FPGA_DEVICE} MATCHES ".*a10.*") if(${QUERY} EQUAL 1) set(SEED "-Xsseed=2") + elseif(${QUERY} EQUAL 9) + set(SEED "-Xsseed=2") elseif(${QUERY} EQUAL 11) set(SEED "-Xsseed=4") elseif(${QUERY} EQUAL 12) @@ -60,6 +65,8 @@ if(NOT DEFINED SEED) elseif(${FPGA_DEVICE} MATCHES ".*s10.*") if(${QUERY} EQUAL 1) set(SEED "-Xsseed=3") + elseif(${QUERY} EQUAL 9) + set(SEED "-Xsseed=2") elseif(${QUERY} EQUAL 11) set(SEED "-Xsseed=3") elseif(${QUERY} EQUAL 12) @@ -68,6 +75,8 @@ if(NOT DEFINED SEED) elseif(${FPGA_DEVICE} MATCHES ".*agilex.*") if(${QUERY} EQUAL 1) set(SEED "-Xsseed=2") + elseif(${QUERY} EQUAL 9) + set(SEED "-Xsseed=2") elseif(${QUERY} EQUAL 11) set(SEED "-Xsseed=4") elseif(${QUERY} EQUAL 12) @@ -82,10 +91,10 @@ if(IGNORE_DEFAULT_SEED) set(SEED "") endif() -# Error out if trying to run Q11 on Arria 10 +# Error out if trying to run Q9 or Q11 on Arria 10 if (${FPGA_DEVICE} MATCHES ".*a10.*") - if(${QUERY} EQUAL 11) - message(FATAL_ERROR "Query 11 is not supported on Arria 10 devices") + if(${QUERY} EQUAL 9 OR ${QUERY} EQUAL 11) + message(FATAL_ERROR "Queries 9 and 11 are not supported on Arria 10 devices") endif() endif() @@ -106,6 +115,9 @@ endif() if(${QUERY} EQUAL 1) set(DEVICE_SOURCE query1/query1_kernel.cpp) set(DEVICE_HEADER query1/query1_kernel.hpp) +elseif(${QUERY} EQUAL 9) + set(DEVICE_SOURCE query9/query9_kernel.cpp) + set(DEVICE_HEADER query9/query9_kernel.hpp) elseif(${QUERY} EQUAL 11) set(DEVICE_SOURCE query11/query11_kernel.cpp) set(DEVICE_HEADER query11/query11_kernel.hpp) @@ -113,7 +125,7 @@ elseif(${QUERY} EQUAL 12) set(DEVICE_SOURCE query12/query12_kernel.cpp) set(DEVICE_HEADER query12/query12_kernel.hpp) else() - message(FATAL_ERROR "\tQUERY ${QUERY} not supported (supported queries are 1, 11 and 12)") + message(FATAL_ERROR "\tQUERY ${QUERY} not supported (supported queries are 1, 9, 11 and 12)") endif() # A SYCL ahead-of-time (AoT) compile processes the device code in two stages. diff --git a/DirectProgramming/C++SYCL_FPGA/ReferenceDesigns/db/src/db.cpp b/DirectProgramming/C++SYCL_FPGA/ReferenceDesigns/db/src/db.cpp index d41a3bf1e3..fee2020eb8 100644 --- a/DirectProgramming/C++SYCL_FPGA/ReferenceDesigns/db/src/db.cpp +++ b/DirectProgramming/C++SYCL_FPGA/ReferenceDesigns/db/src/db.cpp @@ -42,6 +42,11 @@ using namespace sycl; bool DoQuery1(queue& q, Database& dbinfo, std::string& db_root_dir, std::string& args, bool test, bool print, double& kernel_latency, double& total_latency); +#elif (QUERY == 9) +#include "query9/query9_kernel.hpp" +bool DoQuery9(queue& q, Database& dbinfo, std::string& db_root_dir, + std::string& args, bool test, bool print, double& kernel_latency, + double& total_latency); #elif (QUERY == 11) #include "query11/query11_kernel.hpp" bool DoQuery11(queue& q, Database& dbinfo, std::string& db_root_dir, @@ -168,9 +173,9 @@ int main(int argc, char* argv[]) { } // make sure the query is supported - if (!(query == 1 || query == 11 || query == 12)) { + if (!(query == 1 || query == 9 || query == 11 || query == 12)) { std::cerr << "ERROR: unsupported query (" << query << "). " - << "Only queries 1, 11 and 12 are supported\n"; + << "Only queries 1, 9, 11 and 12 are supported\n"; return 1; } @@ -224,6 +229,13 @@ int main(int argc, char* argv[]) { success = DoQuery1(q, dbinfo, db_root_dir, args, test_query, print_result, kernel_latency[run], total_latency[run]); +#endif + } else if (query == 9) { + // query9 +#if (QUERY == 9) + success = DoQuery9(q, dbinfo, db_root_dir, args, + test_query, print_result, + kernel_latency[run], total_latency[run]); #endif } else if (query == 11) { // query11 @@ -351,6 +363,51 @@ bool DoQuery1(queue& q, Database& dbinfo, std::string& db_root_dir, } #endif +#if (QUERY == 9) +bool DoQuery9(queue& q, Database& dbinfo, std::string& db_root_dir, + std::string& args, bool test, bool print, double& kernel_latency, + double& total_latency) { + // the default colour regex based on the TPCH documents + std::string colour = "GREEN"; + + // parse the query arguments + if (!test && !args.empty()) { + std::stringstream ss(args); + std::getline(ss, colour, ','); + } else { + if (!args.empty()) { + std::cout << "Testing query 9, therefore ignoring the '--args' flag\n"; + } + } + + // convert the colour regex to uppercase characters (convention) + transform(colour.begin(), colour.end(), colour.begin(), ::toupper); + + std::cout << "Running Q9 with colour regex: " << colour << std::endl; + + // the output of the query + std::array sum_profit; + + // perform the query + bool success = SubmitQuery9(q, dbinfo, colour, sum_profit, kernel_latency, + total_latency); + + if (success) { + // validate the results of the query, if requested + if (test) { + success = dbinfo.ValidateQ9(db_root_dir, sum_profit); + } + + // print the results of the query, if requested + if (print) { + dbinfo.PrintQ9(sum_profit); + } + } + + return success; +} +#endif + #if (QUERY == 11) bool DoQuery11(queue& q, Database& dbinfo, std::string& db_root_dir, std::string& args, bool test, bool print, double& kernel_latency, diff --git a/DirectProgramming/C++SYCL_FPGA/ReferenceDesigns/db/src/dbdata.cpp b/DirectProgramming/C++SYCL_FPGA/ReferenceDesigns/db/src/dbdata.cpp index a4640e513b..09efcf86ee 100644 --- a/DirectProgramming/C++SYCL_FPGA/ReferenceDesigns/db/src/dbdata.cpp +++ b/DirectProgramming/C++SYCL_FPGA/ReferenceDesigns/db/src/dbdata.cpp @@ -625,6 +625,59 @@ bool Database::ValidateQ1(std::string db_root_dir, return valid; } +// +// validate the results of Query 9 +// +bool Database::ValidateQ9(std::string db_root_dir, + std::array& sum_profit) { + std::cout << "Validating query 9 test results" << std::endl; + + // populate date row by row (as presented in the file) + std::string path(db_root_dir + kSeparator + "answers" + kSeparator + "q9.out"); + std::ifstream ifs(path); + std::string line; + + bool valid = true; + + if (!ifs.is_open()) { + std::cout << "Failed to open " << path << "\n"; + return false; + } + + // do nothing with the first line, it is a header line + std::getline(ifs, line); + + while (std::getline(ifs, line)) { + // split row into column strings by separator ('|') + std::vector column_data = SplitRowStr(line); + assert(column_data.size() == 3); + + std::string nationname_gold = column_data[0]; + trim(nationname_gold); + transform(nationname_gold.begin(), nationname_gold.end(), + nationname_gold.begin(), ::toupper); + + assert(n.name_key_map.find(nationname_gold) != n.name_key_map.end()); + + unsigned char nationkey_gold = n.name_key_map[nationname_gold]; + + unsigned int year_gold = std::stoi(column_data[1]); + double sum_profit_gold = std::stod(column_data[2]); + + double sum_profit_res = + (double)(sum_profit[year_gold * 25 + nationkey_gold]) / (100.0 * 100.0); + + if (!AlmostEqual(sum_profit_gold, sum_profit_res, 0.01f)) { + std::cerr << "ERROR: sum_profit for " << nationname_gold << " in " + << year_gold << " did not match (Expected=" << sum_profit_gold + << ", Result=" << sum_profit_res << ")\n"; + valid = false; + } + } + + return valid; +} + // // validate the results of Query 11 // @@ -782,6 +835,56 @@ void Database::PrintQ1(std::array& sum_qty, } } +// +// print the results of Query 9 +// +void Database::PrintQ9(std::array& sum_profit) { + // row of Q9 output for local sorting + struct Row { + Row(std::string& nation, int year, DBDecimal sum_profit) + : nation(nation), year(year), sum_profit(sum_profit) {} + std::string nation; + int year; + DBDecimal sum_profit; + + void print() { + std::cout << nation << "|" << year << "|" + << (double)(sum_profit) / (100.0 * 100.0) << "\n"; + } + }; + + // create the rows + std::vector outrows; + for (unsigned char nat = 0; nat < kNationTableSize; nat++) { + std::string nation_name = n.key_name_map[nat]; + for (int y = 1992; y <= 1998; y++) { + outrows.push_back(Row(nation_name, y, sum_profit[y * 25 + nat])); + } + } + + // sort rows by year + std::sort(outrows.begin(), outrows.end(), + [](const Row& a, const Row& b) -> bool { + return a.year > b.year; + }); + + // sort rows by nation + // stable_sort() preserves the order of the previous sort + std::stable_sort(outrows.begin(), outrows.end(), + [](const Row& a, const Row& b) -> bool { + return a.nation < b.nation; + }); + + // print the header + std::cout << "nation|o_year|sum_profit\n"; + + // print the results + std::cout << std::fixed << std::setprecision(2); + for (int i = 0; i < outrows.size(); i++) { + outrows[i].print(); + } +} + // // print the results of Query 11 // diff --git a/DirectProgramming/C++SYCL_FPGA/ReferenceDesigns/db/src/dbdata.hpp b/DirectProgramming/C++SYCL_FPGA/ReferenceDesigns/db/src/dbdata.hpp index 3fb9a3852c..5e560372ca 100644 --- a/DirectProgramming/C++SYCL_FPGA/ReferenceDesigns/db/src/dbdata.hpp +++ b/DirectProgramming/C++SYCL_FPGA/ReferenceDesigns/db/src/dbdata.hpp @@ -181,6 +181,9 @@ struct Database { std::array& avg_discount, std::array& count); + bool ValidateQ9(std::string db_root_dir, + std::array& sum_profit); + bool ValidateQ11(std::string db_root_dir, std::vector& partkeys, std::vector& partkey_values); @@ -198,6 +201,8 @@ struct Database { std::array& avg_discount, std::array& count); + void PrintQ9(std::array& sum_profit); + void PrintQ11(std::vector& partkeys, std::vector& partkey_values); diff --git a/DirectProgramming/C++SYCL_FPGA/ReferenceDesigns/db/src/query9/pipe_types.hpp b/DirectProgramming/C++SYCL_FPGA/ReferenceDesigns/db/src/query9/pipe_types.hpp new file mode 100644 index 0000000000..a6adbfc0fa --- /dev/null +++ b/DirectProgramming/C++SYCL_FPGA/ReferenceDesigns/db/src/query9/pipe_types.hpp @@ -0,0 +1,272 @@ +#ifndef __PIPE_TYPES_H__ +#define __PIPE_TYPES_H__ +#pragma once + +#include +#include + +#include "../db_utils/StreamingData.hpp" +#include "../dbdata.hpp" + +using namespace sycl; + +// +// A single row of the PARTSUPPLIER table +// with a subset of the columns (needed for this query) +// +class PartSupplierRow { + public: + PartSupplierRow() : valid(false), partkey(0), suppkey(0), supplycost(0) {} + PartSupplierRow(bool v_valid, DBIdentifier v_partkey, DBIdentifier v_suppkey, + DBDecimal v_supplycost) + : valid(v_valid), + partkey(v_partkey), + suppkey(v_suppkey), + supplycost(v_supplycost) {} + + // NOTE: this is not true, but is key to be used by MapJoin + DBIdentifier PrimaryKey() const { return suppkey; } + + bool valid; + DBIdentifier partkey; + DBIdentifier suppkey; + DBDecimal supplycost; +}; + +// +// A row of the join SUPPLIER and PARTSUPPLIER table +// +class SupplierPartSupplierJoined { + public: + SupplierPartSupplierJoined() + : valid(false), partkey(0), suppkey(0), supplycost(0), nationkey(0) {} + SupplierPartSupplierJoined(bool v_valid, DBIdentifier v_partkey, + DBIdentifier v_suppkey, DBDecimal v_supplycost, + unsigned char v_nationkey) + : valid(v_valid), + partkey(v_partkey), + suppkey(v_suppkey), + supplycost(v_supplycost), + nationkey(v_nationkey) {} + + DBIdentifier PrimaryKey() const { return partkey; } + + void Join(const unsigned char nation_key, const PartSupplierRow& ps_row) { + partkey = ps_row.partkey; + suppkey = ps_row.suppkey; + supplycost = ps_row.supplycost; + nationkey = nation_key; + } + + bool valid; + DBIdentifier partkey; + DBIdentifier suppkey; + DBDecimal supplycost; + unsigned char nationkey; +}; + +// +// A single row of the ORDERS table +// with a subset of the columns (needed for this query) +// +class OrdersRow { + public: + OrdersRow() : valid(false), orderkey(0), orderdate(0) {} + OrdersRow(bool v_valid, DBIdentifier v_orderkey, DBDate v_orderdate) + : valid(v_valid), orderkey(v_orderkey), orderdate(v_orderdate) {} + + DBIdentifier PrimaryKey() const { return orderkey; } + + bool valid; + DBIdentifier orderkey; + DBDate orderdate; +}; + +// +// A single row of the LINEITEM table +// with a subset of the columns (needed for this query) +// +class LineItemMinimalRow { + public: + LineItemMinimalRow() + : valid(false), idx(0), orderkey(0), partkey(0), suppkey(0) {} + LineItemMinimalRow(bool v_valid, unsigned int v_idx, DBIdentifier v_orderkey, + DBIdentifier v_partkey, DBIdentifier v_suppkey) + : valid(v_valid), + idx(v_idx), + orderkey(v_orderkey), + partkey(v_partkey), + suppkey(v_suppkey) {} + + DBIdentifier PrimaryKey() const { return orderkey; } + + bool valid; + unsigned int idx; + DBIdentifier orderkey, partkey, suppkey; +}; + +// +// A row of the join LINEITEM and ORDERS table +// +class LineItemOrdersMinimalJoined { + public: + LineItemOrdersMinimalJoined() + : valid(false), lineitemIdx(0), partkey(0), suppkey(0), orderdate(0) {} + LineItemOrdersMinimalJoined(bool v_valid, unsigned int v_lineitem_idx, + DBIdentifier v_partkey, DBIdentifier v_suppkey, + DBDate v_orderdate) + : valid(v_valid), + lineitemIdx(v_lineitem_idx), + partkey(v_partkey), + suppkey(v_suppkey), + orderdate(v_orderdate) {} + + DBIdentifier PrimaryKey() { return partkey; } + + void Join(const OrdersRow& o_row, const LineItemMinimalRow& li_row) { + lineitemIdx = li_row.idx; + partkey = li_row.partkey; + suppkey = li_row.suppkey; + orderdate = o_row.orderdate; + } + + bool valid; + unsigned int lineitemIdx; + DBIdentifier partkey; + DBIdentifier suppkey; + DBDate orderdate; +}; + +// +// Datatype to be sent to be sorted by the FifoSorter +// +class SortData { + public: + SortData() {} + SortData(unsigned int v_lineitem_idx, DBIdentifier v_partkey, + DBIdentifier v_suppkey, DBDate v_orderdate) + : lineitemIdx(v_lineitem_idx), + partkey(v_partkey), + suppkey(v_suppkey), + orderdate(v_orderdate) {} + SortData(const LineItemOrdersMinimalJoined& d) + : lineitemIdx(d.lineitemIdx), + partkey(d.partkey), + suppkey(d.suppkey), + orderdate(d.orderdate) {} + + bool operator<(const SortData& t) const { return partkey < t.partkey; } + bool operator>(const SortData& t) const { return partkey > t.partkey; } + bool operator<=(const SortData& t) const { return partkey <= t.partkey; } + bool operator>=(const SortData& t) const { return partkey >= t.partkey; } + bool operator==(const SortData& t) const { return partkey == t.partkey; } + bool operator!=(const SortData& t) const { return partkey != t.partkey; } + + unsigned int lineitemIdx; + DBIdentifier partkey; + DBIdentifier suppkey; + DBDate orderdate; +}; + +// +// The final data used to compute the 'amount' +// +class FinalData { + public: + FinalData() + : valid(false), + partkey(0), + lineitemIdx(0), + orderdate(0), + supplycost(0), + nationkey(0) {} + + FinalData(bool v_valid, DBIdentifier v_partkey, unsigned int v_lineitem_idx, + DBDate v_orderdate, DBDecimal v_supplycost, + unsigned char v_nationkey) + : valid(v_valid), + partkey(v_partkey), + lineitemIdx(v_lineitem_idx), + orderdate(v_orderdate), + supplycost(v_supplycost), + nationkey(v_nationkey) {} + + DBIdentifier PrimaryKey() { return partkey; } + + void Join(const SupplierPartSupplierJoined& s_ps_row, + const LineItemOrdersMinimalJoined& li_o_row) { + valid = s_ps_row.suppkey == li_o_row.suppkey; + + partkey = s_ps_row.partkey; + lineitemIdx = li_o_row.lineitemIdx; + orderdate = li_o_row.orderdate; + supplycost = s_ps_row.supplycost; + nationkey = s_ps_row.nationkey; + } + + bool valid; + DBIdentifier partkey; + unsigned int lineitemIdx; + DBDate orderdate; + DBDecimal supplycost; + unsigned char nationkey; +}; + +// joining window sizes +constexpr int kRegexFilterElementsPerCycle = 1; +constexpr int kOrdersJoinWinSize = 1; +constexpr int kLineItemJoinWinSize = 2; +constexpr int kLineItemOrdersJoinWinSize = kLineItemJoinWinSize; +constexpr int kLineItemOrdersSortedWinSize = 1; +constexpr int kPartSupplierDuplicatePartkeys = 4; +constexpr int kFinalDataMaxSize = + kPartSupplierDuplicatePartkeys * kLineItemOrdersSortedWinSize; + +// pipe data +using LineItemMinimalRowPipeData = + StreamingData; + +using OrdersRowPipeData = + StreamingData; + +using LineItemOrdersMinimalJoinedPipeData = + StreamingData; + +using LineItemOrdersMinimalSortedPipeData = + StreamingData; + +using PartSupplierRowPipeData = + StreamingData; + +using SupplierPartSupplierJoinedPipeData = + StreamingData; + +using FinalPipeData = + StreamingData; + +// pipes +using LineItemPipe = + sycl::pipe; + +using OrdersPipe = + sycl::pipe; + +using LineItemOrdersPipe = + sycl::pipe; + +using LineItemOrdersSortedPipe = + sycl::pipe; + +using PartSupplierPartsPipe = + sycl::pipe; + +using PartSupplierPipe = + sycl::pipe; + +using FinalPipe = + sycl::pipe; + +#endif /* __PIPE_TYPES_H__ */ diff --git a/DirectProgramming/C++SYCL_FPGA/ReferenceDesigns/db/src/query9/query9_kernel.cpp b/DirectProgramming/C++SYCL_FPGA/ReferenceDesigns/db/src/query9/query9_kernel.cpp new file mode 100644 index 0000000000..d1cf24a9fe --- /dev/null +++ b/DirectProgramming/C++SYCL_FPGA/ReferenceDesigns/db/src/query9/query9_kernel.cpp @@ -0,0 +1,644 @@ +#include +#include +#include +#include + +#include "query9_kernel.hpp" +#include "pipe_types.hpp" + +#include "onchip_memory_with_cache.hpp" // DirectProgramming/DPC++FPGA/include + +#include "../db_utils/Accumulator.hpp" +#include "../db_utils/LikeRegex.hpp" +#include "../db_utils/MapJoin.hpp" +#include "../db_utils/MergeJoin.hpp" +#include "../db_utils/Misc.hpp" +#include "../db_utils/ShannonIterator.hpp" +#include "../db_utils/Tuple.hpp" +#include "../db_utils/Unroller.hpp" +#include "../db_utils/fifo_sort.hpp" + +using namespace std::chrono; + +// +// NOTE: See the README file for a diagram of how the different kernels are +// connected +// + +// kernel class names +class ProducerOrders; +class FilterParts; +class ProducePartSupplier; +class JoinPartSupplierSupplier; +class JoinLineItemOrders; +class FeedSort; +class FifoSort; +class ConsumeSort; +class JoinEverything; +class Compute; + +///////////////////////////////////////////////////////////////////////////// +// sort configuration +using SortType = SortData; + +// need to sort at most 6% of the lineitem table +constexpr int kNumSortStages = CeilLog2(kLineItemTableSize * 0.06); +constexpr int kSortSize = Pow2(kNumSortStages); + +using SortInPipe = pipe; +using SortOutPipe = pipe; + +static_assert(kLineItemTableSize * 0.06 <= kSortSize, + "Must be able to sort all part keys"); +///////////////////////////////////////////////////////////////////////////// + +// +// Helper function to shuffle the valid values in 'input' into 'output' using +// the bits template +// For example, consider this simple case: +// input = {7,8} +// if bits = 1 (2'b01), then output = {0,7} +// if bits = 2 (2'b01), then output = {0,8} +// if bits = 3 (2'b01), then output = {7,8} +// +template +void Shuffle(NTuple& input, + NTuple& output) { + // get number of ones (number of valid entries) in the input + constexpr char kNumOnes = CountOnes(bits); + + // static asserts + static_assert(tuple_size > 0, + "tuple_size must strictly positive"); + static_assert(kNumOnes <= tuple_size, + "Number of valid bits in bits cannot exceed the size of the tuple"); + + // full crossbar to reorder valid entries of 'input' + UnrolledLoop<0, kNumOnes>([&](auto i) { + constexpr char pos = PositionOfNthOne(i + 1, bits) - 1; + output.template get() = input.template get(); + }); +} + +bool SubmitQuery9(queue& q, Database& dbinfo, std::string colour, + std::array& sum_profit, + double& kernel_latency, double& total_latency) { + // copy the regex string to character array, pad with NULL characters + std::array regex_word; + for (size_t i = 0; i < 11; i++) { + regex_word[i] = (i < colour.size()) ? colour[i] : '\0'; + } + + // create space for the input buffers + // the REGEX + buffer regex_word_buf(regex_word); + + // PARTS + buffer p_name_buf(dbinfo.p.name); + + // SUPPLIER + buffer s_nationkey_buf(dbinfo.s.nationkey); + + // PARTSUPPLIER + buffer ps_partkey_buf(dbinfo.ps.partkey); + buffer ps_suppkey_buf(dbinfo.ps.suppkey); + buffer ps_supplycost_buf(dbinfo.ps.supplycost); + + // ORDERS + buffer o_orderkey_buf(dbinfo.o.orderkey); + buffer o_orderdate_buf(dbinfo.o.orderdate); + + // LINEITEM + buffer l_orderkey_buf(dbinfo.l.orderkey); + buffer l_partkey_buf(dbinfo.l.partkey); + buffer l_suppkey_buf(dbinfo.l.suppkey); + buffer l_quantity_buf(dbinfo.l.quantity); + buffer l_extendedprice_buf(dbinfo.l.extendedprice); + buffer l_discount_buf(dbinfo.l.discount); + + // setup the output buffer (the profit for each nation and year) + buffer sum_profit_buf(sum_profit); + + // number of producing iterations depends on the number of elements per cycle + const size_t l_rows = dbinfo.l.rows; + const size_t l_iters = + (l_rows + kLineItemJoinWinSize - 1) / kLineItemJoinWinSize; + const size_t o_rows = dbinfo.o.rows; + const size_t o_iters = + (o_rows + kOrdersJoinWinSize - 1) / kOrdersJoinWinSize; + const size_t ps_rows = dbinfo.ps.rows; + const size_t ps_iters = + (ps_rows + kPartSupplierDuplicatePartkeys - 1) + / kPartSupplierDuplicatePartkeys; + const size_t p_rows = dbinfo.p.rows; + const size_t p_iters = + (p_rows + kRegexFilterElementsPerCycle - 1) + / kRegexFilterElementsPerCycle; + + // start timer + high_resolution_clock::time_point host_start = high_resolution_clock::now(); + + ///////////////////////////////////////////////////////////////////////////// + //// FilterParts Kernel: + //// Filter the PARTS table and produce the filtered LINEITEM table + auto filter_parts_event = q.submit([&](handler& h) { + // REGEX word accessor + accessor regex_word_accessor(regex_word_buf, h, read_only); + + // PARTS table accessors + accessor p_name_accessor(p_name_buf, h, read_only); + + // LINEITEM table accessors + accessor l_orderkey_accessor(l_orderkey_buf, h, read_only); + accessor l_partkey_accessor(l_partkey_buf, h, read_only); + accessor l_suppkey_accessor(l_suppkey_buf, h, read_only); + + // kernel to filter parts table based on REGEX + h.single_task([=]() [[intel::kernel_args_restrict]] { + // a map where the key is the partkey and the value is whether + // that partkeys name matches the given regex + bool partkeys_matching_regex[kPartTableSize + 1]; + + /////////////////////////////////////////////// + //// Stage 1 + // find valid parts with REGEX + LikeRegex<11, 55> regex[kRegexFilterElementsPerCycle]; + + // initialize regex word + for (size_t i = 0; i < 11; i++) { + const char c = regex_word_accessor[i]; + UnrolledLoop<0, kRegexFilterElementsPerCycle>([&](auto re) { + regex[re].word[i] = c; + }); + } + + // stream in rows of PARTS table and check partname against REGEX + [[intel::initiation_interval(1), intel::ivdep]] + for (size_t i = 0; i < p_iters; i++) { + UnrolledLoop<0, kRegexFilterElementsPerCycle>([&](auto re) { + const size_t idx = i * kRegexFilterElementsPerCycle + re; + const bool idx_range = idx < p_rows; + + // read in partkey + // valid partkeys in range [1,kPartTableSize] + const DBIdentifier partkey = idx_range ? idx + 1 : 0; + + // read in regex string + UnrolledLoop<0, 55>([&](auto k) { + regex[re].str[k] = p_name_accessor[idx * 55 + k]; + }); + + // run regex matching + regex[re].Match(); + + // mark valid partkey + if (idx_range) { + partkeys_matching_regex[partkey] = regex[re].Contains(); + } + }); + } + /////////////////////////////////////////////// + + /////////////////////////////////////////////// + //// Stage 2 + // read in the LINEITEM table (kLineItemJoinWinSize rows at a time) + // row is valid if its PARTKEY matched the REGEX + [[intel::initiation_interval(1)]] + for (size_t i = 0; i < l_iters + 1; i++) { + bool done = (i == l_iters); + bool valid = (i != l_iters); + + // bulk read of data from global memory + NTuple data; + + UnrolledLoop<0, kLineItemJoinWinSize>([&](auto j) { + size_t idx = i * kLineItemJoinWinSize + j; + bool in_range = idx < l_rows; + + DBIdentifier orderkey = l_orderkey_accessor[idx]; + DBIdentifier partkey = l_partkey_accessor[idx]; + DBIdentifier suppkey = l_suppkey_accessor[idx]; + + bool matches_partkey_name_regex = partkeys_matching_regex[partkey]; + bool data_is_valid = in_range && matches_partkey_name_regex; + + data.get() = LineItemMinimalRow(data_is_valid, idx, orderkey, + partkey, suppkey); + }); + + // write to pipe + LineItemPipe::write(LineItemMinimalRowPipeData(done, valid, data)); + } + /////////////////////////////////////////////// + }); + }); + /////////////////////////////////////////////////////////////////////////// + + /////////////////////////////////////////////////////////////////////////// + //// ProducerOrders Kernel: produce the ORDERS table + auto producer_orders_event = q.submit([&](handler& h) { + // ORDERS table accessors + accessor o_orderkey_accessor(o_orderkey_buf, h, read_only); + accessor o_orderdate_accessor(o_orderdate_buf, h, read_only); + + // produce ORDERS table (kOrdersJoinWinSize rows at a time) + h.single_task([=]() [[intel::kernel_args_restrict]] { + [[intel::initiation_interval(1)]] + for (size_t i = 0; i < o_iters + 1; i++) { + bool done = (i == o_iters); + bool valid = (i != o_iters); + + // bulk read of data from global memory + NTuple data; + + UnrolledLoop<0, kOrdersJoinWinSize>([&](auto j) { + size_t idx = i * kOrdersJoinWinSize + j; + bool in_range = idx < l_rows; + + DBIdentifier orderkey_tmp = o_orderkey_accessor[idx]; + DBDate orderdate = o_orderdate_accessor[idx]; + + DBIdentifier orderkey = + in_range ? orderkey_tmp : std::numeric_limits::max(); + + data.get() = OrdersRow(in_range, orderkey, orderdate); + }); + + // write to pipe + OrdersPipe::write(OrdersRowPipeData(done, valid, data)); + } + }); + }); + /////////////////////////////////////////////////////////////////////////// + + /////////////////////////////////////////////////////////////////////////// + //// JoinLineItemOrders Kernel: join the LINEITEM and ORDERS table + auto join_lineitem_orders_event = q.submit([&](handler& h) { + // kernel to join LINEITEM and ORDERS table + h.single_task([=]() [[intel::kernel_args_restrict]] { + // JOIN LINEITEM and ORDERS table + MergeJoin(); + + // join is done, tell downstream + LineItemOrdersPipe::write( + LineItemOrdersMinimalJoinedPipeData(true, false)); + }); + }); + /////////////////////////////////////////////////////////////////////////// + + /////////////////////////////////////////////////////////////////////////// + //// JoinPartSupplierSupplier Kernel: join the PARTSUPPLIER and SUPPLIER tables + auto join_partsupplier_supplier_event = q.submit([&](handler& h) { + // SUPPLIER table accessors + size_t s_rows = dbinfo.s.rows; + accessor s_nationkey_accessor(s_nationkey_buf, h, read_only); + + // kernel to join partsupplier and supplier tables + h.single_task( + [=]() [[intel::kernel_args_restrict]] { + // +1 is to account for fact that SUPPKEY is [1,kSF*10000] + unsigned char nation_key_map_data[kSupplierTableSize + 1]; + bool nation_key_map_valid[kSupplierTableSize + 1]; + for (int i = 0; i < kSupplierTableSize + 1; i++) { + nation_key_map_valid[i] = false; + } + + /////////////////////////////////////////////// + //// Stage 1 + // populate the array map + [[intel::initiation_interval(1)]] + for (size_t i = 0; i < s_rows; i++) { + // NOTE: based on TPCH docs, SUPPKEY is guaranteed + // to be unique in range [1:kSF*10000] + DBIdentifier s_suppkey = i + 1; + unsigned char s_nationkey = s_nationkey_accessor[i]; + + nation_key_map_data[s_suppkey] = s_nationkey; + nation_key_map_valid[s_suppkey] = true; + } + /////////////////////////////////////////////// + + /////////////////////////////////////////////// + //// Stage 2 + // MAPJOIN PARTSUPPLIER and SUPPLIER tables by suppkey + MapJoin(nation_key_map_data, + nation_key_map_valid); + + // tell downstream we are done + PartSupplierPartsPipe::write( + SupplierPartSupplierJoinedPipeData(true, false)); + /////////////////////////////////////////////// + }); + }); + ///////////////////////////////////////////////////////////////////////////// + + ///////////////////////////////////////////////////////////////////////////// + //// ProducePartSupplier Kernel: produce the PARTSUPPLIER table + auto produce_part_supplier_event = q.submit([&](handler& h) { + // PARTSUPPLIER table accessors + accessor ps_partkey_accessor(ps_partkey_buf, h, read_only); + accessor ps_suppkey_accessor(ps_suppkey_buf, h, read_only); + accessor ps_supplycost_accessor(ps_supplycost_buf, h, read_only); + + // kernel to produce the PARTSUPPLIER table + h.single_task([=]() [[intel::kernel_args_restrict]] { + [[intel::initiation_interval(1)]] + for (size_t i = 0; i < ps_iters + 1; i++) { + bool done = (i == ps_iters); + bool valid = (i != ps_iters); + + // bulk read of data from global memory + NTuple data; + + UnrolledLoop<0, kPartSupplierDuplicatePartkeys>([&](auto j) { + size_t idx = i * kPartSupplierDuplicatePartkeys + j; + bool in_range = idx < ps_rows; + DBIdentifier partkey = ps_partkey_accessor[idx]; + DBIdentifier suppkey = ps_suppkey_accessor[idx]; + DBDecimal supplycost = ps_supplycost_accessor[idx]; + + data.get() = + PartSupplierRow(in_range, partkey, suppkey, supplycost); + }); + + // write to pipe + PartSupplierPipe::write(PartSupplierRowPipeData(done, valid, data)); + } + }); + }); + ///////////////////////////////////////////////////////////////////////////// + + ///////////////////////////////////////////////////////////////////////////// + //// Compute Kernel: do the final computation on the data + auto computation_kernel_event = q.submit([&](handler& h) { + // LINEITEM table accessors + accessor l_quantity_accessor(l_quantity_buf, h, read_only); + accessor l_extendedprice_accessor(l_extendedprice_buf, h, read_only); + accessor l_discount_accessor(l_discount_buf, h, read_only); + + // output accessors + accessor sum_profit_accessor(sum_profit_buf, h, write_only, no_init); + + h.single_task([=]() [[intel::kernel_args_restrict]] { + // the accumulators + constexpr int kAccumCacheSize = 8; + NTuple> + sum_profit_local; + + // initialize the accumulators + UnrolledLoop<0, kFinalDataMaxSize>([&](auto j) { + sum_profit_local.template get().init(0); + }); + + bool done = false; + [[intel::initiation_interval(1)]] + do { + FinalPipeData pipe_data = FinalPipe::read(); + done = pipe_data.done; + + const bool pipeDataValid = !pipe_data.done && pipe_data.valid; + + UnrolledLoop<0, kFinalDataMaxSize>([&](auto j) { + FinalData D = pipe_data.data.get(); + + bool D_valid = pipeDataValid && D.valid; + unsigned int D_idx = D.lineitemIdx; + + // grab LINEITEM data from global memory and compute 'amount' + DBDecimal quantity=0, extendedprice=0, discount=0, supplycost=0; + if(D_valid) { + quantity = l_quantity_accessor[D_idx]; + extendedprice = l_extendedprice_accessor[D_idx]; + discount = l_discount_accessor[D_idx]; + supplycost = D.supplycost; + } + + // Why quantity x 100? So we can divide 'amount' by 100*100 later + DBDecimal amount = (extendedprice * (100 - discount)) - + (supplycost * quantity * 100); + + // compute index based on order year and nation + // See Date.hpp + unsigned int orderyear = (D.orderdate >> 9) & 0x07FFFFF; + unsigned int nation = D.nationkey; + unsigned char idx = (orderyear - 1992) * 25 + nation; + + unsigned char idx_final = D_valid ? idx : 0; + DBDecimal amount_final = D_valid ? amount : 0; + + auto current_amount = sum_profit_local.template get().read(idx_final); + auto computed_amount = current_amount + amount_final; + sum_profit_local.template get().write(idx_final, computed_amount); + }); + } while (!done); + + // push back the accumulated data to global memory + for (size_t n = 0; n < 25; n++) { + for (size_t y = 0; y < 7; y++) { + size_t in_idx = y * 25 + n; + size_t out_idx = (y + 1992) * 25 + n; + + DBDecimal amount = 0; + + UnrolledLoop<0, kFinalDataMaxSize>([&](auto j) { + amount += sum_profit_local.template get().read(in_idx); + }); + + sum_profit_accessor[out_idx] = amount; + } + } + }); + }); + ///////////////////////////////////////////////////////////////////////////// + + ///////////////////////////////////////////////////////////////////////////// + //// FeedSort Kernel: kernel to filter out invalid data and feed the sorter + auto feed_sort_event = q.submit([&](handler& h) { + h.single_task([=]() [[intel::kernel_args_restrict]] { + bool done = false; + size_t num_rows = 0; + + [[intel::initiation_interval(1)]] + do { + // get data from upstream + bool valid; + LineItemOrdersMinimalJoinedPipeData pipe_data = + LineItemOrdersPipe::read(valid); + done = pipe_data.done && valid; + + if (!done && valid && pipe_data.valid) { + NTuple + shuffle_data; + unsigned char valid_count = 0; + char valid_bits = 0; + + // convert the 'valid' bits in the tuple to a bitset (valid_bits) + UnrolledLoop<0, kLineItemOrdersJoinWinSize>([&](auto i) { + constexpr char mask = 1 << i; + valid_bits |= pipe_data.data.get().valid ? mask : 0; + }); + + // full crossbar to do the shuffling from pipe_data to shuffle_data + UnrolledLoop<0, Pow2(kLineItemOrdersJoinWinSize)>([&](auto i) { + if (valid_bits == i) { + Shuffle(pipe_data.data, + shuffle_data); + valid_count = CountOnes(i); + } + }); + + // Send the data to sorter. + // The idea here is that this loop executes in the range + // [0,kLineItemOrdersJoinWinSize] times. + // However, we know that at most 6% of the data will match the filter + // and go to the sorter. So, that means for every ~16 pieces of + // data, we expect <1 will match the filter and go to the sorter. + // Therefore, so long as kLineItemOrdersJoinWinSize <= 16 + // this loop will, on average, execute ONCE per outer loop iteration + // (i.e. statistically, valid_count=1 for every 16 pieces of data). + // NOTE: for this loop to get good throughput it is VERY important to: + // A) Apply the [[intel::speculated_iterations(0)]] attribute + // B) Explicitly bound the loop iterations + // For an explanation why, see the optimize_inner_loops tutorial. + [[intel::initiation_interval(1), intel::speculated_iterations(0)]] + for (char i = 0; i < valid_count && + i < kLineItemOrdersJoinWinSize; i++) { + UnrolledLoop<0, kLineItemOrdersJoinWinSize>([&](auto j) { + if (j == i) { + SortInPipe::write(SortData(shuffle_data.get())); + } + }); + } + + num_rows += valid_count; + } + } while (!done); + + // send in pad data to ensure we send in exactly kSortSize elements + ShannonIterator i(num_rows, kSortSize); + + while (i.InRange()) { + SortInPipe::write( + SortData(0, std::numeric_limits::max(), 0, 0)); + + i.Step(); + } + + // drain the input pipe + while (!done) { + bool valid; + LineItemOrdersMinimalJoinedPipeData pipe_data = + LineItemOrdersPipe::read(valid); + done = pipe_data.done && valid; + } + }); + }); + ///////////////////////////////////////////////////////////////////////////// + + ///////////////////////////////////////////////////////////////////////////// + //// ConsumeSort Kernel: consume the output of the sorter + auto consume_sort_event = q.submit([&](handler& h) { + h.single_task([=]() [[intel::kernel_args_restrict]] { + bool done = false; + size_t num_rows = 0; + + // read out data from the sorter until 'done' signal from upstream + [[intel::initiation_interval(1)]] + do { + bool valid; + SortData in_data = SortOutPipe::read(valid); + done = (in_data.partkey == std::numeric_limits::max()) && + valid; + num_rows += valid ? 1 : 0; + + if (!done && valid) { + NTuple<1, LineItemOrdersMinimalJoined> out_data; + out_data.get<0>() = LineItemOrdersMinimalJoined( + true, in_data.lineitemIdx, in_data.partkey, in_data.suppkey, + in_data.orderdate); + + LineItemOrdersSortedPipe::write( + LineItemOrdersMinimalSortedPipeData(false, true, out_data)); + } + } while (!done); + + // tell downstream kernel that the sort is done + LineItemOrdersSortedPipe::write( + LineItemOrdersMinimalSortedPipeData(true, false)); + + // drain the data we don't care about from the sorter + ShannonIterator i(num_rows, kSortSize); + while (i.InRange()) { + bool valid; + (void)SortOutPipe::read(valid); + + if (valid) { + i.Step(); + } + } + }); + }); + ///////////////////////////////////////////////////////////////////////////// + + ///////////////////////////////////////////////////////////////////////////// + //// FifoSort Kernel: the sorter + auto sort_event = q.submit([&](handler& h) { + h.single_task([=]() [[intel::kernel_args_restrict]] { + ihc::sort(ihc::LessThan()); + }); + }); + ///////////////////////////////////////////////////////////////////////////// + + ///////////////////////////////////////////////////////////////////////////// + //// JoinEverything Kernel: join the sorted + //// LINEITEM+ORDERS with SUPPLIER+PARTSUPPLIER + auto join_li_o_s_ps_event = q.submit([&](handler& h) { + h.single_task([=]() [[intel::kernel_args_restrict]] { + DuplicateMergeJoin(); + + // join is done, tell downstream + FinalPipe::write(FinalPipeData(true, false)); + }); + }); + ///////////////////////////////////////////////////////////////////////////// + + // wait for kernel to finish + filter_parts_event.wait(); + computation_kernel_event.wait(); + join_li_o_s_ps_event.wait(); + sort_event.wait(); + consume_sort_event.wait(); + feed_sort_event.wait(); + produce_part_supplier_event.wait(); + join_partsupplier_supplier_event.wait(); + join_lineitem_orders_event.wait(); + producer_orders_event.wait(); + + high_resolution_clock::time_point host_end = high_resolution_clock::now(); + duration diff = host_end - host_start; + + // gather profiling info + auto filter_parts_start = + filter_parts_event + .get_profiling_info(); + auto computation_end = + computation_kernel_event + .get_profiling_info(); + + // calculating the kernel execution time in ms + auto kernel_execution_time = (computation_end - filter_parts_start) * 1e-6; + + kernel_latency = kernel_execution_time; + total_latency = diff.count(); + + return true; +} diff --git a/DirectProgramming/C++SYCL_FPGA/ReferenceDesigns/db/src/query9/query9_kernel.hpp b/DirectProgramming/C++SYCL_FPGA/ReferenceDesigns/db/src/query9/query9_kernel.hpp new file mode 100644 index 0000000000..1103ff301f --- /dev/null +++ b/DirectProgramming/C++SYCL_FPGA/ReferenceDesigns/db/src/query9/query9_kernel.hpp @@ -0,0 +1,17 @@ +#ifndef __QUERY9_KERNEL_HPP__ +#define __QUERY9_KERNEL_HPP__ +#pragma once + +#include +#include + +#include "../dbdata.hpp" + +using namespace sycl; + +bool SubmitQuery9(queue& q, Database& dbinfo, + std::string colour, + std::array& sum_profit, + double& kernel_latency, double& total_latency); + +#endif //__QUERY9_KERNEL_HPP__