diff --git a/DirectProgramming/C++SYCL/DenseLinearAlgebra/simple-add/README.md b/DirectProgramming/C++SYCL/DenseLinearAlgebra/simple-add/README.md
index f986e0c499..3ae3e37a50 100644
--- a/DirectProgramming/C++SYCL/DenseLinearAlgebra/simple-add/README.md
+++ b/DirectProgramming/C++SYCL/DenseLinearAlgebra/simple-add/README.md
@@ -25,6 +25,15 @@ USM, buffer, accessor, kernel, and command groups.
| Hardware | GEN9 or newer
Intel® Programmable Acceleration Card with Intel® Arria® 10 GX FPGA (Intel® PAC with Intel® Arria® 10 GX FPGA)
| Software | Intel® oneAPI DPC++/C++ Compiler
+> **Note**: Although the Intel® oneAPI DPC++/C++ Compiler is sufficient to compile for CPU, GPU, and FPGA emulation, and to generate FPGA reports and RTL for FPGAs, the FPGA simulation flow and FPGA hardware compiles have additional software requirements.
+>
+> To use the simulator flow, Intel® Quartus® Prime Pro Edition and one of the following simulators must be installed and accessible through your PATH:
+> - Questa*-Intel® FPGA Edition
+> - Questa*-Intel® FPGA Starter Edition
+> - ModelSim® SE
+>
+> When using the hardware compile flow, Intel® Quartus® Prime Pro Edition must be installed and accessible through your PATH.
+
## Key Implementation Details
This sample provides examples of both buffers and USM implementations for simple side-by-side comparison.
@@ -111,19 +120,23 @@ To learn more about the extensions and how to configure the oneAPI environment,
```
make fpga_emu
```
-2. Generate HTML performance reports.
+2. Compile for simulation (fast compile time, targets simulator FPGA device):
+ ```
+ make fpga_sim
+ ```
+3. Generate HTML performance reports.
```
make report
```
The reports reside at `simple-add_report.prj/reports/report.html`.
-3. Compile the program for FPGA hardware. (Compiling for hardware can take a long
+4. Compile the program for FPGA hardware. (Compiling for hardware can take a long
time.)
```
make fpga
```
-4. Clean the program. (Optional)
+5. Clean the program. (Optional)
```
make clean
```
@@ -168,19 +181,23 @@ time.)
```
nmake fpga_emu
```
-2. Generate HTML performance reports.
+2. Compile for simulation (fast compile time, targets simulator FPGA device):
+ ```
+ nmake fpga_sim
+ ```
+3. Generate HTML performance reports.
```
nmake report
```
The reports reside at `simple-add_report.prj/reports/report.html`.
-3. Compile the program for FPGA hardware. (Compiling for hardware can take a long
+4. Compile the program for FPGA hardware. (Compiling for hardware can take a long
time.)
```
nmake fpga
```
-4. Clean the program. (Optional)
+5. Clean the program. (Optional)
```
nmake clean
```
@@ -216,7 +233,12 @@ If you receive an error message, troubleshoot the problem using the **Diagnostic
./simple-add-buffers.fpga_emu
./simple-add-usm.fpga_emu
```
-3. Run on FPGA hardware.
+3. Run on FPGA simulator.
+ ```
+ CL_CONTEXT_MPSIM_DEVICE_INTELFPGA=1 ./simple-add-buffers.fpga_sim
+ CL_CONTEXT_MPSIM_DEVICE_INTELFPGA=1 ./simple-add-usm.fpga_sim
+ ```
+4. Run on FPGA hardware.
```
./simple-add-buffers.fpga
./simple-add-usm.fpga
@@ -243,7 +265,14 @@ If you receive an error message, troubleshoot the problem using the **Diagnostic
simple-add-buffers.fpga_emu.exe
simple-add-usm.fpga_emu.exe
```
-3. Run on FPGA hardware.
+3. Run on FPGA simulator.
+ ```
+ set CL_CONTEXT_MPSIM_DEVICE_INTELFPGA=1
+ simple-add-buffers.fpga_sim.exe
+ simple-add-usm.fpga_sim.exe
+ set CL_CONTEXT_MPSIM_DEVICE_INTELFPGA=
+ ```
+4. Run on FPGA hardware.
```
simple-add-buffers.fpga.exe
simple-add-usm.fpga.exe
diff --git a/DirectProgramming/C++SYCL/DenseLinearAlgebra/simple-add/src/CMakeLists.txt b/DirectProgramming/C++SYCL/DenseLinearAlgebra/simple-add/src/CMakeLists.txt
index e40f21c606..a3512efeba 100755
--- a/DirectProgramming/C++SYCL/DenseLinearAlgebra/simple-add/src/CMakeLists.txt
+++ b/DirectProgramming/C++SYCL/DenseLinearAlgebra/simple-add/src/CMakeLists.txt
@@ -48,24 +48,33 @@ add_custom_target(cpu-gpu DEPENDS ${TARGET_NAME})
# FPGA device selection
if(NOT DEFINED FPGA_DEVICE)
- set(FPGA_DEVICE "intel_a10gx_pac:pac_a10")
+ if(DEFINED USM AND (NOT(USM EQUAL 0)))
+ set(FPGA_DEVICE "intel_s10sx_pac:pac_s10_usm")
+ set(DEFAULT_BOARD_STR "Intel Stratix(R) 10 SX with USM support")
+ else()
+ set(FPGA_DEVICE "intel_a10gx_pac:pac_a10")
+ set(DEFAULT_BOARD_STR "Intel Arria(R) 10 GX")
+ endif()
message(STATUS "FPGA_DEVICE was not specified.\
- \nConfiguring the design to run on the default FPGA device ${FPGA_DEVICE} (Intel(R) PAC with Intel Arria(R) 10 GX FPGA). \
- \nPlease refer to the README for information on device selection.")
+ \nConfiguring the design to run on the default FPGA board ${FPGA_DEVICE} (Intel(R) PAC with ${DEFAULT_BOARD_STR} FPGA). \
+ \nPlease refer to the README for information on board selection.")
else()
message(STATUS "Configuring the design to run on FPGA device ${FPGA_DEVICE}")
endif()
set(EMULATOR_TARGET ${TARGET_NAME}.fpga_emu)
+set(SIMULATOR_TARGET ${TARGET_NAME}.fpga_sim)
set(FPGA_TARGET ${TARGET_NAME}.fpga)
# A DPC++ ahead-of-time (AoT) compile processes the device code in two stages.
# 1. The "compile" stage compiles the device code to an intermediate representation (SPIR-V).
# 2. The "link" stage invokes the compiler's FPGA backend before linking.
# For this reason, FPGA backend flags must be passed as link flags in CMake.
-set(EMULATOR_COMPILE_FLAGS "-fsycl -Wall -fintelfpga ${WIN_FLAG} -DFPGA_EMULATOR")
+set(EMULATOR_COMPILE_FLAGS "-fsycl -fintelfpga -Wall ${WIN_FLAG} -DFPGA_EMULATOR")
set(EMULATOR_LINK_FLAGS "-fsycl -fintelfpga")
-set(HARDWARE_COMPILE_FLAGS "-fsycl -Wall -fintelfpga ${WIN_FLAG}")
+set(SIMULATOR_COMPILE_FLAGS "-fsycl -fintelfpga -Wall ${WIN_FLAG} -Xssimulation -DFPGA_SIMULATOR")
+set(SIMULATOR_LINK_FLAGS "-fsycl -fintelfpga -Xssimulation -Xsghdl -Xstarget=${FPGA_DEVICE} ${USER_HARDWARE_FLAGS}")
+set(HARDWARE_COMPILE_FLAGS "-fsycl -fintelfpga -Wall ${WIN_FLAG} -DFPGA_HARDWARE")
set(HARDWARE_LINK_FLAGS "-fsycl -fintelfpga -Xshardware -Xstarget=${FPGA_DEVICE} ${USER_HARDWARE_FLAGS}")
# use cmake -D USER_HARDWARE_FLAGS= to set extra flags for FPGA backend compilation
@@ -82,6 +91,19 @@ set_target_properties(${EMULATOR_TARGET} PROPERTIES COMPILE_FLAGS "${EMULATOR_CO
set_target_properties(${EMULATOR_TARGET} PROPERTIES LINK_FLAGS "${EMULATOR_LINK_FLAGS}")
add_custom_target(fpga_emu DEPENDS ${EMULATOR_TARGET})
+###############################################################################
+### FPGA Simulator
+###############################################################################
+# To compile in a single command:
+# icpx -fsycl -fintelfpga -Xssimulation -Xsghdl -Xstarget=<FPGA_DEVICE> -DFPGA_SIMULATOR <file>.cpp -o <file>.fpga_sim
+# CMake executes:
+# [compile] icpx -fsycl -fintelfpga -Xssimulation -DFPGA_SIMULATOR -o <file>.cpp.o -c <file>.cpp
+# [link] icpx -fsycl -fintelfpga -Xssimulation -Xsghdl -Xstarget=<FPGA_DEVICE> <file>.cpp.o -o <file>.fpga_sim
+add_executable(${SIMULATOR_TARGET} ${SOURCE_FILE})
+set_target_properties(${SIMULATOR_TARGET} PROPERTIES COMPILE_FLAGS "${SIMULATOR_COMPILE_FLAGS}")
+set_target_properties(${SIMULATOR_TARGET} PROPERTIES LINK_FLAGS "${SIMULATOR_LINK_FLAGS}")
+add_custom_target(fpga_sim DEPENDS ${SIMULATOR_TARGET})
+
###############################################################################
### Generate Report
###############################################################################
diff --git a/DirectProgramming/C++SYCL/DenseLinearAlgebra/simple-add/src/simple-add-buffers.cpp b/DirectProgramming/C++SYCL/DenseLinearAlgebra/simple-add/src/simple-add-buffers.cpp
index 19c2b81f13..fed9b39fb7 100644
--- a/DirectProgramming/C++SYCL/DenseLinearAlgebra/simple-add/src/simple-add-buffers.cpp
+++ b/DirectProgramming/C++SYCL/DenseLinearAlgebra/simple-add/src/simple-add-buffers.cpp
@@ -23,7 +23,7 @@
#include
#include
-#if FPGA || FPGA_EMULATOR
+#if FPGA_HARDWARE || FPGA_EMULATOR || FPGA_SIMULATOR
#include
#endif
@@ -84,13 +84,16 @@ int main() {
// Create device selector for the device of your interest.
#if FPGA_EMULATOR
// Intel extension: FPGA emulator selector on systems without FPGA card.
- ext::intel::fpga_emulator_selector d_selector;
-#elif FPGA
+ auto selector = sycl::ext::intel::fpga_emulator_selector_v;
+#elif FPGA_SIMULATOR
+ // Intel extension: FPGA simulator selector on systems without FPGA card.
+ auto selector = sycl::ext::intel::fpga_simulator_selector_v;
+#elif FPGA_HARDWARE
// Intel extension: FPGA selector on systems with FPGA card.
- ext::intel::fpga_selector d_selector;
+ auto selector = sycl::ext::intel::fpga_selector_v;
#else
// The default device selector will select the most performant device.
- auto d_selector{default_selector_v};
+ auto selector = default_selector_v;
#endif
// Create array objects with "array_size" to store data.
@@ -101,7 +104,7 @@ int main() {
for (size_t i = 0; i < sequential.size(); i++) sequential[i] = value + i;
try {
- queue q(d_selector, exception_handler);
+ queue q(selector, exception_handler);
// Print out the device information used for the kernel code.
cout << "Running on device: "
diff --git a/DirectProgramming/C++SYCL/DenseLinearAlgebra/simple-add/src/simple-add-usm.cpp b/DirectProgramming/C++SYCL/DenseLinearAlgebra/simple-add/src/simple-add-usm.cpp
index 2cc1e441b9..7f35aaf573 100644
--- a/DirectProgramming/C++SYCL/DenseLinearAlgebra/simple-add/src/simple-add-usm.cpp
+++ b/DirectProgramming/C++SYCL/DenseLinearAlgebra/simple-add/src/simple-add-usm.cpp
@@ -23,7 +23,7 @@
#include
#include
-#if FPGA || FPGA_EMULATOR
+#if FPGA_HARDWARE || FPGA_EMULATOR || FPGA_SIMULATOR
#include
#endif
@@ -75,13 +75,16 @@ int main() {
// Create device selector for the device of your interest.
#if FPGA_EMULATOR
// Intel extension: FPGA emulator selector on systems without FPGA card.
- ext::intel::fpga_emulator_selector selector;
-#elif FPGA
+ auto selector = sycl::ext::intel::fpga_emulator_selector_v;
+#elif FPGA_SIMULATOR
+ // Intel extension: FPGA simulator selector on systems without FPGA card.
+ auto selector = sycl::ext::intel::fpga_simulator_selector_v;
+#elif FPGA_HARDWARE
// Intel extension: FPGA selector on systems with FPGA card.
- ext::intel::fpga_selector selector;
+ auto selector = sycl::ext::intel::fpga_selector_v;
#else
// The default device selector will select the most performant device.
- auto selector{default_selector_v};
+ auto selector = default_selector_v;
#endif
constexpr int value = 100000;
diff --git a/DirectProgramming/C++SYCL/DenseLinearAlgebra/vector-add/README.md b/DirectProgramming/C++SYCL/DenseLinearAlgebra/vector-add/README.md
index 0e723987b5..2be22bb319 100755
--- a/DirectProgramming/C++SYCL/DenseLinearAlgebra/vector-add/README.md
+++ b/DirectProgramming/C++SYCL/DenseLinearAlgebra/vector-add/README.md
@@ -28,6 +28,15 @@ This sample provides example implementations of both Unified Shared Memory (USM)
| Hardware | GEN9 or newer
Intel® Programmable Acceleration Card with Intel® Arria® 10 GX FPGA (Intel® PAC with Intel® Arria® 10 GX FPGA)
| Software | Intel® oneAPI DPC++/C++ Compiler
+> **Note**: Although the Intel® oneAPI DPC++/C++ Compiler is sufficient to compile for CPU, GPU, and FPGA emulation, and to generate FPGA reports and RTL for FPGAs, the FPGA simulation flow and FPGA hardware compiles have additional software requirements.
+>
+> To use the simulator flow, Intel® Quartus® Prime Pro Edition and one of the following simulators must be installed and accessible through your PATH:
+> - Questa*-Intel® FPGA Edition
+> - Questa*-Intel® FPGA Starter Edition
+> - ModelSim® SE
+>
+> When using the hardware compile flow, Intel® Quartus® Prime Pro Edition must be installed and accessible through your PATH.
+
## Key Implementation Details
The basic SYCL implementation explained in the code includes device selector, USM, buffer, accessor, kernel, and command groups.
@@ -111,19 +120,23 @@ To learn more about the extensions and how to configure the oneAPI environment,
```
make fpga_emu
```
-2. Generate HTML performance reports.
+2. Compile for simulation (fast compile time, targets simulator FPGA device):
+ ```
+ make fpga_sim
+ ```
+3. Generate HTML performance reports.
```
make report
```
The reports reside at `simple-add_report.prj/reports/report.html`.
-3. Compile the program for FPGA hardware. (Compiling for hardware can take a long
+4. Compile the program for FPGA hardware. (Compiling for hardware can take a long
time.)
```
make fpga
```
-4. Clean the program. (Optional)
+5. Clean the program. (Optional)
```
make clean
```
@@ -168,19 +181,23 @@ time.)
```
nmake fpga_emu
```
-2. Generate HTML performance reports.
+2. Compile for simulation (fast compile time, targets simulator FPGA device):
+ ```
+ nmake fpga_sim
+ ```
+3. Generate HTML performance reports.
```
nmake report
```
The reports reside at `simple-add_report.prj/reports/report.html`.
-3. Compile the program for FPGA hardware. (Compiling for hardware can take a long
+4. Compile the program for FPGA hardware. (Compiling for hardware can take a long
time.)
```
nmake fpga
```
-4. Clean the program. (Optional)
+5. Clean the program. (Optional)
```
nmake clean
```
@@ -221,7 +238,12 @@ The source files (`vector-add-buffers.cpp` and `vector-add-usm.cpp`) specify the
./vector-add-buffers.fpga_emu
./vector-add-usm.fpga_emu
```
-3. Run on FPGA hardware.
+3. Run on FPGA simulator.
+ ```
+ CL_CONTEXT_MPSIM_DEVICE_INTELFPGA=1 ./vector-add-buffers.fpga_sim
+ CL_CONTEXT_MPSIM_DEVICE_INTELFPGA=1 ./vector-add-usm.fpga_sim
+ ```
+4. Run on FPGA hardware.
```
./vector-add-buffers.fpga
./vector-add-usm.fpga
@@ -248,7 +270,14 @@ The source files (`vector-add-buffers.cpp` and `vector-add-usm.cpp`) specify the
vector-add-buffers.fpga_emu.exe
vector-add-usm.fpga_emu.exe
```
-3. Run on FPGA hardware.
+3. Run on FPGA simulator.
+ ```
+ set CL_CONTEXT_MPSIM_DEVICE_INTELFPGA=1
+ vector-add-buffers.fpga_sim.exe
+ vector-add-usm.fpga_sim.exe
+ set CL_CONTEXT_MPSIM_DEVICE_INTELFPGA=
+ ```
+4. Run on FPGA hardware.
```
vector-add-buffers.fpga.exe
vector-add-usm.fpga.exe
diff --git a/DirectProgramming/C++SYCL/DenseLinearAlgebra/vector-add/src/CMakeLists.txt b/DirectProgramming/C++SYCL/DenseLinearAlgebra/vector-add/src/CMakeLists.txt
index d88bf5b824..a46dfe42b5 100755
--- a/DirectProgramming/C++SYCL/DenseLinearAlgebra/vector-add/src/CMakeLists.txt
+++ b/DirectProgramming/C++SYCL/DenseLinearAlgebra/vector-add/src/CMakeLists.txt
@@ -48,24 +48,34 @@ add_custom_target(cpu-gpu DEPENDS ${TARGET_NAME})
# FPGA device selection
if(NOT DEFINED FPGA_DEVICE)
- set(FPGA_DEVICE "intel_a10gx_pac:pac_a10")
+ if(DEFINED USM AND (NOT(USM EQUAL 0)))
+ set(FPGA_DEVICE "intel_s10sx_pac:pac_s10_usm")
+ set(DEFAULT_BOARD_STR "Intel Stratix(R) 10 SX with USM support")
+ else()
+ set(FPGA_DEVICE "intel_a10gx_pac:pac_a10")
+ set(DEFAULT_BOARD_STR "Intel Arria(R) 10 GX")
+ endif()
message(STATUS "FPGA_DEVICE was not specified.\
- \nConfiguring the design to run on the default FPGA device ${FPGA_DEVICE} (Intel(R) PAC with Intel Arria(R) 10 GX FPGA). \
- \nPlease refer to the README for information on device selection.")
+ \nConfiguring the design to run on the default FPGA board ${FPGA_DEVICE} (Intel(R) PAC with ${DEFAULT_BOARD_STR} FPGA). \
+ \nPlease refer to the README for information on board selection.")
else()
message(STATUS "Configuring the design to run on FPGA device ${FPGA_DEVICE}")
endif()
+
set(EMULATOR_TARGET ${TARGET_NAME}.fpga_emu)
+set(SIMULATOR_TARGET ${TARGET_NAME}.fpga_sim)
set(FPGA_TARGET ${TARGET_NAME}.fpga)
# A DPC++ ahead-of-time (AoT) compile processes the device code in two stages.
# 1. The "compile" stage compiles the device code to an intermediate representation (SPIR-V).
# 2. The "link" stage invokes the compiler's FPGA backend before linking.
# For this reason, FPGA backend flags must be passed as link flags in CMake.
-set(EMULATOR_COMPILE_FLAGS "-fsycl -Wall -fintelfpga -DFPGA_EMULATOR ${WIN_FLAG}")
+set(EMULATOR_COMPILE_FLAGS "-fsycl -fintelfpga -Wall ${WIN_FLAG} -DFPGA_EMULATOR")
set(EMULATOR_LINK_FLAGS "-fsycl -fintelfpga")
-set(HARDWARE_COMPILE_FLAGS "-fsycl -Wall -fintelfpga -DFPGA ${WIN_FLAG}")
+set(SIMULATOR_COMPILE_FLAGS "-fsycl -fintelfpga -Wall ${WIN_FLAG} -Xssimulation -DFPGA_SIMULATOR")
+set(SIMULATOR_LINK_FLAGS "-fsycl -fintelfpga -Xssimulation -Xsghdl -Xstarget=${FPGA_DEVICE} ${USER_HARDWARE_FLAGS}")
+set(HARDWARE_COMPILE_FLAGS "-fsycl -fintelfpga -Wall ${WIN_FLAG} -DFPGA_HARDWARE")
set(HARDWARE_LINK_FLAGS "-fsycl -fintelfpga -Xshardware -Xstarget=${FPGA_DEVICE} ${USER_HARDWARE_FLAGS}")
# use cmake -D USER_HARDWARE_FLAGS= to set extra flags for FPGA backend compilation
@@ -82,6 +92,19 @@ set_target_properties(${EMULATOR_TARGET} PROPERTIES COMPILE_FLAGS "${EMULATOR_CO
set_target_properties(${EMULATOR_TARGET} PROPERTIES LINK_FLAGS "${EMULATOR_LINK_FLAGS}")
add_custom_target(fpga_emu DEPENDS ${EMULATOR_TARGET})
+###############################################################################
+### FPGA Simulator
+###############################################################################
+# To compile in a single command:
+# icpx -fsycl -fintelfpga -Xssimulation -Xsghdl -Xstarget=<FPGA_DEVICE> -DFPGA_SIMULATOR <file>.cpp -o <file>.fpga_sim
+# CMake executes:
+# [compile] icpx -fsycl -fintelfpga -Xssimulation -DFPGA_SIMULATOR -o <file>.cpp.o -c <file>.cpp
+# [link] icpx -fsycl -fintelfpga -Xssimulation -Xsghdl -Xstarget=<FPGA_DEVICE> <file>.cpp.o -o <file>.fpga_sim
+add_executable(${SIMULATOR_TARGET} ${SOURCE_FILE})
+set_target_properties(${SIMULATOR_TARGET} PROPERTIES COMPILE_FLAGS "${SIMULATOR_COMPILE_FLAGS}")
+set_target_properties(${SIMULATOR_TARGET} PROPERTIES LINK_FLAGS "${SIMULATOR_LINK_FLAGS}")
+add_custom_target(fpga_sim DEPENDS ${SIMULATOR_TARGET})
+
###############################################################################
### Generate Report
###############################################################################
diff --git a/DirectProgramming/C++SYCL/DenseLinearAlgebra/vector-add/src/vector-add-buffers.cpp b/DirectProgramming/C++SYCL/DenseLinearAlgebra/vector-add/src/vector-add-buffers.cpp
index 4c895637b5..8896fdffdc 100755
--- a/DirectProgramming/C++SYCL/DenseLinearAlgebra/vector-add/src/vector-add-buffers.cpp
+++ b/DirectProgramming/C++SYCL/DenseLinearAlgebra/vector-add/src/vector-add-buffers.cpp
@@ -23,7 +23,7 @@
#include
#include
#include
-#if FPGA || FPGA_EMULATOR
+#if FPGA_HARDWARE || FPGA_EMULATOR || FPGA_SIMULATOR
#include
#endif
@@ -109,13 +109,16 @@ int main(int argc, char* argv[]) {
// Create device selector for the device of your interest.
#if FPGA_EMULATOR
// Intel extension: FPGA emulator selector on systems without FPGA card.
- ext::intel::fpga_emulator_selector d_selector;
-#elif FPGA
+ auto selector = sycl::ext::intel::fpga_emulator_selector_v;
+#elif FPGA_SIMULATOR
+ // Intel extension: FPGA simulator selector on systems without FPGA card.
+ auto selector = sycl::ext::intel::fpga_simulator_selector_v;
+#elif FPGA_HARDWARE
// Intel extension: FPGA selector on systems with FPGA card.
- ext::intel::fpga_selector d_selector;
+ auto selector = sycl::ext::intel::fpga_selector_v;
#else
// The default device selector will select the most performant device.
- auto d_selector{default_selector_v};
+ auto selector = default_selector_v;
#endif
// Create vector objects with "vector_size" to store the input and output data.
@@ -130,7 +133,7 @@ int main(int argc, char* argv[]) {
InitializeVector(b);
try {
- queue q(d_selector, exception_handler);
+ queue q(selector, exception_handler);
// Print out the device information used for the kernel code.
std::cout << "Running on device: "
diff --git a/DirectProgramming/C++SYCL/DenseLinearAlgebra/vector-add/src/vector-add-usm.cpp b/DirectProgramming/C++SYCL/DenseLinearAlgebra/vector-add/src/vector-add-usm.cpp
index d4b4648831..ba28b09713 100755
--- a/DirectProgramming/C++SYCL/DenseLinearAlgebra/vector-add/src/vector-add-usm.cpp
+++ b/DirectProgramming/C++SYCL/DenseLinearAlgebra/vector-add/src/vector-add-usm.cpp
@@ -23,7 +23,7 @@
#include
#include
#include
-#if FPGA || FPGA_EMULATOR
+#if FPGA_HARDWARE || FPGA_EMULATOR || FPGA_SIMULATOR
#include
#endif
@@ -83,17 +83,20 @@ int main(int argc, char* argv[]) {
// Create device selector for the device of your interest.
#if FPGA_EMULATOR
// Intel extension: FPGA emulator selector on systems without FPGA card.
- ext::intel::fpga_emulator_selector d_selector;
-#elif FPGA
+ auto selector = sycl::ext::intel::fpga_emulator_selector_v;
+#elif FPGA_SIMULATOR
+ // Intel extension: FPGA simulator selector on systems without FPGA card.
+ auto selector = sycl::ext::intel::fpga_simulator_selector_v;
+#elif FPGA_HARDWARE
// Intel extension: FPGA selector on systems with FPGA card.
- ext::intel::fpga_selector d_selector;
+ auto selector = sycl::ext::intel::fpga_selector_v;
#else
// The default device selector will select the most performant device.
- auto d_selector{default_selector_v};
+ auto selector = default_selector_v;
#endif
try {
- queue q(d_selector, exception_handler);
+ queue q(selector, exception_handler);
// Print out the device information used for the kernel code.
std::cout << "Running on device: "
diff --git a/DirectProgramming/C++SYCL_FPGA/ReferenceDesigns/anr/README.md b/DirectProgramming/C++SYCL_FPGA/ReferenceDesigns/anr/README.md
index 874c744aef..4ab42594ac 100755
--- a/DirectProgramming/C++SYCL_FPGA/ReferenceDesigns/anr/README.md
+++ b/DirectProgramming/C++SYCL_FPGA/ReferenceDesigns/anr/README.md
@@ -42,7 +42,7 @@ You can also find more information about [troubleshooting build errors](/DirectP
> **Note**: Even though the Intel DPC++/C++ OneAPI compiler is enough to compile for emulation, generating reports and generating RTL, there are extra software requirements for the simulation flow and FPGA compiles.
>
-> For using the simulator flow, one of the following simulators must be installed and accessible through your PATH:
+> To use the simulator flow, Intel® Quartus® Prime Pro Edition and one of the following simulators must be installed and accessible through your PATH:
> - Questa*-Intel® FPGA Edition
> - Questa*-Intel® FPGA Starter Edition
> - ModelSim® SE
@@ -166,13 +166,17 @@ The design uses the following generic header files.
```
make fpga_emu
```
- 2. Generate HTML performance report.
+ 2. Compile for simulation (fast compile time, targets simulator FPGA device):
+ ```
+ make fpga_sim
+ ```
+ 3. Generate HTML performance report.
```
make report
```
The report resides at `anr_report.prj/reports/report.html`.
- 3. Compile for FPGA hardware (longer compile time, targets FPGA device).
+ 4. Compile for FPGA hardware (longer compile time, targets FPGA device).
```
make fpga
```
@@ -201,13 +205,17 @@ The design uses the following generic header files.
```
nmake fpga_emu
```
- 2. Generate HTML performance report.
+ 2. Compile for simulation (fast compile time, targets simulator FPGA device):
+ ```
+ nmake fpga_sim
+ ```
+ 3. Generate HTML performance report.
```
nmake report
```
The report resides at `anr_report.prj/reports/report.html`.
- 3. Compile for FPGA hardware (longer compile time, targets FPGA device).
+ 4. Compile for FPGA hardware (longer compile time, targets FPGA device).
```
nmake fpga
```
@@ -218,10 +226,14 @@ The design uses the following generic header files.
### On Linux
1. Run the sample on the FPGA emulator (the kernel executes on the CPU).
- ```
- ./anr.fpga_emu
- ```
-2. Alternatively, run the sample on the FPGA device.
+ ```
+ ./anr.fpga_emu
+ ```
+2. Run the sample on the FPGA simulator device:
+ ```
+ CL_CONTEXT_MPSIM_DEVICE_INTELFPGA=1 ./anr.fpga_sim
+ ```
+3. Alternatively, run the sample on the FPGA device.
```
./anr.fpga
```
@@ -229,10 +241,16 @@ The design uses the following generic header files.
### On Windows
1. Run the sample on the FPGA emulator (the kernel executes on the CPU).
- ```
- anr.fpga_emu.exe
- ```
-2. Alternatively, run the sample on the FPGA device.
+ ```
+ anr.fpga_emu.exe
+ ```
+2. Run the sample on the FPGA simulator device:
+ ```
+ set CL_CONTEXT_MPSIM_DEVICE_INTELFPGA=1
+ anr.fpga_sim.exe
+ set CL_CONTEXT_MPSIM_DEVICE_INTELFPGA=
+ ```
+3. Alternatively, run the sample on the FPGA device.
```
anr.fpga.exe
```
diff --git a/DirectProgramming/C++SYCL_FPGA/ReferenceDesigns/anr/src/CMakeLists.txt b/DirectProgramming/C++SYCL_FPGA/ReferenceDesigns/anr/src/CMakeLists.txt
index 1be9406709..d6ee2236af 100644
--- a/DirectProgramming/C++SYCL_FPGA/ReferenceDesigns/anr/src/CMakeLists.txt
+++ b/DirectProgramming/C++SYCL_FPGA/ReferenceDesigns/anr/src/CMakeLists.txt
@@ -1,6 +1,7 @@
set(TARGET_NAME anr)
set(SOURCE_FILE main.cpp)
set(EMULATOR_TARGET ${TARGET_NAME}.fpga_emu)
+set(SIMULATOR_TARGET ${TARGET_NAME}.fpga_sim)
set(FPGA_TARGET ${TARGET_NAME}.fpga)
# FPGA board selection
@@ -119,10 +120,12 @@ endif()
# 1. The "compile" stage compiles the device code to an intermediate representation (SPIR-V).
# 2. The "link" stage invokes the compiler's FPGA backend before linking.
# For this reason, FPGA backend flags must be passed as link flags in CMake.
-set(EMULATOR_COMPILE_FLAGS "-Wall ${CONSTEXPR_STEPS} ${WIN_FLAG} -fsycl -fintelfpga ${AC_TYPES_FLAG} ${FILTER_SIZE_FLAG} ${PIXELS_PER_CYCLE_FLAG} ${MAX_COLS_FLAG} ${PIXEL_BITS_FLAG} -DFPGA_EMULATOR")
+set(EMULATOR_COMPILE_FLAGS "-fsycl -fintelfpga -Wall ${CONSTEXPR_STEPS} ${WIN_FLAG} ${AC_TYPES_FLAG} ${FILTER_SIZE_FLAG} ${PIXELS_PER_CYCLE_FLAG} ${MAX_COLS_FLAG} ${PIXEL_BITS_FLAG} -DFPGA_EMULATOR")
set(EMULATOR_LINK_FLAGS "-fsycl -fintelfpga ${AC_TYPES_FLAG} ${FILTER_SIZE_FLAG} ${PIXELS_PER_CYCLE_FLAG} ${MAX_COLS_FLAG} ${PIXEL_BITS_FLAG}")
-set(HARDWARE_COMPILE_FLAGS "-Wall ${CONSTEXPR_STEPS} ${WIN_FLAG} -fsycl -fintelfpga ${AC_TYPES_FLAG} ${FILTER_SIZE_FLAG} ${PIXELS_PER_CYCLE_FLAG} ${MAX_COLS_FLAG} ${PIXEL_BITS_FLAG}")
+set(SIMULATOR_COMPILE_FLAGS "-fsycl -fintelfpga -Wall ${CONSTEXPR_STEPS} ${WIN_FLAG} ${AC_TYPES_FLAG} ${FILTER_SIZE_FLAG} ${PIXELS_PER_CYCLE_FLAG} ${MAX_COLS_FLAG} ${PIXEL_BITS_FLAG} -Xssimulation -DFPGA_SIMULATOR")
+set(SIMULATOR_LINK_FLAGS "-fsycl -fintelfpga -Xssimulation -Xsghdl -Xstarget=${FPGA_DEVICE} ${AC_TYPES_FLAG} ${USER_HARDWARE_FLAGS}") # AC_TYPES_FLAG is passed at link too, as in the emulator/hardware link flags
set(REPORT_LINK_FLAGS "-fsycl -fintelfpga -Xshardware ${PROFILE_FLAG} ${FLAT_COMPILE_FLAG} -Xsparallel=2 ${SEED_FLAG} -Xstarget=${FPGA_DEVICE} ${FILTER_SIZE_FLAG} ${PIXELS_PER_CYCLE_FLAG} ${MAX_COLS_FLAG} ${PIXEL_BITS_FLAG} ${IP_MODE_FLAG} ${USER_HARDWARE_FLAGS}")
+set(HARDWARE_COMPILE_FLAGS "-fsycl -fintelfpga -Wall ${CONSTEXPR_STEPS} ${WIN_FLAG} ${AC_TYPES_FLAG} ${FILTER_SIZE_FLAG} ${PIXELS_PER_CYCLE_FLAG} ${MAX_COLS_FLAG} ${PIXEL_BITS_FLAG} -DFPGA_HARDWARE")
set(HARDWARE_LINK_FLAGS "${REPORT_LINK_FLAGS} ${AC_TYPES_FLAG}")
# use cmake -D USER_HARDWARE_FLAGS= to set extra flags for FPGA backend compilation
@@ -135,6 +138,15 @@ set_target_properties(${EMULATOR_TARGET} PROPERTIES COMPILE_FLAGS "${EMULATOR_CO
set_target_properties(${EMULATOR_TARGET} PROPERTIES LINK_FLAGS "${EMULATOR_LINK_FLAGS}")
add_custom_target(fpga_emu DEPENDS ${EMULATOR_TARGET})
+###############################################################################
+### FPGA Simulator
+###############################################################################
+add_executable(${SIMULATOR_TARGET} ${SOURCE_FILE})
+target_include_directories(${SIMULATOR_TARGET} PRIVATE ../../../include)
+set_target_properties(${SIMULATOR_TARGET} PROPERTIES COMPILE_FLAGS "${SIMULATOR_COMPILE_FLAGS}")
+set_target_properties(${SIMULATOR_TARGET} PROPERTIES LINK_FLAGS "${SIMULATOR_LINK_FLAGS}")
+add_custom_target(fpga_sim DEPENDS ${SIMULATOR_TARGET})
+
###############################################################################
### Generate Report
###############################################################################
diff --git a/DirectProgramming/C++SYCL_FPGA/ReferenceDesigns/anr/src/main.cpp b/DirectProgramming/C++SYCL_FPGA/ReferenceDesigns/anr/src/main.cpp
index cb06fdc758..ad9d8ae466 100644
--- a/DirectProgramming/C++SYCL_FPGA/ReferenceDesigns/anr/src/main.cpp
+++ b/DirectProgramming/C++SYCL_FPGA/ReferenceDesigns/anr/src/main.cpp
@@ -42,9 +42,12 @@ int main(int argc, char* argv[]) {
// reading and validating the command line arguments
std::string data_dir = "../test_data";
bool passed = true;
-#ifdef FPGA_EMULATOR
+#if defined(FPGA_EMULATOR)
int runs = 2;
int frames = 2;
+#elif defined(FPGA_SIMULATOR)
+ int runs = 2;
+ int frames = 1;
#else
int runs = 2;
int frames = 8;
@@ -78,11 +81,12 @@ int main(int argc, char* argv[]) {
}
/////////////////////////////////////////////////////////////
- // the device selector
-#ifdef FPGA_EMULATOR
- ext::intel::fpga_emulator_selector selector;
-#else
- ext::intel::fpga_selector selector;
+#if FPGA_SIMULATOR
+ auto selector = sycl::ext::intel::fpga_simulator_selector_v;
+#elif FPGA_HARDWARE
+ auto selector = sycl::ext::intel::fpga_selector_v;
+#else // #if FPGA_EMULATOR
+ auto selector = sycl::ext::intel::fpga_emulator_selector_v;
#endif
// create the device queue
@@ -96,6 +100,13 @@ int main(int argc, char* argv[]) {
std::terminate();
}
+ auto device = q.get_device();  // device the queue was actually bound to
+
+ std::cout << "Running on device: "
+ << device.get_info<sycl::info::device::name>().c_str()  // get_info requires an explicit info descriptor
+ << std::endl;
+
+
// parse the input files
int cols, rows, pixel_count;
ANRParams params;
@@ -321,9 +332,17 @@ void ParseFiles(std::string data_dir, std::vector& in_pixels,
ANRParams& params) {
// parse the pixel data files
int noisy_w, noisy_h;
+#if FPGA_SIMULATOR
+ ParseDataFile(data_dir + "/small_input_noisy.data", in_pixels, noisy_w, noisy_h);
+#else
ParseDataFile(data_dir + "/input_noisy.data", in_pixels, noisy_w, noisy_h);
+#endif
int ref_w, ref_h;
+#if FPGA_SIMULATOR
+ ParseDataFile(data_dir + "/small_output_ref.data", ref_pixels, ref_w, ref_h);
+#else
ParseDataFile(data_dir + "/output_ref.data", ref_pixels, ref_w, ref_h);
+#endif
// ensure the dimensions match
if (noisy_w != ref_w) {
diff --git a/DirectProgramming/C++SYCL_FPGA/ReferenceDesigns/anr/test_data/small_input_noisy.data b/DirectProgramming/C++SYCL_FPGA/ReferenceDesigns/anr/test_data/small_input_noisy.data
new file mode 100755
index 0000000000..fe63694707
--- /dev/null
+++ b/DirectProgramming/C++SYCL_FPGA/ReferenceDesigns/anr/test_data/small_input_noisy.data
@@ -0,0 +1,2 @@
+16 32
+183 192 142 150 148 205 185 140 197 239 155 141 237 229 119 235 189 155 183 192 180 155 156 145 196 170 163 197 180 163 130 177 186 237 167 154 175 206 194 144 189 234 122 208 164 189 255 204 149 149 149 203 201 238 242 227 222 163 168 157 178 198 178 115 174 185 183 191 195 224 151 166 255 191 224 158 169 217 216 156 255 176 209 133 210 219 201 141 193 183 242 193 199 255 145 210 226 146 242 221 181 170 231 113 127 201 220 116 231 180 238 255 188 132 191 111 185 176 169 226 202 160 244 196 167 167 209 204 236 176 255 208 204 168 210 190 206 126 159 176 255 224 201 146 161 249 255 167 220 209 196 188 170 159 153 216 157 120 184 176 206 185 180 139 163 155 191 128 198 196 211 185 172 162 159 149 255 173 179 141 160 165 217 181 232 204 182 178 154 172 189 220 230 197 169 152 200 116 204 191 253 230 152 231 199 170 224 220 199 181 173 159 168 199 152 183 128 223 186 195 212 160 227 185 252 173 206 246 253 210 255 255 162 157 194 233 229 252 255 166 153 183 154 170 153 126 171 133 217 115 198 255 255 176 203 183 255 228 182 255 237 184 240 104 200 156 248 154 255 148 255 130 180 225 209 210 202 205 156 176 210 223 203 191 204 199 202 190 172 167 164 191 225 172 214 215 197 160 220 126 228 199 156 152 190 175 112 248 230 169 207 202 255 171 233 142 231 183 236 167 245 248 191 185 184 144 159 214 221 148 188 205 202 175 161 157 229 98 183 159 171 165 190 128 182 152 173 235 178 130 164 118 157 167 255 210 142 185 193 154 239 193 216 232 255 230 197 203 187 117 183 140 184 156 255 175 195 189 225 109 200 160 255 79 226 141 201 192 192 218 255 183 183 216 255 134 221 157 255 163 144 190 204 181 221 178 183 177 140 201 152 160 191 140 156 204 207 167 209 148 168 222 201 207 255 133 255 177 199 185 142 119 219 247 236 188 220 202 222 140 221 117 227 200 177 187 171 205 255 160 194 168 177 146 212 193 174 187 255 164 163 194 172 255 237 161 176 152 250 225 234 209 128 220 166 194 179 181 176 162 168 235 149 189 255 147 191 161 117 120 113 161 255 107 174 120 151 174 206 150 
255 156 224 110 223 209 183 129 170 174 255 184
\ No newline at end of file
diff --git a/DirectProgramming/C++SYCL_FPGA/ReferenceDesigns/anr/test_data/small_output_ref.data b/DirectProgramming/C++SYCL_FPGA/ReferenceDesigns/anr/test_data/small_output_ref.data
new file mode 100755
index 0000000000..ba65ac3c93
--- /dev/null
+++ b/DirectProgramming/C++SYCL_FPGA/ReferenceDesigns/anr/test_data/small_output_ref.data
@@ -0,0 +1,2 @@
+16 32
+170 183 169 175 175 188 175 167 194 202 170 168 208 207 139 195 193 178 188 171 189 166 183 168 180 167 168 176 169 171 155 179 185 191 184 160 184 181 181 151 177 191 143 191 171 186 208 185 172 173 177 192 187 194 200 195 200 182 187 174 181 168 178 129 184 188 182 184 189 189 184 181 203 193 197 173 204 189 204 166 205 176 202 162 196 177 193 170 190 187 188 191 184 206 167 201 205 177 206 188 190 179 201 132 167 178 209 131 204 178 213 194 184 156 178 142 181 163 188 187 195 175 197 186 190 179 191 188 215 187 211 184 210 177 201 171 206 163 189 164 211 192 208 166 201 199 201 175 191 189 185 180 179 178 176 179 177 160 177 179 190 167 191 158 189 166 193 152 192 177 193 175 191 176 180 166 188 162 178 161 186 167 195 173 199 176 197 182 186 180 187 183 211 181 196 177 206 156 196 184 206 188 192 190 205 188 199 182 199 183 191 178 180 187 175 184 171 190 181 186 186 177 193 179 218 182 217 205 215 190 211 211 200 194 214 207 212 198 211 172 179 164 175 157 178 151 188 150 206 150 210 193 216 176 215 191 217 206 207 205 211 176 212 127 219 156 225 152 223 160 220 161 200 188 195 182 186 192 183 188 183 189 182 186 185 185 187 182 190 178 194 185 198 181 205 184 211 174 208 158 203 174 171 165 188 177 146 183 204 175 212 173 216 167 217 166 219 168 218 171 216 199 193 183 197 167 192 178 197 170 194 176 199 164 192 161 194 122 187 160 185 170 178 170 178 174 176 174 176 154 178 145 173 182 198 182 175 178 207 181 216 181 214 193 215 197 192 174 189 153 189 157 198 158 209 158 211 164 217 136 214 166 221 84 217 170 206 181 201 184 214 184 206 179 228 164 214 166 223 163 185 174 194 175 189 184 182 183 168 182 170 179 169 170 173 174 183 176 187 179 187 177 203 178 220 164 219 168 218 181 184 148 202 193 207 169 215 168 219 155 216 150 209 166 200 171 196 179 217 159 202 170 194 166 203 177 192 176 212 180 188 177 186 200 205 174 198 175 213 186 195 190 158 190 167 183 165 173 169 167 173 190 175 179 207 166 186 164 116 139 119 151 214 122 174 131 183 153 203 150 
216 162 218 136 212 171 211 136 200 176 222 176
diff --git a/DirectProgramming/C++SYCL_FPGA/ReferenceDesigns/board_test/README.md b/DirectProgramming/C++SYCL_FPGA/ReferenceDesigns/board_test/README.md
index a667086802..f74484a9f7 100644
--- a/DirectProgramming/C++SYCL_FPGA/ReferenceDesigns/board_test/README.md
+++ b/DirectProgramming/C++SYCL_FPGA/ReferenceDesigns/board_test/README.md
@@ -45,7 +45,7 @@ You can also find more information about [troubleshooting build errors](/DirectP
> **Note**: Even though the Intel DPC++/C++ OneAPI compiler is enough to compile for emulation, generating reports and generating RTL, there are extra software requirements for the simulation flow and FPGA compiles.
>
-> For using the simulator flow, one of the following simulators must be installed and accessible through your PATH:
+> To use the simulator flow, Intel® Quartus® Prime Pro Edition and one of the following simulators must be installed and accessible through your PATH:
> - Questa*-Intel® FPGA Edition
> - Questa*-Intel® FPGA Starter Edition
> - ModelSim® SE
diff --git a/DirectProgramming/C++SYCL_FPGA/ReferenceDesigns/board_test/src/CMakeLists.txt b/DirectProgramming/C++SYCL_FPGA/ReferenceDesigns/board_test/src/CMakeLists.txt
index 4485c4d160..e11d99f43b 100644
--- a/DirectProgramming/C++SYCL_FPGA/ReferenceDesigns/board_test/src/CMakeLists.txt
+++ b/DirectProgramming/C++SYCL_FPGA/ReferenceDesigns/board_test/src/CMakeLists.txt
@@ -30,9 +30,9 @@ endif()
# 1. The "compile" stage compiles the device code to an intermediate representation (SPIR-V).
# 2. The "link" stage invokes the compiler's FPGA backend before linking.
# For this reason, FPGA backend flags must be passed as link flags in CMake.
-set(EMULATOR_COMPILE_FLAGS "${PLATFORM_SPECIFIC_COMPILE_FLAGS} -fsycl -fintelfpga -DFPGA_EMULATOR -Wformat-security -Werror=format-security -Wall")
+set(EMULATOR_COMPILE_FLAGS "-fsycl -fintelfpga ${PLATFORM_SPECIFIC_COMPILE_FLAGS} -DFPGA_EMULATOR -Wformat-security -Werror=format-security -Wall")
set(EMULATOR_LINK_FLAGS "-fsycl -fintelfpga ${PLATFORM_SPECIFIC_LINK_FLAGS}")
-set(HARDWARE_COMPILE_FLAGS "${PLATFORM_SPECIFIC_COMPILE_FLAGS} -fsycl -fintelfpga")
+set(HARDWARE_COMPILE_FLAGS "-fsycl -fintelfpga ${PLATFORM_SPECIFIC_COMPILE_FLAGS} -DFPGA_HARDWARE")
# By default oneAPI compiler burst interleaves across same memory type,
# -Xsno-interleaving is used to disable burst interleaving and test each memory bank independently
# Refer to https://www.intel.com/content/www/us/en/develop/documentation/oneapi-fpga-optimization-guide/top/flags-attr-prag-ext/optimization-flags/disabl-burst-int.html for more information
diff --git a/DirectProgramming/C++SYCL_FPGA/ReferenceDesigns/board_test/src/board_test.cpp b/DirectProgramming/C++SYCL_FPGA/ReferenceDesigns/board_test/src/board_test.cpp
index cb011358cc..ad9a244d9a 100644
--- a/DirectProgramming/C++SYCL_FPGA/ReferenceDesigns/board_test/src/board_test.cpp
+++ b/DirectProgramming/C++SYCL_FPGA/ReferenceDesigns/board_test/src/board_test.cpp
@@ -49,10 +49,10 @@ int main(int argc, char* argv[]) {
// - the FPGA emulator device (CPU emulation of the FPGA) using FPGA_EMULATOR
// macro
// - the FPGA device (a real FPGA)
-#if defined(FPGA_EMULATOR)
- sycl::ext::intel::fpga_emulator_selector device_selector;
-#else
- sycl::ext::intel::fpga_selector device_selector;
+#if FPGA_HARDWARE
+ auto selector = sycl::ext::intel::fpga_selector_v;
+#else // #if FPGA_EMULATOR
+ auto selector = sycl::ext::intel::fpga_emulator_selector_v;
#endif
// Variable ORed with result of each test
@@ -66,11 +66,14 @@ int main(int argc, char* argv[]) {
// Create a queue bound to the chosen device
// If the device is unavailable, a SYCL runtime exception is thrown
- sycl::queue q(device_selector, fpga_tools::exception_handler, q_prop_list);
+ sycl::queue q(selector, fpga_tools::exception_handler, q_prop_list);
+
+ auto device = q.get_device();
// Print out the device information.
std::cout << "Running on device: "
- << q.get_device().get_info() << "\n";
+ << device.get_info()
+ << std::endl;
// Create a oneAPI Shim object
ShimMetrics hldshim(q);
diff --git a/DirectProgramming/C++SYCL_FPGA/ReferenceDesigns/cholesky/README.md b/DirectProgramming/C++SYCL_FPGA/ReferenceDesigns/cholesky/README.md
index 6aa239cf13..f38d47df95 100755
--- a/DirectProgramming/C++SYCL_FPGA/ReferenceDesigns/cholesky/README.md
+++ b/DirectProgramming/C++SYCL_FPGA/ReferenceDesigns/cholesky/README.md
@@ -49,7 +49,7 @@ You can also find more information about [troubleshooting build errors](/DirectP
> **Note**: Even though the Intel DPC++/C++ OneAPI compiler is enough to compile for emulation, generating reports and generating RTL, there are extra software requirements for the simulation flow and FPGA compiles.
>
-> For using the simulator flow, one of the following simulators must be installed and accessible through your PATH:
+> To use the simulator flow, Intel® Quartus® Prime Pro Edition and one of the following simulators must be installed and accessible through your PATH:
> - Questa*-Intel® FPGA Edition
> - Questa*-Intel® FPGA Starter Edition
> - ModelSim® SE
@@ -161,13 +161,17 @@ For `constexpr_math.hpp`, `memory_utils.hpp`, `metaprogramming_utils.hpp`, and `
```
make fpga_emu
```
- 2. Generate the HTML performance report.
+ 2. Compile for simulation (fast compile time, targets simulated FPGA device).
+ ```
+ make fpga_sim
+ ```
+ 3. Generate the HTML performance report.
```
make report
```
The report resides at `cholesky_report.prj/reports/report.html`.
- 3. Compile for FPGA hardware (longer compile time, targets FPGA device).
+ 4. Compile for FPGA hardware (longer compile time, targets FPGA device).
```
make fpga
```
@@ -194,11 +198,15 @@ For `constexpr_math.hpp`, `memory_utils.hpp`, `metaprogramming_utils.hpp`, and `
```
nmake fpga_emu
```
- 2. Generate the HTML performance report.
+ 2. Compile for simulation (fast compile time, targets simulated FPGA device).
+ ```
+ nmake fpga_sim
+ ```
+ 3. Generate the HTML performance report.
```
nmake report
```
- 3. Compile for FPGA hardware (longer compile time, targets FPGA device).
+ 4. Compile for FPGA hardware (longer compile time, targets FPGA device).
```
nmake fpga
```
@@ -224,7 +232,11 @@ You can apply the Cholesky decomposition to a number of matrices, as shown below
```
./cholesky.fpga_emu
```
-2. Run the sample on the FPGA device.
+2. Run the sample on the FPGA simulator.
+ ```
+ CL_CONTEXT_MPSIM_DEVICE_INTELFPGA=1 ./cholesky.fpga_sim
+ ```
+3. Run the sample on the FPGA device.
```
./cholesky.fpga
```
@@ -235,7 +247,13 @@ You can apply the Cholesky decomposition to a number of matrices, as shown below
```
cholesky.fpga_emu.exe
```
-2. Run the sample on the FPGA device.
+2. Run the sample on the FPGA simulator.
+ ```
+ set CL_CONTEXT_MPSIM_DEVICE_INTELFPGA=1
+ cholesky.fpga_sim.exe
+ set CL_CONTEXT_MPSIM_DEVICE_INTELFPGA=
+ ```
+3. Run the sample on the FPGA device.
```
cholesky.fpga.exe
```
diff --git a/DirectProgramming/C++SYCL_FPGA/ReferenceDesigns/cholesky/src/CMakeLists.txt b/DirectProgramming/C++SYCL_FPGA/ReferenceDesigns/cholesky/src/CMakeLists.txt
index cc476ecb55..e52aa0d3d3 100755
--- a/DirectProgramming/C++SYCL_FPGA/ReferenceDesigns/cholesky/src/CMakeLists.txt
+++ b/DirectProgramming/C++SYCL_FPGA/ReferenceDesigns/cholesky/src/CMakeLists.txt
@@ -79,11 +79,11 @@ message(STATUS "SEED=${SEED}")
# 1. The "compile" stage compiles the device code to an intermediate representation (SPIR-V).
# 2. The "link" stage invokes the compiler's FPGA backend before linking.
# For this reason, FPGA backend flags must be passed as link flags in CMake.
-set(EMULATOR_COMPILE_FLAGS "-Wall ${PLATFORM_SPECIFIC_COMPILE_FLAGS} -Wformat-security -Werror=format-security -fbracket-depth=512 -fsycl -fintelfpga -DFIXED_ITERATIONS=${FIXED_ITERATIONS} -DCOMPLEX=${COMPLEX} -DMATRIX_DIMENSION=${MATRIX_DIMENSION} -DFPGA_EMULATOR")
+set(EMULATOR_COMPILE_FLAGS "-fsycl -fintelfpga -Wall ${PLATFORM_SPECIFIC_COMPILE_FLAGS} -Wformat-security -Werror=format-security -fbracket-depth=512 -DFIXED_ITERATIONS=${FIXED_ITERATIONS} -DCOMPLEX=${COMPLEX} -DMATRIX_DIMENSION=${MATRIX_DIMENSION} -DFPGA_EMULATOR")
set(EMULATOR_LINK_FLAGS "-fsycl -fintelfpga ${PLATFORM_SPECIFIC_LINK_FLAGS}")
set(SIMULATOR_COMPILE_FLAGS "-fsycl -fintelfpga -Wall ${PLATFORM_SPECIFIC_COMPILE_FLAGS} -DFPGA_SIMULATOR -fbracket-depth=512 -DFIXED_ITERATIONS=${FIXED_ITERATIONS} -DCOMPLEX=${COMPLEX} -DMATRIX_DIMENSION=${MATRIX_DIMENSION} -Xsfp-relaxed ${USER_SIMULATOR_FLAGS}")
set(SIMULATOR_LINK_FLAGS "-fsycl -fintelfpga ${PLATFORM_SPECIFIC_LINK_FLAGS} -Xssimulation -Xsghdl -Xsclock=${CLOCK_TARGET} -Xstarget=${FPGA_DEVICE} ${USER_HARDWARE_FLAGS} -Xsfp-relaxed")
-set(HARDWARE_COMPILE_FLAGS "-Wall ${PLATFORM_SPECIFIC_COMPILE_FLAGS} -Wformat-security -Werror=format-security -fsycl -fintelfpga -fbracket-depth=512 -DFIXED_ITERATIONS=${FIXED_ITERATIONS} -DCOMPLEX=${COMPLEX} -DMATRIX_DIMENSION=${MATRIX_DIMENSION} -Xsfp-relaxed")
+set(HARDWARE_COMPILE_FLAGS "-fsycl -fintelfpga -Wall ${PLATFORM_SPECIFIC_COMPILE_FLAGS} -Wformat-security -Werror=format-security -fbracket-depth=512 -DFIXED_ITERATIONS=${FIXED_ITERATIONS} -DCOMPLEX=${COMPLEX} -DMATRIX_DIMENSION=${MATRIX_DIMENSION} -Xsfp-relaxed -DFPGA_HARDWARE")
set(HARDWARE_LINK_FLAGS "-fsycl -fintelfpga ${PLATFORM_SPECIFIC_LINK_FLAGS} -Xshardware -Xsclock=${CLOCK_TARGET} -Xsparallel=2 ${SEED} -Xstarget=${FPGA_DEVICE} ${USER_HARDWARE_FLAGS} -Xsfp-relaxed")
# use cmake -D USER_HARDWARE_FLAGS= to set extra flags for FPGA backend compilation
diff --git a/DirectProgramming/C++SYCL_FPGA/ReferenceDesigns/cholesky/src/cholesky_demo.cpp b/DirectProgramming/C++SYCL_FPGA/ReferenceDesigns/cholesky/src/cholesky_demo.cpp
index 68d0507265..e1a7414a75 100644
--- a/DirectProgramming/C++SYCL_FPGA/ReferenceDesigns/cholesky/src/cholesky_demo.cpp
+++ b/DirectProgramming/C++SYCL_FPGA/ReferenceDesigns/cholesky/src/cholesky_demo.cpp
@@ -87,23 +87,24 @@ int main(int argc, char *argv[]) {
}
try {
- // SYCL boilerplate
-#if defined(FPGA_EMULATOR)
- sycl::ext::intel::fpga_emulator_selector device_selector;
-#elif defined(FPGA_SIMULATOR)
- sycl::ext::intel::fpga_simulator_selector device_selector;
-#else
- sycl::ext::intel::fpga_selector device_selector;
+
+#if FPGA_SIMULATOR
+ auto selector = sycl::ext::intel::fpga_simulator_selector_v;
+#elif FPGA_HARDWARE
+ auto selector = sycl::ext::intel::fpga_selector_v;
+#else // #if FPGA_EMULATOR
+ auto selector = sycl::ext::intel::fpga_emulator_selector_v;
#endif
// Enable the queue profiling to time the execution
sycl::queue q = sycl::queue(
- device_selector, fpga_tools::exception_handler,
+ selector, fpga_tools::exception_handler,
sycl::property_list{sycl::property::queue::enable_profiling()});
sycl::device device = q.get_device();
- std::cout << "Device name: "
- << device.get_info().c_str()
- << std::endl;
+
+ std::cout << "Running on device: "
+ << device.get_info().c_str()
+ << std::endl;
// Select a type for this compile depending on the value of COMPLEX
using T = std::conditional_t, float>;
diff --git a/DirectProgramming/C++SYCL_FPGA/ReferenceDesigns/cholesky_inversion/README.md b/DirectProgramming/C++SYCL_FPGA/ReferenceDesigns/cholesky_inversion/README.md
index 01f7bb6598..88dcfbb230 100644
--- a/DirectProgramming/C++SYCL_FPGA/ReferenceDesigns/cholesky_inversion/README.md
+++ b/DirectProgramming/C++SYCL_FPGA/ReferenceDesigns/cholesky_inversion/README.md
@@ -62,7 +62,7 @@ You can also find more information about [troubleshooting build errors](/DirectP
> **Note**: Even though the Intel DPC++/C++ OneAPI compiler is enough to compile for emulation, generating reports and generating RTL, there are extra software requirements for the simulation flow and FPGA compiles.
>
-> For using the simulator flow, one of the following simulators must be installed and accessible through your PATH:
+> To use the simulator flow, Intel® Quartus® Prime Pro Edition and one of the following simulators must be installed and accessible through your PATH:
> - Questa*-Intel® FPGA Edition
> - Questa*-Intel® FPGA Starter Edition
> - ModelSim® SE
@@ -180,19 +180,23 @@ Additionaly, the cmake build system can be configured using the following parame
3. Compile the design. (The provided targets match the recommended development flow.)
1. Compile for emulation (fast compile time, targets emulated FPGA device).
- ```
- make fpga_emu
- ```
- 2. Generate the HTML performance report.
- ```
- make report
- ```
+ ```
+ make fpga_emu
+ ```
+ 2. Compile for simulation (fast compile time, targets simulated FPGA device).
+ ```
+ make fpga_sim
+ ```
+ 3. Generate the HTML performance report.
+ ```
+ make report
+ ```
The report resides at `cholesky_inversion_report.prj/reports/report.html`.
- 3. Compile for FPGA hardware (longer compile time, targets FPGA device).
- ```
- make fpga
- ```
+ 4. Compile for FPGA hardware (longer compile time, targets FPGA device).
+ ```
+ make fpga
+ ```
(Optional) The hardware compiles listed above can take several hours to complete; alternatively, you can download FPGA precompiled binaries (compatible with Linux* Ubuntu* 18.04) from [https://iotdk.intel.com/fpga-precompiled-binaries/latest/cholesky_inversion.fpga.tar.gz](https://iotdk.intel.com/fpga-precompiled-binaries/latest/cholesky_inversion.fpga.tar.gz).
@@ -218,13 +222,17 @@ Additionaly, the cmake build system can be configured using the following parame
```
nmake fpga_emu
```
- 2. Generate the HTML performance report.
+ 2. Compile for simulation (fast compile time, targets simulated FPGA device).
+ ```
+ nmake fpga_sim
+ ```
+ 3. Generate the HTML performance report.
```
nmake report
```
The report resides at `cholesky_inversion_report.a.prj/reports/report.html`.
- 3. Compile for FPGA hardware (longer compile time, targets FPGA device).
+ 4. Compile for FPGA hardware (longer compile time, targets FPGA device).
```
nmake fpga
```
@@ -251,7 +259,11 @@ You can apply the Cholesky-based inversion to 8 matrices repeated a number of ti
```
./cholesky_inversion.fpga_emu
```
-2. Run on the FPGA device.
+2. Run the sample on the FPGA simulator.
+ ```
+ CL_CONTEXT_MPSIM_DEVICE_INTELFPGA=1 ./cholesky_inversion.fpga_sim
+ ```
+3. Run on the FPGA device.
```
./cholesky_inversion.fpga
```
@@ -262,7 +274,13 @@ You can apply the Cholesky-based inversion to 8 matrices repeated a number of ti
```
cholesky_inversion.fpga_emu.exe
```
-2. Run on the FPGA device.
+2. Run the sample on the FPGA simulator.
+ ```
+ set CL_CONTEXT_MPSIM_DEVICE_INTELFPGA=1
+ cholesky_inversion.fpga_sim.exe
+ set CL_CONTEXT_MPSIM_DEVICE_INTELFPGA=
+ ```
+3. Run on the FPGA device.
```
cholesky_inversion.fpga.exe
```
diff --git a/DirectProgramming/C++SYCL_FPGA/ReferenceDesigns/cholesky_inversion/src/CMakeLists.txt b/DirectProgramming/C++SYCL_FPGA/ReferenceDesigns/cholesky_inversion/src/CMakeLists.txt
index 79d4ac5a97..1b464c424e 100755
--- a/DirectProgramming/C++SYCL_FPGA/ReferenceDesigns/cholesky_inversion/src/CMakeLists.txt
+++ b/DirectProgramming/C++SYCL_FPGA/ReferenceDesigns/cholesky_inversion/src/CMakeLists.txt
@@ -88,11 +88,11 @@ message(STATUS "SEED=${SEED}")
# 1. The "compile" stage compiles the device code to an intermediate representation (SPIR-V).
# 2. The "link" stage invokes the compiler's FPGA backend before linking.
# For this reason, FPGA backend flags must be passed as link flags in CMake.
-set(EMULATOR_COMPILE_FLAGS "-Wall ${PLATFORM_SPECIFIC_COMPILE_FLAGS} -Wformat-security -Werror=format-security -fbracket-depth=512 -fsycl -fintelfpga -DFIXED_ITERATIONS_DECOMPOSITION=${FIXED_ITERATIONS_DECOMPOSITION} -DFIXED_ITERATIONS_INVERSION=${FIXED_ITERATIONS_INVERSION} -DCOMPLEX=${COMPLEX} -DMATRIX_DIMENSION=${MATRIX_DIMENSION} -DFPGA_EMULATOR")
+set(EMULATOR_COMPILE_FLAGS "-fsycl -fintelfpga -Wall ${PLATFORM_SPECIFIC_COMPILE_FLAGS} -Wformat-security -Werror=format-security -fbracket-depth=512 -DFIXED_ITERATIONS_DECOMPOSITION=${FIXED_ITERATIONS_DECOMPOSITION} -DFIXED_ITERATIONS_INVERSION=${FIXED_ITERATIONS_INVERSION} -DCOMPLEX=${COMPLEX} -DMATRIX_DIMENSION=${MATRIX_DIMENSION} -DFPGA_EMULATOR")
set(EMULATOR_LINK_FLAGS "-fsycl -fintelfpga ${PLATFORM_SPECIFIC_LINK_FLAGS}")
set(SIMULATOR_COMPILE_FLAGS "-fsycl -fintelfpga -Wall ${PLATFORM_SPECIFIC_COMPILE_FLAGS} -DFPGA_SIMULATOR -DFIXED_ITERATIONS_DECOMPOSITION=${FIXED_ITERATIONS_DECOMPOSITION} -DFIXED_ITERATIONS_INVERSION=${FIXED_ITERATIONS_INVERSION} -DCOMPLEX=${COMPLEX} -DMATRIX_DIMENSION=${MATRIX_DIMENSION} -Xsfp-relaxed ${USER_HARDWARE_FLAGS}")
set(SIMULATOR_LINK_FLAGS "-fsycl -fintelfpga ${PLATFORM_SPECIFIC_LINK_FLAGS} -Xssimulation -Xsghdl -Xsclock=${CLOCK_TARGET} -Xstarget=${FPGA_DEVICE} ${USER_SIMULATOR_FLAGS} -Xsfp-relaxed")
-set(HARDWARE_COMPILE_FLAGS "-Wall ${PLATFORM_SPECIFIC_COMPILE_FLAGS} -Wformat-security -Werror=format-security -fsycl -fintelfpga -fbracket-depth=512 -DFIXED_ITERATIONS_DECOMPOSITION=${FIXED_ITERATIONS_DECOMPOSITION} -DFIXED_ITERATIONS_INVERSION=${FIXED_ITERATIONS_INVERSION} -DCOMPLEX=${COMPLEX} -DMATRIX_DIMENSION=${MATRIX_DIMENSION} -Xsfp-relaxed")
+set(HARDWARE_COMPILE_FLAGS "-fsycl -fintelfpga -Wall ${PLATFORM_SPECIFIC_COMPILE_FLAGS} -Wformat-security -Werror=format-security -fbracket-depth=512 -DFIXED_ITERATIONS_DECOMPOSITION=${FIXED_ITERATIONS_DECOMPOSITION} -DFIXED_ITERATIONS_INVERSION=${FIXED_ITERATIONS_INVERSION} -DCOMPLEX=${COMPLEX} -DMATRIX_DIMENSION=${MATRIX_DIMENSION} -Xsfp-relaxed -DFPGA_HARDWARE")
set(HARDWARE_LINK_FLAGS "-fsycl -fintelfpga ${PLATFORM_SPECIFIC_LINK_FLAGS} -Xshardware -Xsclock=${CLOCK_TARGET} -Xsparallel=2 ${SEED} -Xstarget=${FPGA_DEVICE} ${USER_HARDWARE_FLAGS} -Xsfp-relaxed")
# use cmake -D USER_HARDWARE_FLAGS= to set extra flags for FPGA backend compilation
diff --git a/DirectProgramming/C++SYCL_FPGA/ReferenceDesigns/cholesky_inversion/src/cholesky_inversion_demo.cpp b/DirectProgramming/C++SYCL_FPGA/ReferenceDesigns/cholesky_inversion/src/cholesky_inversion_demo.cpp
index 4b1b166bcc..1b3cf218e2 100644
--- a/DirectProgramming/C++SYCL_FPGA/ReferenceDesigns/cholesky_inversion/src/cholesky_inversion_demo.cpp
+++ b/DirectProgramming/C++SYCL_FPGA/ReferenceDesigns/cholesky_inversion/src/cholesky_inversion_demo.cpp
@@ -357,23 +357,24 @@ int main(int argc, char *argv[]) {
}
try {
- // SYCL boilerplate
-#if defined(FPGA_EMULATOR)
- sycl::ext::intel::fpga_emulator_selector device_selector;
-#elif defined(FPGA_SIMULATOR)
- sycl::ext::intel::fpga_simulator_selector device_selector;
-#else
- sycl::ext::intel::fpga_selector device_selector;
+
+#if FPGA_SIMULATOR
+ auto selector = sycl::ext::intel::fpga_simulator_selector_v;
+#elif FPGA_HARDWARE
+ auto selector = sycl::ext::intel::fpga_selector_v;
+#else // #if FPGA_EMULATOR
+ auto selector = sycl::ext::intel::fpga_emulator_selector_v;
#endif
// Enable the queue profiling to time the execution
sycl::queue q = sycl::queue(
- device_selector, fpga_tools::exception_handler,
+ selector, fpga_tools::exception_handler,
sycl::property_list{sycl::property::queue::enable_profiling()});
sycl::device device = q.get_device();
- std::cout << "Device name: "
- << device.get_info().c_str()
- << std::endl;
+
+ std::cout << "Running on device: "
+ << device.get_info().c_str()
+ << std::endl;
// Select a type for this compile depending on the value of COMPLEX
using T = std::conditional_t, float>;
diff --git a/DirectProgramming/C++SYCL_FPGA/ReferenceDesigns/crr/README.md b/DirectProgramming/C++SYCL_FPGA/ReferenceDesigns/crr/README.md
index 13f3ac38bf..4c1e0e1a76 100755
--- a/DirectProgramming/C++SYCL_FPGA/ReferenceDesigns/crr/README.md
+++ b/DirectProgramming/C++SYCL_FPGA/ReferenceDesigns/crr/README.md
@@ -44,7 +44,7 @@ You can also find more information about [troubleshooting build errors](/DirectP
> **Note**: Even though the Intel DPC++/C++ OneAPI compiler is enough to compile for emulation, generating reports and generating RTL, there are extra software requirements for the simulation flow and FPGA compiles.
>
-> For using the simulator flow, one of the following simulators must be installed and accessible through your PATH:
+> To use the simulator flow, Intel® Quartus® Prime Pro Edition and one of the following simulators must be installed and accessible through your PATH:
> - Questa*-Intel® FPGA Edition
> - Questa*-Intel® FPGA Starter Edition
> - ModelSim® SE
@@ -168,9 +168,9 @@ This design measures the FPGA performance to determine how many assets can be pr
make fpga_emu
```
2. Generate the HTML performance report.
- ```
- make report
- ```
+ ```
+ make report
+ ```
The report resides at `/reports/report.html`.
3. Compile for FPGA hardware (longer compile time, targets FPGA device).
@@ -218,33 +218,42 @@ This design measures the FPGA performance to determine how many assets can be pr
### On Linux
- 1. Run the sample on the FPGA emulator (the kernel executes on the CPU).
- ```
- ./crr.fpga_emu [-o=]
- ```
- where:
- - `` is an **optional** argument to specify the input data file name. The default input file is `/data/ordered_inputs.csv`.
- - `-o=` is an **optional** argument to specify the name of the output file. The default name of the output file is `ordered_outputs.csv`.
-
- 2. Run the sample on the FPGA device.
- ```
- ./crr.fpga [-o=]
- ```
+1. Run the sample on the FPGA emulator (the kernel executes on the CPU).
+ ```
+ ./crr.fpga_emu <input_file> [-o=<output_file>]
+ ```
+ where:
+ - `<input_file>` is an **optional** argument to specify the input data file name. The default input file is `/data/ordered_inputs.csv`.
+ - `-o=<output_file>` is an **optional** argument to specify the name of the output file. The default name of the output file is `ordered_outputs.csv`.
+2. Run the sample on the FPGA simulator.
+ ```
+ CL_CONTEXT_MPSIM_DEVICE_INTELFPGA=1 ./crr.fpga_sim <input_file> [-o=<output_file>]
+ ```
+3. Run the sample on the FPGA device.
+ ```
+ ./crr.fpga <input_file> [-o=<output_file>]
+ ```
### On Windows
- 1. Run the sample on the FPGA emulator (the kernel executes on the CPU).
- ```
- crr.fpga_emu.exe [-o=]
- ```
- where:
- - `` is an **optional** argument to specify the input data file name. The default input file is `/data/ordered_inputs.csv`.
- - `-o=` is an **optional** argument to specify the name of the output file. The default name of the output file is `ordered_outputs.csv`.
-
- 2. Run the sample on the FPGA device.
- ```
- crr.fpga.exe [-o=]
- ```
+1. Run the sample on the FPGA emulator (the kernel executes on the CPU).
+ ```
+ crr.fpga_emu.exe <input_file> [-o=<output_file>]
+ ```
+ where:
+ - `<input_file>` is an **optional** argument to specify the input data file name. The default input file is `/data/ordered_inputs.csv`.
+ - `-o=<output_file>` is an **optional** argument to specify the name of the output file. The default name of the output file is `ordered_outputs.csv`.
+2. Run the sample on the FPGA simulator.
+ ```
+ set CL_CONTEXT_MPSIM_DEVICE_INTELFPGA=1
+ crr.fpga_sim.exe <input_file> [-o=<output_file>]
+ set CL_CONTEXT_MPSIM_DEVICE_INTELFPGA=
+ ```
+
+3. Run the sample on the FPGA device.
+ ```
+ crr.fpga.exe <input_file> [-o=<output_file>]
+ ```
## Example Output
diff --git a/DirectProgramming/C++SYCL_FPGA/ReferenceDesigns/crr/src/CMakeLists.txt b/DirectProgramming/C++SYCL_FPGA/ReferenceDesigns/crr/src/CMakeLists.txt
index 4035ba1c89..448a5a0769 100755
--- a/DirectProgramming/C++SYCL_FPGA/ReferenceDesigns/crr/src/CMakeLists.txt
+++ b/DirectProgramming/C++SYCL_FPGA/ReferenceDesigns/crr/src/CMakeLists.txt
@@ -1,6 +1,7 @@
set(TARGET_NAME crr)
set(SOURCE_FILE main.cpp)
set(EMULATOR_TARGET ${TARGET_NAME}.fpga_emu)
+set(SIMULATOR_TARGET ${TARGET_NAME}.fpga_sim)
set(FPGA_TARGET ${TARGET_NAME}.fpga)
# FPGA board selection
@@ -53,14 +54,17 @@ message(STATUS "OUTER_UNROLL_POW2=${OUTER_UNROLL_POW2}")
# 1. The "compile" stage compiles the device code to an intermediate representation (SPIR-V).
# 2. The "link" stage invokes the compiler's FPGA backend before linking.
# For this reason, FPGA backend flags must be passed as link flags in CMake.
-set(EMULATOR_COMPILE_FLAGS "-Wall ${WIN_FLAG} -fsycl -fintelfpga -DOUTER_UNROLL=${OUTER_UNROLL} -DINNER_UNROLL=${INNER_UNROLL} -DOUTER_UNROLL_POW2=${OUTER_UNROLL_POW2} -DFPGA_EMULATOR")
+set(EMULATOR_COMPILE_FLAGS "-fsycl -fintelfpga -Wall ${WIN_FLAG} -DOUTER_UNROLL=${OUTER_UNROLL} -DINNER_UNROLL=${INNER_UNROLL} -DOUTER_UNROLL_POW2=${OUTER_UNROLL_POW2} -DFPGA_EMULATOR")
set(EMULATOR_LINK_FLAGS "-fsycl -fintelfpga -DOUTER_UNROLL=${OUTER_UNROLL} -DINNER_UNROLL=${INNER_UNROLL} -DOUTER_UNROLL_POW2=${OUTER_UNROLL_POW2}")
-set(HARDWARE_COMPILE_FLAGS "-Wall ${WIN_FLAG} -fsycl -fintelfpga -DOUTER_UNROLL=${OUTER_UNROLL} -DINNER_UNROLL=${INNER_UNROLL} -DOUTER_UNROLL_POW2=${OUTER_UNROLL_POW2}")
+set(SIMULATOR_COMPILE_FLAGS "-fsycl -fintelfpga -Wall ${WIN_FLAG} -DOUTER_UNROLL=${OUTER_UNROLL} -DINNER_UNROLL=${INNER_UNROLL} -DOUTER_UNROLL_POW2=${OUTER_UNROLL_POW2} -Xssimulation -DFPGA_SIMULATOR")
+set(SIMULATOR_LINK_FLAGS "-fsycl -fintelfpga -Xssimulation -Xsghdl -Xstarget=${FPGA_DEVICE} -Xsdaz -Xsrounding=faithful -DOUTER_UNROLL=${OUTER_UNROLL} -DINNER_UNROLL=${INNER_UNROLL} -DOUTER_UNROLL_POW2=${OUTER_UNROLL_POW2} ${USER_HARDWARE_FLAGS}")
+set(HARDWARE_COMPILE_FLAGS "-fsycl -fintelfpga -Wall ${WIN_FLAG} -DOUTER_UNROLL=${OUTER_UNROLL} -DINNER_UNROLL=${INNER_UNROLL} -DOUTER_UNROLL_POW2=${OUTER_UNROLL_POW2} -DFPGA_HARDWARE")
set(HARDWARE_LINK_FLAGS "-fsycl -fintelfpga -Xshardware -Xsdaz -Xsrounding=faithful -Xsparallel=2 ${SEED} -Xstarget=${FPGA_DEVICE} -DOUTER_UNROLL=${OUTER_UNROLL} -DINNER_UNROLL=${INNER_UNROLL} -DOUTER_UNROLL_POW2=${OUTER_UNROLL_POW2} ${USER_HARDWARE_FLAGS}")
# use cmake -D USER_HARDWARE_FLAGS= to set extra flags for FPGA backend compilation
# Copy input data
configure_file("data/ordered_inputs.csv" "data/ordered_inputs.csv" COPYONLY)
+configure_file("data/ordered_inputs.csv" "data/small_ordered_inputs.csv" COPYONLY)
###############################################################################
### FPGA Emulator
@@ -71,6 +75,15 @@ set_target_properties(${EMULATOR_TARGET} PROPERTIES COMPILE_FLAGS "${EMULATOR_CO
set_target_properties(${EMULATOR_TARGET} PROPERTIES LINK_FLAGS "${EMULATOR_LINK_FLAGS}")
add_custom_target(fpga_emu DEPENDS ${EMULATOR_TARGET})
+###############################################################################
+### FPGA Simulator
+###############################################################################
+add_executable(${SIMULATOR_TARGET} ${SOURCE_FILE})
+target_include_directories(${SIMULATOR_TARGET} PRIVATE ../../../include)
+set_target_properties(${SIMULATOR_TARGET} PROPERTIES COMPILE_FLAGS "${SIMULATOR_COMPILE_FLAGS}")
+set_target_properties(${SIMULATOR_TARGET} PROPERTIES LINK_FLAGS "${SIMULATOR_LINK_FLAGS}")
+add_custom_target(fpga_sim DEPENDS ${SIMULATOR_TARGET})
+
###############################################################################
### Generate Report
###############################################################################
diff --git a/DirectProgramming/C++SYCL_FPGA/ReferenceDesigns/crr/src/main.cpp b/DirectProgramming/C++SYCL_FPGA/ReferenceDesigns/crr/src/main.cpp
index 099bf45125..ac229bec89 100644
--- a/DirectProgramming/C++SYCL_FPGA/ReferenceDesigns/crr/src/main.cpp
+++ b/DirectProgramming/C++SYCL_FPGA/ReferenceDesigns/crr/src/main.cpp
@@ -250,8 +250,8 @@ double CrrSolver(const int n_items, vector &in_params,
// Update optval[] -- calculate each level of the binomial tree.
// reg[] helps to achieve updating INNER_UNROLL elements in optval[]
// simultaneously.
- [[intel::disable_loop_pipelining]] for (short t = 0;
- t <= steps - 1; ++t) {
+ [[intel::disable_loop_pipelining]] // NO-FORMAT: Attribute
+ for (short t = 0; t <= steps - 1; ++t) {
[[intel::fpga_register]] double reg[INNER_UNROLL + 1][OUTER_UNROLL];
double val_1, val_2;
@@ -264,8 +264,8 @@ double CrrSolver(const int n_items, vector &in_params,
// L4:
// Calculate all the elements in optval[] -- all the tree nodes
// for one level of the tree
- [[intel::ivdep]] for (int n = 0; n <= steps - 1 - t;
- n += INNER_UNROLL) {
+ [[intel::ivdep]] // NO-FORMAT: Attribute
+ for (int n = 0; n <= steps - 1 - t; n += INNER_UNROLL) {
#pragma unroll
for (short ic = 0; ic < OUTER_UNROLL; ++ic) {
@@ -732,20 +732,20 @@ int main(int argc, char *argv[]) {
}
try {
-#if defined(FPGA_EMULATOR)
- ext::intel::fpga_emulator_selector device_selector;
-#else
- ext::intel::fpga_selector device_selector;
-#endif
- queue q(device_selector, fpga_tools::exception_handler);
+#if FPGA_HARDWARE
+ auto selector = sycl::ext::intel::fpga_selector_v;
+#else // #if FPGA_EMULATOR
+ auto selector = sycl::ext::intel::fpga_emulator_selector_v;
+#endif
- std::cout << "Running on device: "
- << q.get_device().get_info().c_str() << "\n";
+ queue q(selector, fpga_tools::exception_handler);
device device = q.get_device();
- std::cout << "Device name: "
- << device.get_info().c_str() << "\n \n \n";
+
+ std::cout << "Running on device: "
+ << device.get_info().c_str()
+ << std::endl;
vector inp;
@@ -820,17 +820,19 @@ int main(int argc, char *argv[]) {
vector in_buff_params(n_crrs * 3);
vector in_buff2_params(n_crrs * 3);
- vector res_params(n_crrs * 3);
- vector res_params_dummy(n_crrs * 3);
-
// Prepare metadata as input to kernel
PrepareKernelData(in_params, array_params, in_buff_params, in_buff2_params,
n_crrs);
+#ifdef FPGA_HARDWARE
// warmup run - use this run to warmup accelerator
+ vector res_params_dummy(n_crrs * 3);
CrrSolver(n_crrs, in_buff_params, res_params_dummy, in_buff2_params,
q);
+#endif
+
// Timed run - profile performance
+ vector res_params(n_crrs * 3);
double time = CrrSolver(n_crrs, in_buff_params, res_params,
in_buff2_params, q);
bool pass = true;
diff --git a/DirectProgramming/C++SYCL_FPGA/ReferenceDesigns/db/README.md b/DirectProgramming/C++SYCL_FPGA/ReferenceDesigns/db/README.md
index c66454b764..31c532ef45 100644
--- a/DirectProgramming/C++SYCL_FPGA/ReferenceDesigns/db/README.md
+++ b/DirectProgramming/C++SYCL_FPGA/ReferenceDesigns/db/README.md
@@ -43,7 +43,7 @@ You can also find more information about [troubleshooting build errors](/DirectP
> **Note**: Even though the Intel DPC++/C++ OneAPI compiler is enough to compile for emulation, generating reports and generating RTL, there are extra software requirements for the simulation flow and FPGA compiles.
>
-> For using the simulator flow, one of the following simulators must be installed and accessible through your PATH:
+> For using the simulator flow, Intel® Quartus® Prime Pro Edition and one of the following simulators must be installed and accessible through your PATH:
> - Questa*-Intel® FPGA Edition
> - Questa*-Intel® FPGA Starter Edition
> - ModelSim® SE
@@ -154,24 +154,29 @@ Query 12 showcases the `MergeJoin` database operator. The block diagram of the d
3. Compile the design. (The provided targets match the recommended development flow.)
- 1. Compile for emulation (fast compile time, targets emulated FPGA device).
- ```
- make fpga_emu
- ```
- 2. Generate HTML performance report.
- ```
- make report
- ```
- The report resides at `db_report.prj/reports/report.html`.
+ 1. Compile for emulation (fast compile time, targets emulated FPGA device).
+ ```
+ make fpga_emu
+ ```
+ 2. Compile for simulation (fast compile time, targets simulator FPGA device):
+ ```
+ make fpga_sim
+ ```
+ 3. Generate HTML performance report.
+ ```
+ make report
+ ```
+ The report resides at `db_report.prj/reports/report.html`.
>**Note**: If you are compiling Query 9 (`-DQUERY=9`), expect a long report generation time. You can download pre-generated reports from [https://iotdk.intel.com/fpga-precompiled-binaries/latest/db.fpga.tar.gz](https://iotdk.intel.com/fpga-precompiled-binaries/latest/db.fpga.tar.gz).
- 3. Compile for FPGA hardware (longer compile time, targets FPGA device).
+ 4. Compile for FPGA hardware (longer compile time, targets FPGA device).
+
+ ```
+ make fpga
+ ```
+ When building for hardware, the default scale factor is **1**. To use the smaller scale factor of 0.01, add the flag `-DSF_SMALL=1` to the original `cmake` command. For example: `cmake .. -DQUERY=11 -DSF_SMALL=1`. See the [Database files](#database-files) for more information.
- ```
- make fpga
- ```
- When building for hardware, the default scale factor is **1**. To use the smaller scale factor of 0.01, add the flag `-DSF_SMALL=1` to the original `cmake` command. For example: `cmake .. -DQUERY=9 -DSF_SMALL=1`. See the [Database files](#database-files) for more information.
(Optional) The hardware compile may take several hours to complete. You can download a pre-compiled binary (compatible with Linux* Ubuntu* 18.04) for an Intel® FPGA PAC D5005 (with Intel Stratix® 10 SX) from [https://iotdk.intel.com/fpga-precompiled-binaries/latest/db.fpga.tar.gz](https://iotdk.intel.com/fpga-precompiled-binaries/latest/db.fpga.tar.gz).
@@ -190,23 +195,27 @@ Query 12 showcases the `MergeJoin` database operator. The block diagram of the d
3. Compile the design. (The provided targets match the recommended development flow.)
- 1. Compile for emulation (fast compile time, targets emulated FPGA device).
-
- ```
- nmake fpga_emu
- ```
- 2. Generate HTML performance report.
- ```
- nmake report
- ```
- The report resides at `db_report.prj/reports/report.html` directory.
+ 1. Compile for emulation (fast compile time, targets emulated FPGA device).
+ ```
+ nmake fpga_emu
+ ```
+ 2. Compile for simulation (fast compile time, targets simulator FPGA device):
+ ```
+ nmake fpga_sim
+ ```
+ 3. Generate HTML performance report.
+ ```
+ nmake report
+ ```
+ The report resides at `db_report.prj/reports/report.html`.
+
+ >**Note**: If you are compiling Query 9 (`-DQUERY=9`), expect a long report generation time.
+
+ 4. Compile for FPGA hardware (longer compile time, targets FPGA device):
+ ```
+ nmake fpga
+ ```
- >**Note**: If you are compiling Query 9 (`-DQUERY=9`), expect a long report generation time.
-
- 3. Compile for FPGA hardware (longer compile time, targets FPGA device):
- ```
- nmake fpga
- ```
>**Note**: If you encounter any issues with long paths when compiling under Windows*, you may have to create your ‘build’ directory in a shorter path, for example `C:\samples\build`. You can then run cmake from that directory, and provide cmake with the full path to your sample directory.
## Run the `DB` Reference Design
@@ -224,26 +233,34 @@ Query 12 showcases the `MergeJoin` database operator. The block diagram of the d
### On Linux
- 1. Run the design on the FPGA emulator (the kernel executes on the CPU).
- ```
- ./db.fpga_emu --dbroot=../data/sf0.01 --test
- ```
- (Optional) Run the design for queries `9`, `11` and `12`.
-
-2. Run the design on an FPGA device.
+1. Run the design on the FPGA emulator (the kernel executes on the CPU).
+ ```
+ ./db.fpga_emu --dbroot=../data/sf0.01 --test
+ ```
+ (Optional) Run the design for queries `9`, `11` and `12`.
+2. Run the sample on the FPGA simulator device:
+ ```
+ CL_CONTEXT_MPSIM_DEVICE_INTELFPGA=1 ./db.fpga_sim --dbroot=../data/sf0.01 --test
+ ```
+3. Run the design on an FPGA device.
```
./db.fpga --dbroot=../data/sf1 --test
```
### On Windows
- 1. Run the sample on the FPGA emulator (the kernel executes on the CPU).
- ```
- db.fpga_emu.exe --dbroot=../data/sf0.01 --test
- ```
- (Optional) Run the design for queries `9`, `11` and `12`.
-
-2. Run the sample on an FPGA device.
+1. Run the sample on the FPGA emulator (the kernel executes on the CPU).
+ ```
+ db.fpga_emu.exe --dbroot=../data/sf0.01 --test
+ ```
+ (Optional) Run the design for queries `9`, `11` and `12`.
+2. Run the sample on the FPGA simulator device:
+ ```
+ set CL_CONTEXT_MPSIM_DEVICE_INTELFPGA=1
+ db.fpga_sim.exe --dbroot=../data/sf0.01 --test
+ set CL_CONTEXT_MPSIM_DEVICE_INTELFPGA=
+ ```
+3. Run the sample on an FPGA device.
```
db.fpga.exe --dbroot=../data/sf1 --test
```
diff --git a/DirectProgramming/C++SYCL_FPGA/ReferenceDesigns/db/src/CMakeLists.txt b/DirectProgramming/C++SYCL_FPGA/ReferenceDesigns/db/src/CMakeLists.txt
index 30849bf784..339f3e0a5d 100755
--- a/DirectProgramming/C++SYCL_FPGA/ReferenceDesigns/db/src/CMakeLists.txt
+++ b/DirectProgramming/C++SYCL_FPGA/ReferenceDesigns/db/src/CMakeLists.txt
@@ -1,6 +1,7 @@
set(TARGET_NAME db)
set(SOURCE_FILE db.cpp dbdata.cpp)
set(EMULATOR_TARGET ${TARGET_NAME}.fpga_emu)
+set(SIMULATOR_TARGET ${TARGET_NAME}.fpga_sim)
set(FPGA_TARGET ${TARGET_NAME}.fpga)
# which query are we doing?
@@ -132,11 +133,13 @@ endif()
# 1. The "compile" stage compiles the device code to an intermediate representation (SPIR-V).
# 2. The "link" stage invokes the compiler's FPGA backend before linking.
# For this reason, FPGA backend flags must be passed as link flags in CMake.
-set(EMULATOR_COMPILE_FLAGS "-Wall ${WIN_FLAG} -fsycl -fintelfpga -DQUERY=${QUERY} ${SF_SMALL_ARG} ${AC_TYPES_FLAG} -DFPGA_EMULATOR")
+set(EMULATOR_COMPILE_FLAGS "-fsycl -fintelfpga -Wall ${WIN_FLAG} -DQUERY=${QUERY} ${SF_SMALL_ARG} ${AC_TYPES_FLAG} -DFPGA_EMULATOR")
set(EMULATOR_LINK_FLAGS "-fsycl -fintelfpga -DQUERY=${QUERY} ${SF_SMALL_ARG} ${AC_TYPES_FLAG}")
-set(REPORT_COMPILE_FLAGS "-Wall ${WIN_FLAG} -fsycl -fintelfpga -DQUERY=${QUERY} ${SF_SMALL_ARG} ${AC_TYPES_FLAG}")
+set(SIMULATOR_COMPILE_FLAGS "-fsycl -fintelfpga -Wall ${WIN_FLAG} -DQUERY=${QUERY} ${SF_SMALL_ARG} ${AC_TYPES_FLAG} -Xssimulation -DFPGA_SIMULATOR")
+set(SIMULATOR_LINK_FLAGS "-fsycl -fintelfpga -Xssimulation -Xsghdl -Xstarget=${FPGA_DEVICE} -DQUERY=${QUERY} ${SF_SMALL_ARG} ${USER_HARDWARE_FLAGS} ${AC_TYPES_FLAG}")
+set(REPORT_COMPILE_FLAGS "-fsycl -fintelfpga -Wall ${WIN_FLAG} -DQUERY=${QUERY} ${SF_SMALL_ARG} ${AC_TYPES_FLAG} -DFPGA_HARDWARE")
set(REPORT_LINK_FLAGS "-fsycl -fintelfpga -Xshardware -Xsparallel=2 -Xsseed=2 -Xstarget=${FPGA_DEVICE} -DQUERY=${QUERY} ${SF_SMALL_ARG} ${USER_HARDWARE_FLAGS}")
-set(HARDWARE_COMPILE_FLAGS "-Wall ${WIN_FLAG} -fsycl -fintelfpga -DQUERY=${QUERY} ${SF_SMALL_ARG} ${AC_TYPES_FLAG}")
+set(HARDWARE_COMPILE_FLAGS "-fsycl -fintelfpga -Wall ${WIN_FLAG} -DQUERY=${QUERY} ${SF_SMALL_ARG} ${AC_TYPES_FLAG} -DFPGA_HARDWARE")
set(HARDWARE_LINK_FLAGS "-fsycl -fintelfpga -Xshardware -Xsparallel=2 ${SEED} -Xstarget=${FPGA_DEVICE} -DQUERY=${QUERY} ${SF_SMALL_ARG} ${USER_HARDWARE_FLAGS} ${AC_TYPES_FLAG}")
# use cmake -D USER_HARDWARE_FLAGS= to set extra flags for FPGA backend compilation
@@ -149,6 +152,15 @@ set_target_properties(${EMULATOR_TARGET} PROPERTIES COMPILE_FLAGS "${EMULATOR_CO
set_target_properties(${EMULATOR_TARGET} PROPERTIES LINK_FLAGS "${EMULATOR_LINK_FLAGS}")
add_custom_target(fpga_emu DEPENDS ${EMULATOR_TARGET})
+###############################################################################
+### FPGA Simulator
+###############################################################################
+add_executable(${SIMULATOR_TARGET} ${SOURCE_FILE} ${DEVICE_SOURCE})
+target_include_directories(${SIMULATOR_TARGET} PRIVATE ../../../include)
+set_target_properties(${SIMULATOR_TARGET} PROPERTIES COMPILE_FLAGS "${SIMULATOR_COMPILE_FLAGS}")
+set_target_properties(${SIMULATOR_TARGET} PROPERTIES LINK_FLAGS "${SIMULATOR_LINK_FLAGS}")
+add_custom_target(fpga_sim DEPENDS ${SIMULATOR_TARGET})
+
###############################################################################
### Generate Report
###############################################################################
diff --git a/DirectProgramming/C++SYCL_FPGA/ReferenceDesigns/db/src/db.cpp b/DirectProgramming/C++SYCL_FPGA/ReferenceDesigns/db/src/db.cpp
index fee2020eb8..de81a8905a 100644
--- a/DirectProgramming/C++SYCL_FPGA/ReferenceDesigns/db/src/db.cpp
+++ b/DirectProgramming/C++SYCL_FPGA/ReferenceDesigns/db/src/db.cpp
@@ -124,10 +124,12 @@ int main(int argc, char* argv[]) {
std::string args = "";
unsigned int query = QUERY;
bool test_query = false;
-#ifndef FPGA_EMULATOR
- unsigned int runs = 5;
-#else
+#if defined(FPGA_EMULATOR)
unsigned int runs = 1;
+#elif defined(FPGA_SIMULATOR)
+ unsigned int runs = 1;
+#else
+ unsigned int runs = 5;
#endif
bool print_result = false;
bool need_help = false;
@@ -157,7 +159,8 @@ int main(int argc, char* argv[]) {
// a 'warmup' iteration
runs = std::max(2, atoi(str_after_equals.c_str()) + 1);
#else
- // for emulation, allow a single iteration and don't add a 'warmup' run
+ // for emulation and simulation, allow a single iteration and
+ // don't add a 'warmup' run
runs = std::max(1, atoi(str_after_equals.c_str()));
#endif
} else {
@@ -191,16 +194,23 @@ int main(int argc, char* argv[]) {
// queue properties to enable profiling
auto props = property_list{property::queue::enable_profiling()};
- // the device selector
-#ifdef FPGA_EMULATOR
- ext::intel::fpga_emulator_selector selector;
-#else
- ext::intel::fpga_selector selector;
+#if FPGA_SIMULATOR
+ auto selector = sycl::ext::intel::fpga_simulator_selector_v;
+#elif FPGA_HARDWARE
+ auto selector = sycl::ext::intel::fpga_selector_v;
+#else // #if FPGA_EMULATOR
+ auto selector = sycl::ext::intel::fpga_emulator_selector_v;
#endif
// create the device queue
queue q(selector, fpga_tools::exception_handler, props);
+ device device = q.get_device();
+
+ std::cout << "Running on device: "
+ << device.get_info().c_str()
+ << std::endl;
+
// parse the database files located in the 'db_root_dir' directory
bool success = dbinfo.Parse(db_root_dir);
if (!success) {
@@ -259,7 +269,7 @@ int main(int argc, char* argv[]) {
if (success) {
// don't analyze the runtime in emulation
-#ifndef FPGA_EMULATOR
+#if !defined(FPGA_EMULATOR) && !defined(FPGA_SIMULATOR)
// compute the average total latency across all iterations,
// excluding the first 'warmup' iteration
double total_latency_avg =
@@ -292,6 +302,8 @@ int main(int argc, char* argv[]) {
"system has a correctly configured FPGA board.\n";
std::cout << "If you are targeting the FPGA emulator, compile with "
"-DFPGA_EMULATOR.\n";
+ std::cout << "If you are targeting the FPGA simulator, compile with "
+ "-DFPGA_SIMULATOR.\n";
}
std::terminate();
}
diff --git a/DirectProgramming/C++SYCL_FPGA/ReferenceDesigns/db/src/dbdata.hpp b/DirectProgramming/C++SYCL_FPGA/ReferenceDesigns/db/src/dbdata.hpp
index 5e560372ca..437e5801b1 100644
--- a/DirectProgramming/C++SYCL_FPGA/ReferenceDesigns/db/src/dbdata.hpp
+++ b/DirectProgramming/C++SYCL_FPGA/ReferenceDesigns/db/src/dbdata.hpp
@@ -24,7 +24,7 @@ using DBDate = unsigned int;
// The default scale factor for hardware is 1. However,
// the SF_SMALL flag allows the hardware design to be compiled
// with a scale factor of 0.01
-#if defined(FPGA_EMULATOR) || defined(SF_SMALL)
+#if defined(FPGA_EMULATOR) || defined(FPGA_SIMULATOR) || defined(SF_SMALL)
constexpr float kSF = 0.01f;
#else
constexpr float kSF = 1.0f;
diff --git a/DirectProgramming/C++SYCL_FPGA/ReferenceDesigns/db/src/query1/query1_kernel.cpp b/DirectProgramming/C++SYCL_FPGA/ReferenceDesigns/db/src/query1/query1_kernel.cpp
index 85ad0bc090..09e6e083a1 100644
--- a/DirectProgramming/C++SYCL_FPGA/ReferenceDesigns/db/src/query1/query1_kernel.cpp
+++ b/DirectProgramming/C++SYCL_FPGA/ReferenceDesigns/db/src/query1/query1_kernel.cpp
@@ -9,7 +9,11 @@
using namespace std::chrono;
// how many elements to compute per cycle
+#if defined(FPGA_SIMULATOR)
+constexpr int kElementsPerCycle = 2;
+#else
constexpr int kElementsPerCycle = 12;
+#endif
// the kernel name
class Query1;
diff --git a/DirectProgramming/C++SYCL_FPGA/ReferenceDesigns/decompress/README.md b/DirectProgramming/C++SYCL_FPGA/ReferenceDesigns/decompress/README.md
index 278987cd6a..fcb5b7b8d9 100755
--- a/DirectProgramming/C++SYCL_FPGA/ReferenceDesigns/decompress/README.md
+++ b/DirectProgramming/C++SYCL_FPGA/ReferenceDesigns/decompress/README.md
@@ -41,7 +41,7 @@ You can also find more information about [troubleshooting build errors](/DirectP
> **Note**: Even though the Intel DPC++/C++ OneAPI compiler is enough to compile for emulation, generating reports and generating RTL, there are extra software requirements for the simulation flow and FPGA compiles.
>
-> For using the simulator flow, one of the following simulators must be installed and accessible through your PATH:
+> For using the simulator flow, Intel® Quartus® Prime Pro Edition and one of the following simulators must be installed and accessible through your PATH:
> - Questa*-Intel® FPGA Edition
> - Questa*-Intel® FPGA Starter Edition
> - ModelSim® SE
@@ -320,20 +320,24 @@ For `constexpr_math.hpp`, `memory_utils.hpp`, `metaprogramming_utils.hpp`, `tupl
3. Compile the design. (The provided targets match the recommended development flow.)
- 1. Compile for emulation (fast compile time, targets emulated FPGA device).
- ```
- make fpga_emu
- ```
- 2. Generate the HTML performance report.
- ```
- make report
- ```
- The report resides at `decompression type>_report.prj/reports/report/report.html`.
-
- 3. Compile for FPGA hardware (longer compile time, targets FPGA device).
- ```
- make fpga
- ```
+ 1. Compile for emulation (fast compile time, targets emulated FPGA device).
+ ```
+ make fpga_emu
+ ```
+ 2. Compile for simulation (fast compile time, targets simulator FPGA device):
+ ```
+ make fpga_sim
+ ```
+ 3. Generate the HTML performance report.
+ ```
+ make report
+ ```
+ The report resides at `<decompression type>_report.prj/reports/report/report.html`.
+
+ 4. Compile for FPGA hardware (longer compile time, targets FPGA device).
+ ```
+ make fpga
+ ```
(Optional) The hardware compiles listed above can take several hours to complete; alternatively, you can download FPGA precompiled binaries (compatible with Linux* Ubuntu* 18.04) from [https://iotdk.intel.com/fpga-precompiled-binaries/latest/decompress.fpga.tar.gz](https://iotdk.intel.com/fpga-precompiled-binaries/latest/decompress.fpga.tar.gz).
@@ -359,20 +363,24 @@ For `constexpr_math.hpp`, `memory_utils.hpp`, `metaprogramming_utils.hpp`, `tupl
```
3. Compile the design. (The provided targets match the recommended development flow.)
- 1. Compile for emulation (fast compile time, targets emulated FPGA device).
- ```
- nmake fpga_emu
- ```
- 2. Generate the HTML performance report.
- ```
- nmake report
- ```
- The report resides at `_report.a.prj/reports/report/report.html`.
-
- 3. Compile for FPGA hardware (longer compile time, targets FPGA device).
- ```
- nmake fpga
- ```
+ 1. Compile for emulation (fast compile time, targets emulated FPGA device).
+ ```
+ nmake fpga_emu
+ ```
+ 2. Compile for simulation (fast compile time, targets simulator FPGA device):
+ ```
+ nmake fpga_sim
+ ```
+ 3. Generate the HTML performance report.
+ ```
+ nmake report
+ ```
+ The report resides at `<decompression type>_report.a.prj/reports/report/report.html`.
+
+ 4. Compile for FPGA hardware (longer compile time, targets FPGA device).
+ ```
+ nmake fpga
+ ```
> **Note**: If you encounter any issues with long paths when compiling under Windows*, you may have to create your ‘build’ directory in a shorter path, for example `c:\samples\build`. You can then run cmake from that directory, and provide cmake with the full path to your sample directory.
## Run the `Decompression` Program
@@ -380,24 +388,34 @@ For `constexpr_math.hpp`, `memory_utils.hpp`, `metaprogramming_utils.hpp`, `tupl
### On Linux
1. Run the sample on the FPGA emulator (the kernel executes on the CPU).
- ```
- ./decompress.fpga_emu
- ```
-2. Run the sample on the FPGA device.
- ```
- ./decompress.fpga
- ```
+ ```
+ ./decompress.fpga_emu
+ ```
+2. Run the sample on the FPGA simulator device:
+ ```
+ CL_CONTEXT_MPSIM_DEVICE_INTELFPGA=1 ./decompress.fpga_sim
+ ```
+3. Run the sample on the FPGA device.
+ ```
+ ./decompress.fpga
+ ```
### On Windows
1. Run the sample on the FPGA emulator (the kernel executes on the CPU).
- ```
- decompress.fpga_emu.exe
- ```
-2. Run the sample on the FPGA device.
- ```
- decompress.fpga.exe
- ```
+ ```
+ decompress.fpga_emu.exe
+ ```
+2. Run the sample on the FPGA simulator device:
+ ```
+ set CL_CONTEXT_MPSIM_DEVICE_INTELFPGA=1
+ decompress.fpga_sim.exe
+ set CL_CONTEXT_MPSIM_DEVICE_INTELFPGA=
+ ```
+3. Run the sample on the FPGA device.
+ ```
+ decompress.fpga.exe
+ ```
## Example Output
diff --git a/DirectProgramming/C++SYCL_FPGA/ReferenceDesigns/decompress/src/CMakeLists.txt b/DirectProgramming/C++SYCL_FPGA/ReferenceDesigns/decompress/src/CMakeLists.txt
index b1d6850ff7..d01e36b8cd 100644
--- a/DirectProgramming/C++SYCL_FPGA/ReferenceDesigns/decompress/src/CMakeLists.txt
+++ b/DirectProgramming/C++SYCL_FPGA/ReferenceDesigns/decompress/src/CMakeLists.txt
@@ -94,11 +94,11 @@ endif()
# 1. The "compile" stage compiles the device code to an intermediate representation (SPIR-V).
# 2. The "link" stage invokes the compiler's FPGA backend before linking.
# For this reason, FPGA backend flags must be passed as link flags in CMake.
-set(EMULATOR_COMPILE_FLAGS "-Wall ${CONSTEXPR_STEPS} ${WIN_FLAG} -fsycl -fintelfpga ${AC_TYPES_FLAG} ${LITERALS_PER_CYCLE_FLAG} ${DECOMPRESS_FORMAT_FLAG} -DFPGA_EMULATOR")
+set(EMULATOR_COMPILE_FLAGS "-fsycl -fintelfpga -Wall ${CONSTEXPR_STEPS} ${WIN_FLAG} ${AC_TYPES_FLAG} ${LITERALS_PER_CYCLE_FLAG} ${DECOMPRESS_FORMAT_FLAG} -DFPGA_EMULATOR")
set(EMULATOR_LINK_FLAGS "-fsycl -fintelfpga ${AC_TYPES_FLAG}")
-set(SIMULATOR_COMPILE_FLAGS "-Wall ${CONSTEXPR_STEPS} ${WIN_FLAG} -fsycl -fintelfpga ${AC_TYPES_FLAG} ${LITERALS_PER_CYCLE_FLAG} ${DECOMPRESS_FORMAT_FLAG} -DFPGA_SIMULATOR")
+set(SIMULATOR_COMPILE_FLAGS "-fsycl -fintelfpga -Wall ${CONSTEXPR_STEPS} ${WIN_FLAG} ${AC_TYPES_FLAG} ${LITERALS_PER_CYCLE_FLAG} ${DECOMPRESS_FORMAT_FLAG} -DFPGA_SIMULATOR")
set(SIMULATOR_LINK_FLAGS "-fsycl -fintelfpga -Xssimulation -Xsghdl -Xstarget=${FPGA_DEVICE} ${USER_HARDWARE_FLAGS}")
-set(HARDWARE_COMPILE_FLAGS "-Wall ${CONSTEXPR_STEPS} ${WIN_FLAG} -fsycl -fintelfpga ${AC_TYPES_FLAG} ${LITERALS_PER_CYCLE_FLAG} ${DECOMPRESS_FORMAT_FLAG}")
+set(HARDWARE_COMPILE_FLAGS "-fsycl -fintelfpga -Wall ${CONSTEXPR_STEPS} ${WIN_FLAG} ${AC_TYPES_FLAG} ${LITERALS_PER_CYCLE_FLAG} ${DECOMPRESS_FORMAT_FLAG} -DFPGA_HARDWARE")
set(REPORT_LINK_FLAGS "-fsycl -fintelfpga -Xshardware ${PROFILE_FLAG} ${FLAT_COMPILE_FLAG} -Xsparallel=2 ${SEED_FLAG} -Xstarget=${FPGA_DEVICE} ${USER_HARDWARE_FLAGS}")
set(HARDWARE_LINK_FLAGS "${REPORT_LINK_FLAGS} ${AC_TYPES_FLAG}")
# use cmake -D USER_HARDWARE_FLAGS= to set extra flags for FPGA backend compilation
diff --git a/DirectProgramming/C++SYCL_FPGA/ReferenceDesigns/decompress/src/main.cpp b/DirectProgramming/C++SYCL_FPGA/ReferenceDesigns/decompress/src/main.cpp
index a15df060fa..f4dca99219 100644
--- a/DirectProgramming/C++SYCL_FPGA/ReferenceDesigns/decompress/src/main.cpp
+++ b/DirectProgramming/C++SYCL_FPGA/ReferenceDesigns/decompress/src/main.cpp
@@ -124,18 +124,23 @@ int main(int argc, char* argv[]) {
std::cout << "Using " << decompressor_name << " decompression\n";
std::cout << std::endl;
- // the device selector
-#if defined(FPGA_EMULATOR)
- sycl::ext::intel::fpga_emulator_selector selector;
-#elif defined(FPGA_SIMULATOR)
- sycl::ext::intel::fpga_simulator_selector selector;
-#else
- sycl::ext::intel::fpga_selector selector;
+#if FPGA_SIMULATOR
+ auto selector = sycl::ext::intel::fpga_simulator_selector_v;
+#elif FPGA_HARDWARE
+ auto selector = sycl::ext::intel::fpga_selector_v;
+#else // #if FPGA_EMULATOR
+ auto selector = sycl::ext::intel::fpga_emulator_selector_v;
#endif
// create the device queue
queue q(selector, fpga_tools::exception_handler);
+ device device = q.get_device();
+
+ std::cout << "Running on device: "
+ << device.get_info().c_str()
+ << std::endl;
+
// create the decompressor based on which decompression version we are using
#if defined(GZIP)
GzipDecompressorT decompressor;
@@ -185,6 +190,7 @@ bool RunGzipTest(sycl::queue& q, GzipDecompressorT decompressor,
std::string dynamic_compress_filename = test_dir + "/dynamic_compressed.gz";
std::string tp_test_filename = test_dir + "/tp_test.gz";
+#ifndef FPGA_SIMULATOR
std::cout << ">>>>> Uncompressed File Test <<<<<" << std::endl;
bool uncompressed_test_pass = decompressor.DecompressFile(
q, uncompressed_filename, "", 1, false, false);
@@ -196,6 +202,12 @@ bool RunGzipTest(sycl::queue& q, GzipDecompressorT decompressor,
q, static_compress_filename, "", 1, false, false);
PrintTestResults("Statically Compressed File Test", static_test_pass);
std::cout << std::endl;
+#else
+ std::cout << "Only running the Dynamically Compressed File Test when using "
+ "the simulator flow to reduce execution time." << std::endl;
+ bool uncompressed_test_pass = true;
+ bool static_test_pass = true;
+#endif
std::cout << ">>>>> Dynamically Compressed File Test <<<<<" << std::endl;
bool dynamic_test_pass = decompressor.DecompressFile(
@@ -203,12 +215,17 @@ bool RunGzipTest(sycl::queue& q, GzipDecompressorT decompressor,
PrintTestResults("Dynamically Compressed File Test", dynamic_test_pass);
std::cout << std::endl;
+
+#ifndef FPGA_SIMULATOR
std::cout << ">>>>> Throughput Test <<<<<" << std::endl;
constexpr int kTPTestRuns = 5;
bool tp_test_pass = decompressor.DecompressFile(q, tp_test_filename, "",
kTPTestRuns, true, false);
PrintTestResults("Throughput Test", tp_test_pass);
std::cout << std::endl;
+#else
+ bool tp_test_pass = true;
+#endif
return uncompressed_test_pass && static_test_pass && dynamic_test_pass &&
tp_test_pass;
@@ -231,6 +248,7 @@ bool RunSnappyTest(sycl::queue& q, SnappyDecompressorT decompressor,
PrintTestResults("Alice In Wonderland Test", alice_test_pass);
std::cout << std::endl;
+#ifndef FPGA_SIMULATOR
std::cout << ">>>>> Only Literal Strings Test <<<<<" << std::endl;
auto test1_bytes = GenerateSnappyCompressedData(333, 3, 0, 0, 3);
auto test1_ret = decompressor.DecompressBytes(q, test1_bytes, 1, false);
@@ -265,6 +283,11 @@ bool RunSnappyTest(sycl::queue& q, SnappyDecompressorT decompressor,
PrintTestResults("Throughput Test", test_tp_pass);
std::cout << std::endl;
- return test1_pass && test2_pass && test3_pass && test_tp_pass;
+ return alice_test_pass && test1_pass && test2_pass && test3_pass &&
+ test_tp_pass;
+#else
+ return alice_test_pass;
+#endif
+
}
#endif
diff --git a/DirectProgramming/C++SYCL_FPGA/ReferenceDesigns/gzip/README.md b/DirectProgramming/C++SYCL_FPGA/ReferenceDesigns/gzip/README.md
index fe91d4938c..ae930552aa 100644
--- a/DirectProgramming/C++SYCL_FPGA/ReferenceDesigns/gzip/README.md
+++ b/DirectProgramming/C++SYCL_FPGA/ReferenceDesigns/gzip/README.md
@@ -44,7 +44,7 @@ You can also find more information about [troubleshooting build errors](/DirectP
> **Note**: Even though the Intel DPC++/C++ OneAPI compiler is enough to compile for emulation, generating reports and generating RTL, there are extra software requirements for the simulation flow and FPGA compiles.
>
-> For using the simulator flow, one of the following simulators must be installed and accessible through your PATH:
+> For using the simulator flow, Intel® Quartus® Prime Pro Edition and one of the following simulators must be installed and accessible through your PATH:
> - Questa*-Intel® FPGA Edition
> - Questa*-Intel® FPGA Starter Edition
> - ModelSim® SE
@@ -238,7 +238,7 @@ Performance results are based on testing as of October 27, 2020.
2. Run the sample on the FPGA simulator.
```
- ./gzip.fpga_sim -o=
+ CL_CONTEXT_MPSIM_DEVICE_INTELFPGA=1 ./gzip.fpga_sim -o=
```
3. Run the sample on the FPGA device.
@@ -254,7 +254,9 @@ Performance results are based on testing as of October 27, 2020.
```
2. Run the sample on the FPGA simulator.
```
+ set CL_CONTEXT_MPSIM_DEVICE_INTELFPGA=1
gzip.fpga_sim.exe -o=
+ set CL_CONTEXT_MPSIM_DEVICE_INTELFPGA=
```
3. Run the sample on the FPGA device.
```
diff --git a/DirectProgramming/C++SYCL_FPGA/ReferenceDesigns/gzip/src/CMakeLists.txt b/DirectProgramming/C++SYCL_FPGA/ReferenceDesigns/gzip/src/CMakeLists.txt
index 9b07df652a..56b9aabe00 100755
--- a/DirectProgramming/C++SYCL_FPGA/ReferenceDesigns/gzip/src/CMakeLists.txt
+++ b/DirectProgramming/C++SYCL_FPGA/ReferenceDesigns/gzip/src/CMakeLists.txt
@@ -100,11 +100,11 @@ message(STATUS "NUM_REORDER=${NUM_REORDER}")
# 1. The "compile" stage compiles the device code to an intermediate representation (SPIR-V).
# 2. The "link" stage invokes the compiler's FPGA backend before linking.
# For this reason, FPGA backend flags must be passed as link flags in CMake.
-set(EMULATOR_COMPILE_FLAGS "-Wall ${WIN_FLAG} -fsycl -fintelfpga -DNUM_ENGINES=${NUM_ENGINES} -DFPGA_EMULATOR")
+set(EMULATOR_COMPILE_FLAGS "-fsycl -fintelfpga -Wall ${WIN_FLAG} -DNUM_ENGINES=${NUM_ENGINES} -DFPGA_EMULATOR")
set(EMULATOR_LINK_FLAGS "-fsycl -fintelfpga -DNUM_ENGINES=${NUM_ENGINES}")
-set(SIMULATOR_COMPILE_FLAGS "-Wall ${WIN_FLAG} -fsycl -fintelfpga -Xssimulation -DNUM_ENGINES=${NUM_ENGINES} -DFPGA_SIMULATOR")
+set(SIMULATOR_COMPILE_FLAGS "-fsycl -fintelfpga -Wall ${WIN_FLAG} -Xssimulation -DNUM_ENGINES=${NUM_ENGINES} -DFPGA_SIMULATOR")
set(SIMULATOR_LINK_FLAGS "-fsycl -fintelfpga -Xssimulation -Xstarget=${FPGA_DEVICE} -DNUM_ENGINES=${NUM_ENGINES} ${USER_SIMULATOR_FLAGS}")
-set(HARDWARE_COMPILE_FLAGS "-Wall ${WIN_FLAG} -fsycl -fintelfpga -DNUM_ENGINES=${NUM_ENGINES}")
+set(HARDWARE_COMPILE_FLAGS "-fsycl -fintelfpga -Wall ${WIN_FLAG} -DNUM_ENGINES=${NUM_ENGINES} -DFPGA_HARDWARE")
set(HARDWARE_LINK_FLAGS "-fsycl -fintelfpga -Xshardware -Xsparallel=2 -Xsopt-arg=\"-nocaching\" -Xstarget=${FPGA_DEVICE} -DNUM_ENGINES=${NUM_ENGINES} ${USER_HARDWARE_FLAGS}")
# use cmake -D USER_HARDWARE_FLAGS= to set extra flags for FPGA backend compilation
diff --git a/DirectProgramming/C++SYCL_FPGA/ReferenceDesigns/gzip/src/gzip.cpp b/DirectProgramming/C++SYCL_FPGA/ReferenceDesigns/gzip/src/gzip.cpp
index 183e6732e1..fe4e825334 100644
--- a/DirectProgramming/C++SYCL_FPGA/ReferenceDesigns/gzip/src/gzip.cpp
+++ b/DirectProgramming/C++SYCL_FPGA/ReferenceDesigns/gzip/src/gzip.cpp
@@ -120,18 +120,23 @@ int main(int argc, char *argv[]) {
}
try {
-#ifdef FPGA_EMULATOR
- ext::intel::fpga_emulator_selector device_selector;
-#elif FPGA_SIMULATOR
- ext::intel::fpga_simulator_selector device_selector;
-#else
- ext::intel::fpga_selector device_selector;
+
+#if FPGA_SIMULATOR
+ auto selector = sycl::ext::intel::fpga_simulator_selector_v;
+#elif FPGA_HARDWARE
+ auto selector = sycl::ext::intel::fpga_selector_v;
+#else // #if FPGA_EMULATOR
+ auto selector = sycl::ext::intel::fpga_emulator_selector_v;
#endif
+
auto prop_list = property_list{property::queue::enable_profiling()};
- queue q(device_selector, fpga_tools::exception_handler, prop_list);
+ queue q(selector, fpga_tools::exception_handler, prop_list);
+
+ auto device = q.get_device();
- std::cout << "Running on device: "
- << q.get_device().get_info().c_str() << "\n";
+ std::cout << "Running on device: "
+ << device.get_info().c_str()
+ << std::endl;
if (infilename == "") {
std::cout << "Must specify a filename to compress\n\n";
diff --git a/DirectProgramming/C++SYCL_FPGA/ReferenceDesigns/gzip/src/gzip_ll.cpp b/DirectProgramming/C++SYCL_FPGA/ReferenceDesigns/gzip/src/gzip_ll.cpp
index fac52dcd3d..b73a13d2e7 100644
--- a/DirectProgramming/C++SYCL_FPGA/ReferenceDesigns/gzip/src/gzip_ll.cpp
+++ b/DirectProgramming/C++SYCL_FPGA/ReferenceDesigns/gzip/src/gzip_ll.cpp
@@ -125,18 +125,23 @@ int main(int argc, char *argv[]) {
}
try {
-#ifdef FPGA_EMULATOR
- ext::intel::fpga_emulator_selector device_selector;
-#elif FPGA_SIMULATOR
- ext::intel::fpga_simulator_selector device_selector;
-#else
- ext::intel::fpga_selector device_selector;
+
+#if FPGA_SIMULATOR
+ auto selector = sycl::ext::intel::fpga_simulator_selector_v;
+#elif FPGA_HARDWARE
+ auto selector = sycl::ext::intel::fpga_selector_v;
+#else // #if FPGA_EMULATOR
+ auto selector = sycl::ext::intel::fpga_emulator_selector_v;
#endif
+
auto prop_list = property_list{property::queue::enable_profiling()};
- queue q(device_selector, fpga_tools::exception_handler, prop_list);
+ queue q(selector, fpga_tools::exception_handler, prop_list);
+
+ auto device = q.get_device();
- std::cout << "Running on device: "
- << q.get_device().get_info().c_str() << "\n";
+ std::cout << "Running on device: "
+ << device.get_info().c_str()
+ << std::endl;
if (infilename == "") {
std::cout << "Must specify a filename to compress\n\n";
diff --git a/DirectProgramming/C++SYCL_FPGA/ReferenceDesigns/merge_sort/README.md b/DirectProgramming/C++SYCL_FPGA/ReferenceDesigns/merge_sort/README.md
index ff6a3b9beb..952891b52e 100755
--- a/DirectProgramming/C++SYCL_FPGA/ReferenceDesigns/merge_sort/README.md
+++ b/DirectProgramming/C++SYCL_FPGA/ReferenceDesigns/merge_sort/README.md
@@ -46,7 +46,7 @@ You can also find more information about [troubleshooting build errors](/DirectP
> **Note**: Even though the Intel DPC++/C++ OneAPI compiler is enough to compile for emulation, generating reports and generating RTL, there are extra software requirements for the simulation flow and FPGA compiles.
>
-> For using the simulator flow, one of the following simulators must be installed and accessible through your PATH:
+> For using the simulator flow, Intel® Quartus® Prime Pro Edition and one of the following simulators must be installed and accessible through your PATH:
> - Questa*-Intel® FPGA Edition
> - Questa*-Intel® FPGA Starter Edition
> - ModelSim® SE
@@ -133,19 +133,23 @@ For `constexpr_math.hpp`, `pipe_utils.hpp`, and `unrolled_loop.hpp` see the READ
3. Compile the design. (The provided targets match the recommended development flow.)
1. Compile for emulation (fast compile time, targets emulated FPGA device).
- ```
- make fpga_emu
- ```
- 2. Generate the HTML performance report.
- ```
- make report
- ```
+ ```
+ make fpga_emu
+ ```
+ 2. Compile for simulation (fast compile time, targets simulator FPGA device):
+ ```
+ make fpga_sim
+ ```
+ 3. Generate the HTML performance report.
+ ```
+ make report
+ ```
The report resides at `merge_sort_report.prj/reports/report.html`.
- 3. Compile for FPGA hardware (longer compile time, targets FPGA device).
- ```
- make fpga
- ```
+ 4. Compile for FPGA hardware (longer compile time, targets FPGA device).
+ ```
+ make fpga
+ ```
(Optional) The hardware compiles listed above can take several hours to complete; alternatively, you can download FPGA precompiled binaries (compatible with Linux* Ubuntu* 18.04) from [https://iotdk.intel.com/fpga-precompiled-binaries/latest/merge_sort.fpga.tar.gz](https://iotdk.intel.com/fpga-precompiled-binaries/latest/merge_sort.fpga.tar.gz).
@@ -170,13 +174,17 @@ For `constexpr_math.hpp`, `pipe_utils.hpp`, and `unrolled_loop.hpp` see the READ
```
nmake fpga_emu
```
- 2. Generate the HTML performance report.
+ 2. Compile for simulation (fast compile time, targets simulator FPGA device):
+ ```
+ nmake fpga_sim
+ ```
+ 3. Generate the HTML performance report.
```
nmake report
```
The report resides at `merge_sort_report.a.prj/reports/report.html`.
- 3. Compile for FPGA hardware (longer compile time, targets FPGA device).
+ 4. Compile for FPGA hardware (longer compile time, targets FPGA device).
```
nmake fpga
```
@@ -190,7 +198,11 @@ For `constexpr_math.hpp`, `pipe_utils.hpp`, and `unrolled_loop.hpp` see the READ
```
./merge_sort.fpga_emu
```
-2. Run the sample on the FPGA device.
+2. Run the sample on the FPGA simulator device:
+ ```
+ CL_CONTEXT_MPSIM_DEVICE_INTELFPGA=1 ./merge_sort.fpga_sim
+ ```
+3. Run the sample on the FPGA device.
```
./merge_sort.fpga
```
@@ -200,7 +212,13 @@ For `constexpr_math.hpp`, `pipe_utils.hpp`, and `unrolled_loop.hpp` see the READ
```
merge_sort.fpga_emu.exe
```
-2. Run the sample on the FPGA device.
+2. Run the sample on the FPGA simulator device:
+ ```
+ set CL_CONTEXT_MPSIM_DEVICE_INTELFPGA=1
+ merge_sort.fpga_sim.exe
+ set CL_CONTEXT_MPSIM_DEVICE_INTELFPGA=
+ ```
+3. Run the sample on the FPGA device.
```
merge_sort.fpga.exe
```
diff --git a/DirectProgramming/C++SYCL_FPGA/ReferenceDesigns/merge_sort/src/CMakeLists.txt b/DirectProgramming/C++SYCL_FPGA/ReferenceDesigns/merge_sort/src/CMakeLists.txt
index 319e342ba9..917d1e16c9 100644
--- a/DirectProgramming/C++SYCL_FPGA/ReferenceDesigns/merge_sort/src/CMakeLists.txt
+++ b/DirectProgramming/C++SYCL_FPGA/ReferenceDesigns/merge_sort/src/CMakeLists.txt
@@ -1,6 +1,7 @@
set(TARGET_NAME merge_sort)
set(SOURCE_FILE main.cpp)
set(EMULATOR_TARGET ${TARGET_NAME}.fpga_emu)
+set(SIMULATOR_TARGET ${TARGET_NAME}.fpga_sim)
set(FPGA_TARGET ${TARGET_NAME}.fpga)
# FPGA board selection
@@ -65,9 +66,11 @@ endif()
# 1. The "compile" stage compiles the device code to an intermediate representation (SPIR-V).
# 2. The "link" stage invokes the compiler's FPGA backend before linking.
# For this reason, FPGA backend flags must be passed as link flags in CMake.
-set(EMULATOR_COMPILE_FLAGS "-Wall ${WIN_FLAG} -fsycl -fintelfpga ${ENABLE_USM} ${MERGE_UNITS_FLAG} ${SORT_WIDTH_FLAG} -DFPGA_EMULATOR")
+set(EMULATOR_COMPILE_FLAGS "-fsycl -fintelfpga -Wall ${WIN_FLAG} ${ENABLE_USM} ${MERGE_UNITS_FLAG} ${SORT_WIDTH_FLAG} -DFPGA_EMULATOR")
set(EMULATOR_LINK_FLAGS "-fsycl -fintelfpga ${ENABLE_USM} ${MERGE_UNITS_FLAG} ${SORT_WIDTH_FLAG}")
-set(HARDWARE_COMPILE_FLAGS "-Wall ${WIN_FLAG} -fsycl -fintelfpga ${ENABLE_USM} ${MERGE_UNITS_FLAG} ${SORT_WIDTH_FLAG}")
+set(SIMULATOR_COMPILE_FLAGS "-fsycl -fintelfpga -Wall ${WIN_FLAG} -Xssimulation -DFPGA_SIMULATOR ${ENABLE_USM} ${MERGE_UNITS_FLAG} ${SORT_WIDTH_FLAG}")
+set(SIMULATOR_LINK_FLAGS "-fsycl -fintelfpga -Xssimulation -Xsghdl -Xstarget=${FPGA_DEVICE} ${ENABLE_USM} ${MERGE_UNITS_FLAG} ${SORT_WIDTH_FLAG} ${USER_HARDWARE_FLAGS}")
+set(HARDWARE_COMPILE_FLAGS "-fsycl -fintelfpga -Wall ${WIN_FLAG} ${ENABLE_USM} ${MERGE_UNITS_FLAG} ${SORT_WIDTH_FLAG} -DFPGA_HARDWARE")
set(HARDWARE_LINK_FLAGS "-fsycl -fintelfpga -Xshardware ${PROFILE_FLAG} -Xsparallel=2 ${SEED_FLAG} -Xstarget=${FPGA_DEVICE} ${ENABLE_USM} ${MERGE_UNITS_FLAG} ${SORT_WIDTH_FLAG} ${USER_HARDWARE_FLAGS}")
# use cmake -D USER_HARDWARE_FLAGS= to set extra flags for FPGA backend compilation
@@ -80,6 +83,15 @@ set_target_properties(${EMULATOR_TARGET} PROPERTIES COMPILE_FLAGS "${EMULATOR_CO
set_target_properties(${EMULATOR_TARGET} PROPERTIES LINK_FLAGS "${EMULATOR_LINK_FLAGS}")
add_custom_target(fpga_emu DEPENDS ${EMULATOR_TARGET})
+###############################################################################
+### FPGA Simulator
+###############################################################################
+add_executable(${SIMULATOR_TARGET} ${SOURCE_FILE})
+target_include_directories(${SIMULATOR_TARGET} PRIVATE ../../../include)
+set_target_properties(${SIMULATOR_TARGET} PROPERTIES COMPILE_FLAGS "${SIMULATOR_COMPILE_FLAGS}")
+set_target_properties(${SIMULATOR_TARGET} PROPERTIES LINK_FLAGS "${SIMULATOR_LINK_FLAGS}")
+add_custom_target(fpga_sim DEPENDS ${SIMULATOR_TARGET})
+
###############################################################################
### Generate Report
###############################################################################
diff --git a/DirectProgramming/C++SYCL_FPGA/ReferenceDesigns/merge_sort/src/main.cpp b/DirectProgramming/C++SYCL_FPGA/ReferenceDesigns/merge_sort/src/main.cpp
index 2e0ee0198b..43e4bdc62e 100644
--- a/DirectProgramming/C++SYCL_FPGA/ReferenceDesigns/merge_sort/src/main.cpp
+++ b/DirectProgramming/C++SYCL_FPGA/ReferenceDesigns/merge_sort/src/main.cpp
@@ -34,8 +34,12 @@ constexpr bool kUseUSMHostAllocation = false;
// This can be set by defining the preprocessor macro 'MERGE_UNITS'
// otherwise the default value below is used.
#ifndef MERGE_UNITS
+#if defined(FPGA_SIMULATOR)
+#define MERGE_UNITS 2
+#else
#define MERGE_UNITS 8
#endif
+#endif
constexpr size_t kMergeUnits = MERGE_UNITS;
static_assert(kMergeUnits > 0);
static_assert(fpga_tools::IsPow2(kMergeUnits));
@@ -73,9 +77,12 @@ int main(int argc, char *argv[]) {
// reading and validating the command line arguments
// defaults
bool passed = true;
-#ifdef FPGA_EMULATOR
+#if defined(FPGA_EMULATOR)
IndexT count = 128;
int runs = 2;
+#elif defined(FPGA_SIMULATOR)
+ IndexT count = 16;
+ int runs = 2;
#else
IndexT count = 1 << 24;
int runs = 17;
@@ -118,31 +125,37 @@ int main(int argc, char *argv[]) {
/////////////////////////////////////////////////////////////
// the device selector
-#ifdef FPGA_EMULATOR
- ext::intel::fpga_emulator_selector selector;
-#else
- ext::intel::fpga_selector selector;
+#if FPGA_SIMULATOR
+ auto selector = sycl::ext::intel::fpga_simulator_selector_v;
+#elif FPGA_HARDWARE
+ auto selector = sycl::ext::intel::fpga_selector_v;
+#else // #if FPGA_EMULATOR
+ auto selector = sycl::ext::intel::fpga_emulator_selector_v;
#endif
// create the device queue
queue q(selector, fpga_tools::exception_handler);
// make sure the device supports USM device allocations
- auto d = q.get_device();
- if (!q.get_device().has(aspect::usm_device_allocations)) {
+ auto device = q.get_device();
+ if (!device.has(aspect::usm_device_allocations)) {
std::cerr << "ERROR: The selected device does not support USM device"
<< " allocations\n";
std::terminate();
}
// make sure the device support USM host allocations if we chose to use them
- if (!q.get_device().has(aspect::usm_host_allocations) &&
+ if (!device.has(aspect::usm_host_allocations) &&
kUseUSMHostAllocation) {
std::cerr << "ERROR: The selected device does not support USM host"
<< " allocations\n";
std::terminate();
}
+ std::cout << "Running on device: "
+ << device.get_info<sycl::info::device::name>().c_str()
+ << std::endl;
+
// the input, output, and reference data
std::vector in_vec(count), out_vec(count), ref(count);
diff --git a/DirectProgramming/C++SYCL_FPGA/ReferenceDesigns/mvdr_beamforming/README.md b/DirectProgramming/C++SYCL_FPGA/ReferenceDesigns/mvdr_beamforming/README.md
index ed8aeaac9b..fabb9cf5c3 100755
--- a/DirectProgramming/C++SYCL_FPGA/ReferenceDesigns/mvdr_beamforming/README.md
+++ b/DirectProgramming/C++SYCL_FPGA/ReferenceDesigns/mvdr_beamforming/README.md
@@ -54,7 +54,7 @@ You can also find more information about [troubleshooting build errors](/DirectP
> **Note**: Even though the Intel DPC++/C++ OneAPI compiler is enough to compile for emulation, generating reports and generating RTL, there are extra software requirements for the simulation flow and FPGA compiles.
>
-> For using the simulator flow, one of the following simulators must be installed and accessible through your PATH:
+> For using the simulator flow, Intel® Quartus® Prime Pro Edition and one of the following simulators must be installed and accessible through your PATH:
> - Questa*-Intel® FPGA Edition
> - Questa*-Intel® FPGA Starter Edition
> - ModelSim® SE
@@ -132,19 +132,23 @@ The `DataProducer` kernel replaces the input IO pipe in the first image. The spl
3. Compile the design. (The provided targets match the recommended development flow.)
1. Compile for emulation (fast compile time, targets emulated FPGA device).
- ```
- make fpga_emu
- ```
- 2. Generate the HTML performance report.
- ```
- make report
- ```
+ ```
+ make fpga_emu
+ ```
+ 2. Compile for simulation (fast compile time, targets simulator FPGA device):
+ ```
+ make fpga_sim
+ ```
+ 3. Generate the HTML performance report.
+ ```
+ make report
+ ```
The report resides at `mvdr_beamforming_report.prj/reports/report.html`.
- 3. Compile for FPGA hardware (longer compile time, targets FPGA device).
- ```
- make fpga
- ```
+ 4. Compile for FPGA hardware (longer compile time, targets FPGA device).
+ ```
+ make fpga
+ ```
(Optional) The hardware compiles listed above can take several hours to complete; alternatively, you can download FPGA precompiled binaries (compatible with Linux* Ubuntu* 18.04) from [https://iotdk.intel.com/fpga-precompiled-binaries/latest/mvdr_beamforming.fpga.tar.gz](https://iotdk.intel.com/fpga-precompiled-binaries/latest/mvdr_beamforming.fpga.tar.gz).
@@ -169,13 +173,17 @@ The `DataProducer` kernel replaces the input IO pipe in the first image. The spl
```
nmake fpga_emu
```
- 2. Generate the HTML performance report.
+ 2. Compile for simulation (fast compile time, targets simulator FPGA device):
+ ```
+ nmake fpga_sim
+ ```
+ 3. Generate the HTML performance report.
```
nmake report
```
The report resides at `mvdr_beamforming_report.a.prj/reports/report.html`.
- 3. Compile for FPGA hardware (longer compile time, targets FPGA device).
+ 4. Compile for FPGA hardware (longer compile time, targets FPGA device).
```
nmake fpga
```
@@ -196,11 +204,15 @@ The general syntax for running the program is shown below and the table describe
| 2 | The output directory (default=`.`)
### On Linux
- 1. Run the sample on the FPGA emulator (the kernel executes on the CPU).
- ```
- ./mvdr_beamforming.fpga_emu 1024 ../data .
- ```
-2. Run the sample on the FPGA device.
+1. Run the sample on the FPGA emulator (the kernel executes on the CPU).
+ ```
+ ./mvdr_beamforming.fpga_emu 1024 ../data .
+ ```
+2. Run the sample on the FPGA simulator device:
+ ```
+ CL_CONTEXT_MPSIM_DEVICE_INTELFPGA=1 ./mvdr_beamforming.fpga_sim 1024 ../data .
+ ```
+3. Run the sample on the FPGA device.
```
./mvdr_beamforming.fpga 1024 ../data .
```
@@ -211,7 +223,13 @@ The general syntax for running the program is shown below and the table describe
```
mvdr_beamforming.fpga_emu.exe 1024 ../data .
```
-2. Run the sample on the FPGA device.
+2. Run the sample on the FPGA simulator device:
+ ```
+ set CL_CONTEXT_MPSIM_DEVICE_INTELFPGA=1
+ mvdr_beamforming.fpga_sim.exe 1024 ../data .
+ set CL_CONTEXT_MPSIM_DEVICE_INTELFPGA=
+ ```
+3. Run the sample on the FPGA device.
```
mvdr_beamforming.fpga.exe 1024 ../data .
```
diff --git a/DirectProgramming/C++SYCL_FPGA/ReferenceDesigns/mvdr_beamforming/src/CMakeLists.txt b/DirectProgramming/C++SYCL_FPGA/ReferenceDesigns/mvdr_beamforming/src/CMakeLists.txt
index cafff0556c..514fd4e447 100644
--- a/DirectProgramming/C++SYCL_FPGA/ReferenceDesigns/mvdr_beamforming/src/CMakeLists.txt
+++ b/DirectProgramming/C++SYCL_FPGA/ReferenceDesigns/mvdr_beamforming/src/CMakeLists.txt
@@ -86,12 +86,12 @@ endif()
# 1. The "compile" stage compiles the device code to an intermediate representation (SPIR-V).
# 2. The "link" stage invokes the compiler's FPGA backend before linking.
# For this reason, FPGA backend flags must be passed as link flags in CMake.
-set(EMULATOR_COMPILE_FLAGS "-Wall ${WIN_FLAG} -fsycl -fintelfpga -fbracket-depth=512 ${AC_TYPES_FLAG} ${ENABLE_USM} ${SENSOR_SIZE_FLAG} ${NUM_SENSORS_FLAG} ${QRD_MIN_ITERATIONS_FLAG} ${STREAMING_PIPE_WIDTH_FLAG} -DFPGA_EMULATOR")
+set(EMULATOR_COMPILE_FLAGS "-fsycl -fintelfpga -Wall ${WIN_FLAG} -fbracket-depth=512 ${AC_TYPES_FLAG} ${ENABLE_USM} ${SENSOR_SIZE_FLAG} ${NUM_SENSORS_FLAG} ${QRD_MIN_ITERATIONS_FLAG} ${STREAMING_PIPE_WIDTH_FLAG} -DFPGA_EMULATOR")
set(EMULATOR_LINK_FLAGS "-fsycl -fintelfpga ${AC_TYPES_FLAG} ${ENABLE_USM}")
-set(SIMULATOR_COMPILE_FLAGS "${WIN_FLAG} -Wall -fsycl -fintelfpga -fbracket-depth=512 ${AC_TYPES_FLAG} ${ENABLE_USM} ${SENSOR_SIZE_FLAG} ${NUM_SENSORS_FLAG} ${QRD_MIN_ITERATIONS_FLAG} ${STREAMING_PIPE_WIDTH_FLAG} -DFPGA_SIMULATOR")
-set(HARDWARE_COMPILE_FLAGS "${WIN_FLAG} -fbracket-depth=512 -fsycl -fintelfpga ${AC_TYPES_FLAG} ${ENABLE_USM} ${SENSOR_SIZE_FLAG} ${NUM_SENSORS_FLAG} ${QRD_MIN_ITERATIONS_FLAG} ${REAL_IO_PIPES_FLAG} ${STREAMING_PIPE_WIDTH_FLAG}")
-set(REPORT_LINK_FLAGS "-Wall -fsycl -fintelfpga -Xshardware -fbracket-depth=512 ${ENABLE_USM} ${SENSOR_SIZE_FLAG} ${NUM_SENSORS_FLAG} ${QRD_MIN_ITERATIONS_FLAG} ${REAL_IO_PIPES_FLAG} ${STREAMING_PIPE_WIDTH_FLAG} ${PROFILE_FLAG} -Xsparallel=2 -Xstarget=${FPGA_DEVICE} ${USER_HARDWARE_FLAGS} ${UDP_LINK_FLAGS}")
-set(SIMULATOR_LINK_FLAGS "${REPORT_LINK_FLAGS} ${AC_TYPES_FLAG} -Xssimulation -Xsghdl")
+set(SIMULATOR_COMPILE_FLAGS "-fsycl -fintelfpga -Wall ${WIN_FLAG} -fbracket-depth=512 ${AC_TYPES_FLAG} ${ENABLE_USM} ${SENSOR_SIZE_FLAG} ${NUM_SENSORS_FLAG} ${QRD_MIN_ITERATIONS_FLAG} ${STREAMING_PIPE_WIDTH_FLAG} -DFPGA_SIMULATOR")
+set(SIMULATOR_LINK_FLAGS "-fsycl -fintelfpga -Wall -fbracket-depth=512 ${ENABLE_USM} ${SENSOR_SIZE_FLAG} ${NUM_SENSORS_FLAG} ${QRD_MIN_ITERATIONS_FLAG} ${REAL_IO_PIPES_FLAG} ${STREAMING_PIPE_WIDTH_FLAG} -Xstarget=${FPGA_DEVICE} ${USER_HARDWARE_FLAGS} ${UDP_LINK_FLAGS} ${AC_TYPES_FLAG} -Xssimulation -Xsghdl")
+set(HARDWARE_COMPILE_FLAGS "-fsycl -fintelfpga ${WIN_FLAG} -fbracket-depth=512 ${AC_TYPES_FLAG} ${ENABLE_USM} ${SENSOR_SIZE_FLAG} ${NUM_SENSORS_FLAG} ${QRD_MIN_ITERATIONS_FLAG} ${REAL_IO_PIPES_FLAG} ${STREAMING_PIPE_WIDTH_FLAG} -DFPGA_HARDWARE")
+set(REPORT_LINK_FLAGS "-fsycl -fintelfpga -Wall -Xshardware -fbracket-depth=512 ${ENABLE_USM} ${SENSOR_SIZE_FLAG} ${NUM_SENSORS_FLAG} ${QRD_MIN_ITERATIONS_FLAG} ${REAL_IO_PIPES_FLAG} ${STREAMING_PIPE_WIDTH_FLAG} ${PROFILE_FLAG} -Xsparallel=2 -Xstarget=${FPGA_DEVICE} ${USER_HARDWARE_FLAGS} ${UDP_LINK_FLAGS}")
set(HARDWARE_LINK_FLAGS "${REPORT_LINK_FLAGS} ${AC_TYPES_FLAG}")
# use cmake -D USER_HARDWARE_FLAGS= to set extra flags for FPGA backend compilation
diff --git a/DirectProgramming/C++SYCL_FPGA/ReferenceDesigns/mvdr_beamforming/src/mvdr_beamforming.cpp b/DirectProgramming/C++SYCL_FPGA/ReferenceDesigns/mvdr_beamforming/src/mvdr_beamforming.cpp
index 8f1e500a44..3f892c624a 100644
--- a/DirectProgramming/C++SYCL_FPGA/ReferenceDesigns/mvdr_beamforming/src/mvdr_beamforming.cpp
+++ b/DirectProgramming/C++SYCL_FPGA/ReferenceDesigns/mvdr_beamforming/src/mvdr_beamforming.cpp
@@ -154,7 +154,11 @@ void PrintUsage();
// the main function
int main(int argc, char *argv[]) {
UDPArgs udp_args;
+#if defined(FPGA_SIMULATOR)
+ int num_matrix_copies = 2;
+#else
int num_matrix_copies = 1024;
+#endif
std::string in_dir = "../data";
std::string out_dir = ".";
@@ -220,10 +224,12 @@ int main(int argc, char *argv[]) {
try {
// device selector
-#if defined(FPGA_EMULATOR)
- ext::intel::fpga_emulator_selector selector;
-#else
- ext::intel::fpga_selector selector;
+#if FPGA_SIMULATOR
+ auto selector = sycl::ext::intel::fpga_simulator_selector_v;
+#elif FPGA_HARDWARE
+ auto selector = sycl::ext::intel::fpga_selector_v;
+#else // #if FPGA_EMULATOR
+ auto selector = sycl::ext::intel::fpga_emulator_selector_v;
#endif
// create the device queue
@@ -233,6 +239,12 @@ int main(int argc, char *argv[]) {
queue q(selector, fpga_tools::exception_handler);
#endif
+ device device = q.get_device();
+
+ std::cout << "Running on device: "
+ << device.get_info<sycl::info::device::name>().c_str()
+ << std::endl;
+
// initialize the producers and consumers
#if not defined(REAL_IO_PIPES)
DataProducer::Init(q, kInputDataSize * num_matrix_copies);
diff --git a/DirectProgramming/C++SYCL_FPGA/ReferenceDesigns/qrd/README.md b/DirectProgramming/C++SYCL_FPGA/ReferenceDesigns/qrd/README.md
index 629edd7063..28b11d95b4 100755
--- a/DirectProgramming/C++SYCL_FPGA/ReferenceDesigns/qrd/README.md
+++ b/DirectProgramming/C++SYCL_FPGA/ReferenceDesigns/qrd/README.md
@@ -49,7 +49,7 @@ You can also find more information about [troubleshooting build errors](/DirectP
> **Note**: Even though the Intel DPC++/C++ OneAPI compiler is enough to compile for emulation, generating reports and generating RTL, there are extra software requirements for the simulation flow and FPGA compiles.
>
-> For using the simulator flow, one of the following simulators must be installed and accessible through your PATH:
+> For using the simulator flow, Intel® Quartus® Prime Pro Edition and one of the following simulators must be installed and accessible through your PATH:
> - Questa*-Intel® FPGA Edition
> - Questa*-Intel® FPGA Starter Edition
> - ModelSim® SE
@@ -152,13 +152,17 @@ Additionaly, the cmake build system can be configured using the following parame
```
make fpga_emu
```
- 2. Generate HTML performance report.
+ 2. Compile for simulation (fast compile time, targets simulator FPGA device):
+ ```
+ make fpga_sim
+ ```
+ 3. Generate HTML performance report.
```
make report
```
The report resides at `qrd_report/reports/report.html`.
- 3. Compile for FPGA hardware (longer compile time, targets FPGA device).
+ 4. Compile for FPGA hardware (longer compile time, targets FPGA device).
```
make fpga
```
@@ -187,13 +191,17 @@ Additionaly, the cmake build system can be configured using the following parame
```
nmake fpga_emu
```
- 2. Generate HTML performance report.
+ 2. Compile for simulation (fast compile time, targets simulator FPGA device):
+ ```
+ nmake fpga_sim
+ ```
+ 3. Generate HTML performance report.
```
nmake report
```
The report resides at `qrd_report.a.prj/reports/report.html`.
- 3. Compile for FPGA hardware (longer compile time, targets FPGA device).
+ 4. Compile for FPGA hardware (longer compile time, targets FPGA device).
```
nmake fpga
```
@@ -222,6 +230,14 @@ You can perform the QR decomposition of the set of matrices repeatedly. This ste
export CL_CONFIG_CPU_FORCE_PRIVATE_MEM_SIZE=32MB
./qrd.fpga_emu
```
+
+#### Run on FPGA Simulator
+
+1. Run the sample on the FPGA simulator.
+ ```
+ CL_CONTEXT_MPSIM_DEVICE_INTELFPGA=1 ./qrd.fpga_sim
+ ```
+
#### Run on FPGA
1. Run the sample on the FPGA device.
@@ -239,6 +255,16 @@ You can perform the QR decomposition of the set of matrices repeatedly. This ste
set CL_CONFIG_CPU_FORCE_PRIVATE_MEM_SIZE=32MB
qrd.fpga_emu.exe
```
+
+#### Run on FPGA Simulator
+
+1. Run the sample on the FPGA simulator.
+ ```
+ set CL_CONTEXT_MPSIM_DEVICE_INTELFPGA=1
+ qrd.fpga_sim.exe
+ set CL_CONTEXT_MPSIM_DEVICE_INTELFPGA=
+ ```
+
#### Run on FPGA
1. Run the sample on the FPGA device.
diff --git a/DirectProgramming/C++SYCL_FPGA/ReferenceDesigns/qrd/src/CMakeLists.txt b/DirectProgramming/C++SYCL_FPGA/ReferenceDesigns/qrd/src/CMakeLists.txt
index 27104ca128..b909ab5663 100644
--- a/DirectProgramming/C++SYCL_FPGA/ReferenceDesigns/qrd/src/CMakeLists.txt
+++ b/DirectProgramming/C++SYCL_FPGA/ReferenceDesigns/qrd/src/CMakeLists.txt
@@ -93,11 +93,11 @@ message(STATUS "SEED=${SEED}")
# 1. The "compile" stage compiles the device code to an intermediate representation (SPIR-V).
# 2. The "link" stage invokes the compiler's FPGA backend before linking.
# For this reason, FPGA backend flags must be passed as link flags in CMake.
-set(EMULATOR_COMPILE_FLAGS "-Wall ${PLATFORM_SPECIFIC_COMPILE_FLAGS} ${AC_TYPES_COMPILE_FLAG} -Wformat-security -Werror=format-security -fbracket-depth=512 -fsycl -fintelfpga -DFIXED_ITERATIONS=${FIXED_ITERATIONS} -DCOMPLEX=${COMPLEX} -DROWS_COMPONENT=${ROWS_COMPONENT} -DCOLS_COMPONENT=${COLS_COMPONENT} -DFPGA_EMULATOR")
+set(EMULATOR_COMPILE_FLAGS "-fsycl -fintelfpga -Wall ${PLATFORM_SPECIFIC_COMPILE_FLAGS} ${AC_TYPES_COMPILE_FLAG} -Wformat-security -Werror=format-security -fbracket-depth=512 -DFIXED_ITERATIONS=${FIXED_ITERATIONS} -DCOMPLEX=${COMPLEX} -DROWS_COMPONENT=${ROWS_COMPONENT} -DCOLS_COMPONENT=${COLS_COMPONENT} -DFPGA_EMULATOR")
set(EMULATOR_LINK_FLAGS "-fsycl -fintelfpga ${PLATFORM_SPECIFIC_LINK_FLAGS} ${STACK_FLAG} ${AC_TYPES_LINK_FLAG}")
set(SIMULATOR_COMPILE_FLAGS "-fsycl -fintelfpga -Wall ${PLATFORM_SPECIFIC_COMPILE_FLAGS} ${AC_TYPES_COMPILE_FLAG} -DFPGA_SIMULATOR -fbracket-depth=512 -DFIXED_ITERATIONS=${FIXED_ITERATIONS} -DCOMPLEX=${COMPLEX} -DROWS_COMPONENT=${ROWS_COMPONENT} -DCOLS_COMPONENT=${COLS_COMPONENT} ${USER_HARDWARE_FLAGS}")
set(SIMULATOR_LINK_FLAGS "-fsycl -fintelfpga ${PLATFORM_SPECIFIC_LINK_FLAGS} ${STACK_FLAG} -Xssimulation -Xsghdl -Xsclock=${CLOCK_TARGET} -Xstarget=${FPGA_DEVICE} ${USER_SIMULATOR_FLAGS} ${AC_TYPES_LINK_FLAG}")
-set(HARDWARE_COMPILE_FLAGS "-Wall ${PLATFORM_SPECIFIC_COMPILE_FLAGS} ${AC_TYPES_COMPILE_FLAG} -Wformat-security -Werror=format-security -fsycl -fintelfpga -fbracket-depth=512 -DFIXED_ITERATIONS=${FIXED_ITERATIONS} -DCOMPLEX=${COMPLEX} -DROWS_COMPONENT=${ROWS_COMPONENT} -DCOLS_COMPONENT=${COLS_COMPONENT}")
+set(HARDWARE_COMPILE_FLAGS "-fsycl -fintelfpga -Wall ${PLATFORM_SPECIFIC_COMPILE_FLAGS} ${AC_TYPES_COMPILE_FLAG} -Wformat-security -Werror=format-security -fbracket-depth=512 -DFIXED_ITERATIONS=${FIXED_ITERATIONS} -DCOMPLEX=${COMPLEX} -DROWS_COMPONENT=${ROWS_COMPONENT} -DCOLS_COMPONENT=${COLS_COMPONENT} -DFPGA_HARDWARE")
set(REPORT_LINK_FLAGS "-fsycl -fintelfpga -Xshardware ${PLATFORM_SPECIFIC_LINK_FLAGS} -Xsclock=${CLOCK_TARGET} -Xsparallel=2 ${SEED} -Xstarget=${FPGA_DEVICE} ${USER_HARDWARE_FLAGS} ${AC_TYPES_LINK_FLAG}")
set(HARDWARE_LINK_FLAGS "${REPORT_LINK_FLAGS} ${STACK_FLAG}")
# use cmake -D USER_HARDWARE_FLAGS= to set extra flags for FPGA backend compilation
diff --git a/DirectProgramming/C++SYCL_FPGA/ReferenceDesigns/qrd/src/qrd_demo.cpp b/DirectProgramming/C++SYCL_FPGA/ReferenceDesigns/qrd/src/qrd_demo.cpp
index 57487846a4..e34447d1b1 100644
--- a/DirectProgramming/C++SYCL_FPGA/ReferenceDesigns/qrd/src/qrd_demo.cpp
+++ b/DirectProgramming/C++SYCL_FPGA/ReferenceDesigns/qrd/src/qrd_demo.cpp
@@ -112,24 +112,25 @@ int main(int argc, char *argv[]) {
#endif
try {
- // SYCL boilerplate
-#if defined(FPGA_EMULATOR)
- sycl::ext::intel::fpga_emulator_selector device_selector;
-#elif defined(FPGA_SIMULATOR)
- sycl::ext::intel::fpga_simulator_selector device_selector;
-#else
- sycl::ext::intel::fpga_selector device_selector;
+
+#if FPGA_SIMULATOR
+ auto selector = sycl::ext::intel::fpga_simulator_selector_v;
+#elif FPGA_HARDWARE
+ auto selector = sycl::ext::intel::fpga_selector_v;
+#else // #if FPGA_EMULATOR
+ auto selector = sycl::ext::intel::fpga_emulator_selector_v;
#endif
// Enable the queue profiling to time the execution
sycl::property_list
queue_properties{sycl::property::queue::enable_profiling()};
- sycl::queue q = sycl::queue(device_selector,
+ sycl::queue q = sycl::queue(selector,
fpga_tools::exception_handler,
queue_properties);
sycl::device device = q.get_device();
- std::cout << "Device name: "
+
+ std::cout << "Running on device: "
<< device.get_info<sycl::info::device::name>().c_str()
<< std::endl;
diff --git a/DirectProgramming/C++SYCL_FPGA/ReferenceDesigns/qri/README.md b/DirectProgramming/C++SYCL_FPGA/ReferenceDesigns/qri/README.md
index ec52b2b236..9b6576f876 100755
--- a/DirectProgramming/C++SYCL_FPGA/ReferenceDesigns/qri/README.md
+++ b/DirectProgramming/C++SYCL_FPGA/ReferenceDesigns/qri/README.md
@@ -49,7 +49,7 @@ You can also find more information about [troubleshooting build errors](/DirectP
> **Note**: Even though the Intel DPC++/C++ OneAPI compiler is enough to compile for emulation, generating reports and generating RTL, there are extra software requirements for the simulation flow and FPGA compiles.
>
-> For using the simulator flow, one of the following simulators must be installed and accessible through your PATH:
+> For using the simulator flow, Intel® Quartus® Prime Pro Edition and one of the following simulators must be installed and accessible through your PATH:
> - Questa*-Intel® FPGA Edition
> - Questa*-Intel® FPGA Starter Edition
> - ModelSim® SE
@@ -144,13 +144,17 @@ Additionaly, the cmake build system can be configured using the following parame
```
make fpga_emu
```
- 2. Generate HTML performance report.
+ 2. Compile for simulation (fast compile time, targets simulator FPGA device):
+ ```
+ make fpga_sim
+ ```
+ 3. Generate HTML performance report.
```
make report
```
The report resides at `qri_report/reports/report.html`.
- 3. Compile for FPGA hardware (longer compile time, targets FPGA device).
+ 4. Compile for FPGA hardware (longer compile time, targets FPGA device).
```
make fpga
```
@@ -179,13 +183,17 @@ Additionaly, the cmake build system can be configured using the following parame
```
nmake fpga_emu
```
- 2. Generate HTML performance report.
+ 2. Compile for simulation (fast compile time, targets simulator FPGA device):
+ ```
+ nmake fpga_sim
+ ```
+ 3. Generate HTML performance report.
```
nmake report
```
The report resides at `qri_report.a.prj/reports/report.html`.
- 3. Compile for FPGA hardware (longer compile time, targets FPGA device).
+ 4. Compile for FPGA hardware (longer compile time, targets FPGA device).
```
nmake fpga
```
@@ -215,6 +223,14 @@ You can perform the QR-based inversion of the set of matrices repeatedly, as sho
export CL_CONFIG_CPU_FORCE_PRIVATE_MEM_SIZE=32MB
./qri.fpga_emu
```
+
+#### Run on FPGA Simulator
+
+1. Run the sample on the FPGA simulator.
+ ```
+ CL_CONTEXT_MPSIM_DEVICE_INTELFPGA=1 ./qri.fpga_sim
+ ```
+
#### Run on FPGA
1. Run the sample on the FPGA device.
@@ -232,6 +248,16 @@ You can perform the QR-based inversion of the set of matrices repeatedly, as sho
set CL_CONFIG_CPU_FORCE_PRIVATE_MEM_SIZE=32MB
qri.fpga_emu.exe
```
+
+#### Run on FPGA Simulator
+
+1. Run the sample on the FPGA simulator.
+ ```
+ set CL_CONTEXT_MPSIM_DEVICE_INTELFPGA=1
+ qri.fpga_sim.exe
+ set CL_CONTEXT_MPSIM_DEVICE_INTELFPGA=
+ ```
+
#### Run on FPGA
1. Run the sample on the FPGA device.
diff --git a/DirectProgramming/C++SYCL_FPGA/ReferenceDesigns/qri/src/CMakeLists.txt b/DirectProgramming/C++SYCL_FPGA/ReferenceDesigns/qri/src/CMakeLists.txt
index 7e05fb11f3..0e508ebf5c 100755
--- a/DirectProgramming/C++SYCL_FPGA/ReferenceDesigns/qri/src/CMakeLists.txt
+++ b/DirectProgramming/C++SYCL_FPGA/ReferenceDesigns/qri/src/CMakeLists.txt
@@ -94,11 +94,11 @@ message(STATUS "SEED=${SEED}")
# 1. The "compile" stage compiles the device code to an intermediate representation (SPIR-V).
# 2. The "link" stage invokes the compiler's FPGA backend before linking.
# For this reason, FPGA backend flags must be passed as link flags in CMake.
-set(EMULATOR_COMPILE_FLAGS "${PLATFORM_SPECIFIC_COMPILE_FLAGS} -Wformat-security -Werror=format-security -fbracket-depth=512 -fsycl -fintelfpga -DFIXED_ITERATIONS_QRD=${FIXED_ITERATIONS_QRD} -DFIXED_ITERATIONS_QRI=${FIXED_ITERATIONS_QRI} -DCOMPLEX=${COMPLEX} -DROWS_COMPONENT=${ROWS_COMPONENT} -DCOLS_COMPONENT=${COLS_COMPONENT} -DFPGA_EMULATOR")
+set(EMULATOR_COMPILE_FLAGS "-fsycl -fintelfpga ${PLATFORM_SPECIFIC_COMPILE_FLAGS} -Wformat-security -Werror=format-security -fbracket-depth=512 -DFIXED_ITERATIONS_QRD=${FIXED_ITERATIONS_QRD} -DFIXED_ITERATIONS_QRI=${FIXED_ITERATIONS_QRI} -DCOMPLEX=${COMPLEX} -DROWS_COMPONENT=${ROWS_COMPONENT} -DCOLS_COMPONENT=${COLS_COMPONENT} -DFPGA_EMULATOR")
set(EMULATOR_LINK_FLAGS "-fsycl -fintelfpga ${PLATFORM_SPECIFIC_LINK_FLAGS}")
set(SIMULATOR_COMPILE_FLAGS "-fsycl -fintelfpga -Wall ${PLATFORM_SPECIFIC_COMPILE_FLAGS} -DFPGA_SIMULATOR -fbracket-depth=512 -DFIXED_ITERATIONS_QRD=${FIXED_ITERATIONS_QRD} -DFIXED_ITERATIONS_QRI=${FIXED_ITERATIONS_QRI} -DCOMPLEX=${COMPLEX} -DROWS_COMPONENT=${ROWS_COMPONENT} -DCOLS_COMPONENT=${COLS_COMPONENT} -Xsfp-relaxed ${USER_HARDWARE_FLAGS}")
set(SIMULATOR_LINK_FLAGS "-fsycl -fintelfpga ${PLATFORM_SPECIFIC_LINK_FLAGS} -Xssimulation -Xsghdl -Xsclock=${CLOCK_TARGET} -Xstarget=${FPGA_DEVICE} ${USER_SIMULATOR_FLAGS} -Xsfp-relaxed")
-set(HARDWARE_COMPILE_FLAGS "${PLATFORM_SPECIFIC_COMPILE_FLAGS} -Wformat-security -Werror=format-security -fsycl -fintelfpga -fbracket-depth=512 -DFIXED_ITERATIONS_QRD=${FIXED_ITERATIONS_QRD} -DFIXED_ITERATIONS_QRI=${FIXED_ITERATIONS_QRI} -DCOMPLEX=${COMPLEX} -DROWS_COMPONENT=${ROWS_COMPONENT} -DCOLS_COMPONENT=${COLS_COMPONENT} -Xsfp-relaxed")
+set(HARDWARE_COMPILE_FLAGS "-fsycl -fintelfpga ${PLATFORM_SPECIFIC_COMPILE_FLAGS} -Wformat-security -Werror=format-security -fbracket-depth=512 -DFIXED_ITERATIONS_QRD=${FIXED_ITERATIONS_QRD} -DFIXED_ITERATIONS_QRI=${FIXED_ITERATIONS_QRI} -DCOMPLEX=${COMPLEX} -DROWS_COMPONENT=${ROWS_COMPONENT} -DCOLS_COMPONENT=${COLS_COMPONENT} -Xsfp-relaxed -DFPGA_HARDWARE")
set(HARDWARE_LINK_FLAGS "-fsycl -fintelfpga ${PLATFORM_SPECIFIC_LINK_FLAGS} -Xshardware -Xsclock=${CLOCK_TARGET} -Xsparallel=2 ${SEED} -Xstarget=${FPGA_DEVICE} ${USER_HARDWARE_FLAGS} -Xsfp-relaxed")
# use cmake -D USER_HARDWARE_FLAGS= to set extra flags for FPGA backend compilation
diff --git a/DirectProgramming/C++SYCL_FPGA/ReferenceDesigns/qri/src/qri_demo.cpp b/DirectProgramming/C++SYCL_FPGA/ReferenceDesigns/qri/src/qri_demo.cpp
index 5b1e25b979..bea198e997 100644
--- a/DirectProgramming/C++SYCL_FPGA/ReferenceDesigns/qri/src/qri_demo.cpp
+++ b/DirectProgramming/C++SYCL_FPGA/ReferenceDesigns/qri/src/qri_demo.cpp
@@ -212,24 +212,25 @@ int main(int argc, char *argv[]) {
}
try {
- // SYCL boilerplate
-#if defined(FPGA_EMULATOR)
- sycl::ext::intel::fpga_emulator_selector device_selector;
-#elif defined(FPGA_SIMULATOR)
- sycl::ext::intel::fpga_simulator_selector device_selector;
-#else
- sycl::ext::intel::fpga_selector device_selector;
+
+#if FPGA_SIMULATOR
+ auto selector = sycl::ext::intel::fpga_simulator_selector_v;
+#elif FPGA_HARDWARE
+ auto selector = sycl::ext::intel::fpga_selector_v;
+#else // #if FPGA_EMULATOR
+ auto selector = sycl::ext::intel::fpga_emulator_selector_v;
#endif
// Enable the queue profiling to time the execution
sycl::property_list
queue_properties{sycl::property::queue::enable_profiling()};
- sycl::queue q = sycl::queue(device_selector,
+ sycl::queue q = sycl::queue(selector,
fpga_tools::exception_handler,
queue_properties);
sycl::device device = q.get_device();
- std::cout << "Device name: "
+
+ std::cout << "Running on device: "
<< device.get_info<sycl::info::device::name>().c_str()
<< std::endl;
diff --git a/DirectProgramming/C++SYCL_FPGA/Tutorials/DesignPatterns/autorun/README.md b/DirectProgramming/C++SYCL_FPGA/Tutorials/DesignPatterns/autorun/README.md
index ef5f279345..10848c6f24 100755
--- a/DirectProgramming/C++SYCL_FPGA/Tutorials/DesignPatterns/autorun/README.md
+++ b/DirectProgramming/C++SYCL_FPGA/Tutorials/DesignPatterns/autorun/README.md
@@ -14,6 +14,27 @@ The purpose of this tutorial is to demonstrate how to create autorun kernels in
## Prerequisites
+This sample is part of the FPGA code samples.
+It is categorized as a Tier 3 sample that demonstrates a design pattern.
+
+```mermaid
+flowchart LR
+ tier1("Tier 1: Get Started")
+ tier2("Tier 2: Explore the Fundamentals")
+ tier3("Tier 3: Explore the Advanced Techniques")
+ tier4("Tier 4: Explore the Reference Designs")
+
+ tier1 --> tier2 --> tier3 --> tier4
+
+ style tier1 fill:#0071c1,stroke:#0071c1,stroke-width:1px,color:#fff
+ style tier2 fill:#0071c1,stroke:#0071c1,stroke-width:1px,color:#fff
+ style tier3 fill:#f96,stroke:#333,stroke-width:1px,color:#fff
+ style tier4 fill:#0071c1,stroke:#0071c1,stroke-width:1px,color:#fff
+```
+
+Find more information about how to navigate this part of the code samples in the [FPGA top-level README.md](/DirectProgramming/C++SYCL_FPGA/README.md).
+You can also find more information about [troubleshooting build errors](/DirectProgramming/C++SYCL_FPGA/README.md#troubleshooting), [running the sample on the Intel® DevCloud](/DirectProgramming/C++SYCL_FPGA/README.md#build-and-run-the-samples-on-intel-devcloud-optional), [using Visual Studio Code with the code samples](/DirectProgramming/C++SYCL_FPGA/README.md#use-visual-studio-code-vs-code-optional), [links to selected documentation](/DirectProgramming/C++SYCL_FPGA/README.md#documentation), etc.
+
| Optimized for | Description
|:--- |:---
| OS | Ubuntu* 18.04/20.04
RHEL*/CentOS* 8
SUSE* 15
Windows* 10
@@ -22,7 +43,7 @@ The purpose of this tutorial is to demonstrate how to create autorun kernels in
> **Note**: Even though the Intel DPC++/C++ OneAPI compiler is enough to compile for emulation, generating reports and generating RTL, there are extra software requirements for the simulation flow and FPGA compiles.
>
-> For using the simulator flow, one of the following simulators must be installed and accessible through your PATH:
+> For using the simulator flow, Intel® Quartus® Prime Pro Edition and one of the following simulators must be installed and accessible through your PATH:
> - Questa*-Intel® FPGA Edition
> - Questa*-Intel® FPGA Starter Edition
> - ModelSim® SE
@@ -31,12 +52,6 @@ The purpose of this tutorial is to demonstrate how to create autorun kernels in
>**Note**: Intel® FPGA PAC hardware is only compatible with Ubuntu 18.04*.
-### Additional Documentation
-
-- *[Explore SYCL* Through Intel® FPGA Code Samples](https://software.intel.com/content/www/us/en/develop/articles/explore-dpcpp-through-intel-fpga-code-samples.html)* helps you to navigate the samples and build your knowledge of FPGAs and SYCL.
-- *[FPGA Optimization Guide for Intel® oneAPI Toolkits](https://software.intel.com/content/www/us/en/develop/documentation/oneapi-fpga-optimization-guide)* helps you understand how to target FPGAs using SYCL and Intel® oneAPI Toolkits.
-- *[Intel® oneAPI Programming Guide](https://software.intel.com/en-us/oneapi-programming-guide)* helps you understand target-independent, SYCL-compliant programming using Intel® oneAPI Toolkits.
-
## Key Implementation Details
This sample demonstrates the following concepts:
@@ -50,15 +65,11 @@ Typically, these kernels are meant to run forever, and data is streamed to and f

-
-## Set Environment Variables
-
-When working with the command-line interface (CLI), you should configure the oneAPI toolkits using environment variables. Set up your CLI environment by sourcing the `setvars` script every time you open a new terminal window. This practice ensures that your compiler, libraries, and tools are ready for development.
-
## Build the `Autorun Kernels` Sample
-> **Note**: If you have not already done so, set up your CLI
-> environment by sourcing the `setvars` script in the root of your oneAPI installation.
+> **Note**: When working with the command-line interface (CLI), you should configure the oneAPI toolkits using environment variables.
+> Set up your CLI environment by sourcing the `setvars` script located in the root of your oneAPI installation every time you open a new terminal window.
+> This practice ensures that your compiler, libraries, and tools are ready for development.
>
> Linux*:
> - For system wide installations: `. /opt/intel/oneapi/setvars.sh`
@@ -66,24 +77,10 @@ When working with the command-line interface (CLI), you should configure the one
> - For non-POSIX shells, like csh, use the following command: `bash -c 'source <install-dir>/setvars.sh ; exec csh'`
>
> Windows*:
-> - `C:\Program Files (x86)\Intel\oneAPI\setvars.bat`
+> - `C:\Program Files (x86)\Intel\oneAPI\setvars.bat`
> - Windows PowerShell*, use the following command: `cmd.exe "/K" '"C:\Program Files (x86)\Intel\oneAPI\setvars.bat" && powershell'`
>
-> For more information on configuring environment variables, see *[Use the setvars Script with Linux* or macOS*](https://www.intel.com/content/www/us/en/develop/documentation/oneapi-programming-guide/top/oneapi-development-environment-setup/use-the-setvars-script-with-linux-or-macos.html)* or *[Use the setvars Script with Windows*](https://www.intel.com/content/www/us/en/develop/documentation/oneapi-programming-guide/top/oneapi-development-environment-setup/use-the-setvars-script-with-windows.html)*.
-
-### Use Visual Studio Code* (VS Code) (Optional)
-
-You can use Visual Studio Code* (VS Code) extensions to set your environment,
-create launch configurations, and browse and download samples.
-
-The basic steps to build and run a sample using VS Code include:
- 1. Configure the oneAPI environment with the extension **Environment Configurator for Intel® oneAPI Toolkits**.
- 2. Download a sample using the extension **Code Sample Browser for Intel® oneAPI Toolkits**.
- 3. Open a terminal in VS Code (**Terminal > New Terminal**).
- 4. Run the sample in the VS Code terminal using the instructions below.
-
-To learn more about the extensions and how to configure the oneAPI environment, see the *[Using Visual Studio Code with Intel® oneAPI Toolkits User Guide](https://www.intel.com/content/www/us/en/develop/documentation/using-vs-code-with-intel-oneapi/top.html)*.
-
+> For more information on configuring environment variables, see [Use the setvars Script with Linux* or macOS*](https://www.intel.com/content/www/us/en/develop/documentation/oneapi-programming-guide/top/oneapi-development-environment-setup/use-the-setvars-script-with-linux-or-macos.html) or [Use the setvars Script with Windows*](https://www.intel.com/content/www/us/en/develop/documentation/oneapi-programming-guide/top/oneapi-development-environment-setup/use-the-setvars-script-with-windows.html).
### On Linux*
@@ -126,7 +123,6 @@ To learn more about the extensions and how to configure the oneAPI environment,
make fpga
```
-
### On Windows*
>**Note**: The Intel® PAC with Intel Arria® 10 GX FPGA and Intel® FPGA PAC D5005 (with Intel Stratix® 10 SX) do not yet support Windows*. Compiling to FPGA hardware on Windows* requires a third-party or custom Board Support Package (BSP) with Windows* support.
@@ -179,7 +175,7 @@ To learn more about the extensions and how to configure the oneAPI environment,
```
2. Run on the FPGA simulator.
```
- ./autorun.fpga_sim
+ CL_CONTEXT_MPSIM_DEVICE_INTELFPGA=1 ./autorun.fpga_sim
```
3. Run on an FPGA device.
```
@@ -192,52 +188,21 @@ To learn more about the extensions and how to configure the oneAPI environment,
```
set SYCL_ENABLE_DEFAULT_CONTEXTS=1
autorun.fpga_emu.exe
+ set SYCL_ENABLE_DEFAULT_CONTEXTS=
```
>**Note**: You must set the `SYCL_ENABLE_DEFAULT_CONTEXTS=1` environment variable or the program will hang.
2. Run on the FPGA simulator.
```
+ set CL_CONTEXT_MPSIM_DEVICE_INTELFPGA=1
autorun.fpga_sim.exe
+ set CL_CONTEXT_MPSIM_DEVICE_INTELFPGA=
```
3. Run on an FPGA device.
```
autorun.fpga.exe
```
-### Build and Run the Samples on Intel® DevCloud (Optional)
-
-When running a sample in the Intel® DevCloud, you must specify the compute node (CPU, GPU, FPGA) and whether to run in batch or interactive mode.
-
->**Note**: Since Intel® DevCloud for oneAPI includes the appropriate development environment already configured, you do not need to set environment variables.
-
-Use the Linux instructions to build and run the program.
-
-You can specify an FPGA runtime node using a single line script similar to the following example.
-
-```
-qsub -I -l nodes=1:fpga_runtime:ppn=2 -d .
-```
-
-- `-I` (upper case I) requests an interactive session.
-- `-l nodes=1:fpga_runtime:ppn=2` (lower case L) assigns one full node.
-- `-d .` makes the current folder as the working directory for the task.
-
- |Available Nodes |Command Options
- |:--- |:---
- |FPGA Compile Time |`qsub -l nodes=1:fpga_compile:ppn=2 -d .`
- |FPGA Runtime (Arria 10) |`qsub -l nodes=1:fpga_runtime:arria10:ppn=2 -d .`
- |FPGA Runtime (Stratix 10) |`qsub -l nodes=1:fpga_runtime:stratix10:ppn=2 -d .`
- |GPU |`qsub -l nodes=1:gpu:ppn=2 -d .`
- |CPU |`qsub -l nodes=1:xeon:ppn=2 -d .`
-
->**Note**: For more information on how to specify compute nodes read, *[Launch and manage jobs](https://devcloud.intel.com/oneapi/documentation/job-submission/)* in the Intel® DevCloud for oneAPI Documentation.
-
-Only `fpga_compile` nodes support compiling to FPGA. When compiling for FPGA hardware, increase the job timeout to **12 hours**.
-
-Executing programs on FPGA hardware is only supported on `fpga_runtime` nodes of the appropriate type, such as `fpga_runtime:arria10` or `fpga_runtime:stratix10`.
-
-Neither compiling nor executing programs on FPGA hardware are supported on the login nodes. For more information, see the Intel® DevCloud for oneAPI *[Intel® oneAPI Base Toolkit Get Started](https://devcloud.intel.com/oneapi/get_started/)* page.
-
## Example Output
```
@@ -248,7 +213,6 @@ PASSED
## License
-Code samples are licensed under the MIT license. See
-[License.txt](https://github.com/oneapi-src/oneAPI-samples/blob/master/License.txt) for details.
+Code samples are licensed under the MIT license. See [License.txt](https://github.com/oneapi-src/oneAPI-samples/blob/master/License.txt) for details.
-Third party program Licenses can be found here: [third-party-programs.txt](https://github.com/oneapi-src/oneAPI-samples/blob/master/third-party-programs.txt).
+Third-party program Licenses can be found here: [third-party-programs.txt](https://github.com/oneapi-src/oneAPI-samples/blob/master/third-party-programs.txt).
diff --git a/DirectProgramming/C++SYCL_FPGA/Tutorials/DesignPatterns/autorun/src/CMakeLists.txt b/DirectProgramming/C++SYCL_FPGA/Tutorials/DesignPatterns/autorun/src/CMakeLists.txt
index aff8c213d7..dbfb02daef 100755
--- a/DirectProgramming/C++SYCL_FPGA/Tutorials/DesignPatterns/autorun/src/CMakeLists.txt
+++ b/DirectProgramming/C++SYCL_FPGA/Tutorials/DesignPatterns/autorun/src/CMakeLists.txt
@@ -27,7 +27,7 @@ set(EMULATOR_COMPILE_FLAGS "-fsycl -Wall ${WIN_FLAG} -fintelfpga -DFPGA_EMULATOR
set(EMULATOR_LINK_FLAGS "-fsycl -fintelfpga")
set(SIMULATOR_COMPILE_FLAGS "-fsycl -Wall ${WIN_FLAG} -fintelfpga -DFPGA_SIMULATOR")
set(SIMULATOR_LINK_FLAGS "-fsycl -fintelfpga -Xssimulation -Xsghdl -Xstarget=${FPGA_DEVICE} ${USER_HARDWARE_FLAGS}")
-set(HARDWARE_COMPILE_FLAGS "-fsycl -Wall ${WIN_FLAG} -fintelfpga")
+set(HARDWARE_COMPILE_FLAGS "-fsycl -Wall ${WIN_FLAG} -fintelfpga -DFPGA_HARDWARE")
set(HARDWARE_LINK_FLAGS "-fsycl -fintelfpga -Xshardware -Xstarget=${FPGA_DEVICE} ${USER_HARDWARE_FLAGS}")
# use cmake -D USER_HARDWARE_FLAGS=<flags> to set extra flags for FPGA backend compilation
diff --git a/DirectProgramming/C++SYCL_FPGA/Tutorials/DesignPatterns/autorun/src/autorun.cpp b/DirectProgramming/C++SYCL_FPGA/Tutorials/DesignPatterns/autorun/src/autorun.cpp
index 324d67a2e7..82843deb02 100644
--- a/DirectProgramming/C++SYCL_FPGA/Tutorials/DesignPatterns/autorun/src/autorun.cpp
+++ b/DirectProgramming/C++SYCL_FPGA/Tutorials/DesignPatterns/autorun/src/autorun.cpp
@@ -13,12 +13,12 @@ using namespace sycl;
// choose the device selector based on emulation or actual hardware
// we make this a global variable so it can be used by the autorun kernels
-#if defined(FPGA_EMULATOR)
-ext::intel::fpga_emulator_selector ds;
-#elif defined(FPGA_SIMULATOR)
-ext::intel::fpga_simulator_selector ds;
-#else
-ext::intel::fpga_selector ds;
+#if FPGA_SIMULATOR
+ auto selector = sycl::ext::intel::fpga_simulator_selector_v;
+#elif FPGA_HARDWARE
+ auto selector = sycl::ext::intel::fpga_selector_v;
+#else // #if FPGA_EMULATOR
+ auto selector = sycl::ext::intel::fpga_emulator_selector_v;
#endif
// declare the kernel names globally to reduce name mangling
@@ -55,7 +55,7 @@ struct MyAutorun {
// declaring a global instance of this class causes the constructor to be called
// before main() starts, and the constructor launches the kernel.
-fpga_tools::Autorun ar_kernel{ds, MyAutorun{}};
+fpga_tools::Autorun ar_kernel{selector, MyAutorun{}};
////////////////////////////////////////////////////////////////////////////////
////////////////////////////////////////////////////////////////////////////////
@@ -73,7 +73,7 @@ struct MyAutorunForever {
// declaring a global instance of this class causes the constructor to be called
// before main() starts, and the constructor launches the kernel.
fpga_tools::AutorunForever ar_forever_kernel{
- ds, MyAutorunForever{}};
+ selector, MyAutorunForever{}};
////////////////////////////////////////////////////////////////////////////////
//
@@ -120,7 +120,13 @@ int main() {
try {
// create the queue
- queue q(ds, fpga_tools::exception_handler);
+ queue q(selector, fpga_tools::exception_handler);
+
+ sycl::device device = q.get_device();
+
+ std::cout << "Running on device: "
+ << device.get_info<sycl::info::device::name>().c_str()
+ << std::endl;
// stream data through the Autorun kernel
std::cout << "Running the Autorun kernel test\n";
diff --git a/DirectProgramming/C++SYCL_FPGA/Tutorials/DesignPatterns/buffered_host_streaming/README.md b/DirectProgramming/C++SYCL_FPGA/Tutorials/DesignPatterns/buffered_host_streaming/README.md
index 9de3f715c9..3b3d7277fa 100755
--- a/DirectProgramming/C++SYCL_FPGA/Tutorials/DesignPatterns/buffered_host_streaming/README.md
+++ b/DirectProgramming/C++SYCL_FPGA/Tutorials/DesignPatterns/buffered_host_streaming/README.md
@@ -21,14 +21,28 @@ Before starting this tutorial, we recommend first reviewing the following FPGA s
The concepts explained in these tutorials will be used in this tutorial to create a highly optimized heterogeneous design. This tutorial also assumes that the reader has a basic understanding of multi-threaded C++ programming. More information on C++ multi-threading programming can be found in the *[Multi-threading](http://www.cplusplus.com/reference/multithreading/)* section of the [cplusplus.com](https://cplusplus.com/) site.
-### Additional Documentation
-
-- *[Explore SYCL* Through Intel® FPGA Code Samples](https://software.intel.com/content/www/us/en/develop/articles/explore-dpcpp-through-intel-fpga-code-samples.html)* helps you to navigate the samples and build your knowledge of FPGAs and SYCL.
-- *[FPGA Optimization Guide for Intel® oneAPI Toolkits](https://software.intel.com/content/www/us/en/develop/documentation/oneapi-fpga-optimization-guide)* helps you understand how to target FPGAs using SYCL and Intel® oneAPI Toolkits.
-- *[Intel® oneAPI Programming Guide](https://software.intel.com/en-us/oneapi-programming-guide)* helps you understand target-independent, SYCL-compliant programming using Intel® oneAPI Toolkits.
-
## Prerequisites
+This sample is part of the FPGA code samples.
+It is categorized as a Tier 3 sample that demonstrates a design pattern.
+
+```mermaid
+flowchart LR
+ tier1("Tier 1: Get Started")
+ tier2("Tier 2: Explore the Fundamentals")
+ tier3("Tier 3: Explore the Advanced Techniques")
+ tier4("Tier 4: Explore the Reference Designs")
+
+ tier1 --> tier2 --> tier3 --> tier4
+
+ style tier1 fill:#0071c1,stroke:#0071c1,stroke-width:1px,color:#fff
+ style tier2 fill:#0071c1,stroke:#0071c1,stroke-width:1px,color:#fff
+ style tier3 fill:#f96,stroke:#333,stroke-width:1px,color:#fff
+ style tier4 fill:#0071c1,stroke:#0071c1,stroke-width:1px,color:#fff
+```
+
+Find more information about how to navigate this part of the code samples in the [FPGA top-level README.md](/DirectProgramming/C++SYCL_FPGA/README.md).
+You can also find more information about [troubleshooting build errors](/DirectProgramming/C++SYCL_FPGA/README.md#troubleshooting), [running the sample on the Intel® DevCloud](/DirectProgramming/C++SYCL_FPGA/README.md#build-and-run-the-samples-on-intel-devcloud-optional), [using Visual Studio Code with the code samples](/DirectProgramming/C++SYCL_FPGA/README.md#use-visual-studio-code-vs-code-optional), [links to selected documentation](/DirectProgramming/C++SYCL_FPGA/README.md#documentation), etc.
| Optimized for | Description
|:--- |:---
@@ -38,7 +52,7 @@ The concepts explained in these tutorials will be used in this tutorial to creat
> **Note**: Even though the Intel DPC++/C++ OneAPI compiler is enough to compile for emulation, generating reports and generating RTL, there are extra software requirements for the simulation flow and FPGA compiles.
>
-> For using the simulator flow, one of the following simulators must be installed and accessible through your PATH:
+> For using the simulator flow, Intel® Quartus® Prime Pro Edition and one of the following simulators must be installed and accessible through your PATH:
> - Questa*-Intel® FPGA Edition
> - Questa*-Intel® FPGA Starter Edition
> - ModelSim® SE
@@ -49,7 +63,6 @@ The concepts explained in these tutorials will be used in this tutorial to creat
>**Note**: SYCL* USM host allocations (and the code in this sample) are only supported for the **FPGA Programmable Acceleration Card (PAC) D5005 (with Intel Stratix® 10 SX)** with USM support (for example, intel_s10sx_pac:pac_s10_usm).
-
## Key Implementation Details
This sample demonstrates the following concepts:
@@ -59,14 +72,11 @@ This sample demonstrates the following concepts:
- Runtime SYCL kernel management
- C++17 Multi-threaded programming
-## Set Environment Variables
-
-When working with the command-line interface (CLI), you should configure the oneAPI toolkits using environment variables. Set up your CLI environment by sourcing the `setvars` script every time you open a new terminal window. This practice ensures that your compiler, libraries, and tools are ready for development.
-
## Build the `Buffered Host-Device Streaming` Sample
-> **Note**: If you have not already done so, set up your CLI
-> environment by sourcing the `setvars` script in the root of your oneAPI installation.
+> **Note**: When working with the command-line interface (CLI), you should configure the oneAPI toolkits using environment variables.
+> Set up your CLI environment by sourcing the `setvars` script located in the root of your oneAPI installation every time you open a new terminal window.
+> This practice ensures that your compiler, libraries, and tools are ready for development.
>
> Linux*:
> - For system wide installations: `. /opt/intel/oneapi/setvars.sh`
@@ -74,23 +84,10 @@ When working with the command-line interface (CLI), you should configure the one
> - For non-POSIX shells, like csh, use the following command: `bash -c 'source <install-dir>/setvars.sh ; exec csh'`
>
> Windows*:
-> - `C:\Program Files (x86)\Intel\oneAPI\setvars.bat`
+> - `C:\Program Files (x86)\Intel\oneAPI\setvars.bat`
> - Windows PowerShell*, use the following command: `cmd.exe "/K" '"C:\Program Files (x86)\Intel\oneAPI\setvars.bat" && powershell'`
>
-> For more information on configuring environment variables, see *[Use the setvars Script with Linux* or macOS*](https://www.intel.com/content/www/us/en/develop/documentation/oneapi-programming-guide/top/oneapi-development-environment-setup/use-the-setvars-script-with-linux-or-macos.html)* or *[Use the setvars Script with Windows*](https://www.intel.com/content/www/us/en/develop/documentation/oneapi-programming-guide/top/oneapi-development-environment-setup/use-the-setvars-script-with-windows.html)*.
-
-### Use Visual Studio Code* (VS Code) (Optional)
-
-You can use Visual Studio Code* (VS Code) extensions to set your environment,
-create launch configurations, and browse and download samples.
-
-The basic steps to build and run a sample using VS Code include:
- 1. Configure the oneAPI environment with the extension **Environment Configurator for Intel® oneAPI Toolkits**.
- 2. Download a sample using the extension **Code Sample Browser for Intel® oneAPI Toolkits**.
- 3. Open a terminal in VS Code (**Terminal > New Terminal**).
- 4. Run the sample in the VS Code terminal using the instructions below.
-
-To learn more about the extensions and how to configure the oneAPI environment, see the *[Using Visual Studio Code with Intel® oneAPI Toolkits User Guide](https://www.intel.com/content/www/us/en/develop/documentation/using-vs-code-with-intel-oneapi/top.html)*.
+> For more information on configuring environment variables, see [Use the setvars Script with Linux* or macOS*](https://www.intel.com/content/www/us/en/develop/documentation/oneapi-programming-guide/top/oneapi-development-environment-setup/use-the-setvars-script-with-linux-or-macos.html) or [Use the setvars Script with Windows*](https://www.intel.com/content/www/us/en/develop/documentation/oneapi-programming-guide/top/oneapi-development-environment-setup/use-the-setvars-script-with-windows.html).
### On Linux*
@@ -112,20 +109,23 @@ To learn more about the extensions and how to configure the oneAPI environment,
```
make fpga_emu
```
- 2. Generate HTML performance report.
+ 2. Compile for simulation (fast compile time, targets simulator FPGA device):
+ ```
+ make fpga_sim
+ ```
+ 3. Generate HTML performance report.
```
make report
```
The report resides at `buffered_host_streaming_report.prj/reports/report.html`.
- 3. Compile for FPGA hardware (longer compile time, targets FPGA device).
+ 4. Compile for FPGA hardware (longer compile time, targets FPGA device).
```
make fpga
```
(Optional) The hardware compiles listed above can take several hours to complete; alternatively, you can download FPGA precompiled binaries (compatible with Linux* Ubuntu* 18.04) from [https://iotdk.intel.com/fpga-precompiled-binaries/latest/buffered_host_streaming.fpga.tar.gz](https://iotdk.intel.com/fpga-precompiled-binaries/latest/buffered_host_streaming.fpga.tar.gz).
-
### On Windows*
>**Note**: The Intel® FPGA PAC D5005 (with Intel Stratix® 10 SX) does not yet support Windows*. Compiling to FPGA hardware on Windows* requires a third-party or custom Board Support Package (BSP) with Windows* support.
@@ -148,7 +148,11 @@ To learn more about the extensions and how to configure the oneAPI environment,
```
nmake fpga_emu
```
- 2. Generate HTML performance report.
+ 2. Compile for simulation (fast compile time, targets simulator FPGA device):
+ ```
+ nmake fpga_sim
+ ```
+ 3. Generate HTML performance report.
```
nmake report
```
@@ -161,16 +165,6 @@ To learn more about the extensions and how to configure the oneAPI environment,
>**Note**: If you encounter any issues with long paths when compiling under Windows*, you may have to create your `build` directory in a shorter path, for example `C:\samples\build`. You can then build the sample in the new location, but you must specify the full path to the build files.
-#### Troubleshooting
-
-If an error occurs, you can get more details by running `make` with
-the `VERBOSE=1` argument:
-```
-make VERBOSE=1
-```
-If you receive an error message, troubleshoot the problem using the **Diagnostics Utility for Intel® oneAPI Toolkits**. The diagnostic utility provides configuration and system checks to help find missing dependencies, permissions errors, and other issues. See the *[Diagnostics Utility for Intel® oneAPI Toolkits User Guide](https://www.intel.com/content/www/us/en/develop/documentation/diagnostic-utility-user-guide/top.html)* for more information on using the utility.
-
-
## Run the `Buffered Host-Device Streaming` Sample
### On Linux
@@ -179,7 +173,11 @@ If you receive an error message, troubleshoot the problem using the **Diagnostic
```
./buffered_host_streaming.fpga_emu
```
-2. Run the sample on the FPGA device:
+2. Run the sample on the FPGA simulator device:
+ ```
+ CL_CONTEXT_MPSIM_DEVICE_INTELFPGA=1 ./buffered_host_streaming.fpga_sim
+ ```
+3. Run the sample on the FPGA device:
```
./buffered_host_streaming.fpga
```
@@ -190,44 +188,17 @@ If you receive an error message, troubleshoot the problem using the **Diagnostic
```
buffered_host_streaming.fpga_emu.exe
```
-2. Run the sample on the FPGA device:
+2. Run the sample on the FPGA simulator device:
+ ```
+ set CL_CONTEXT_MPSIM_DEVICE_INTELFPGA=1
+ buffered_host_streaming.fpga_sim.exe
+ set CL_CONTEXT_MPSIM_DEVICE_INTELFPGA=
+ ```
+3. Run the sample on the FPGA device:
```
buffered_host_streaming.fpga.exe
```
-### Build and Run the Samples on Intel® DevCloud (Optional)
-
-When running a sample in the Intel® DevCloud, you must specify the compute node (CPU, GPU, FPGA) and whether to run in batch or interactive mode.
-
->**Note**: Since Intel® DevCloud for oneAPI includes the appropriate development environment already configured, you do not need to set environment variables.
-
-Use the Linux instructions to build and run the program.
-
-You can specify an FPGA runtime node using a single line script similar to the following example.
-
-```
-qsub -I -l nodes=1:fpga_runtime:ppn=2 -d .
-```
-
-- `-I` (upper case I) requests an interactive session.
-- `-l nodes=1:fpga_runtime:ppn=2` (lower case L) assigns one full node.
-- `-d .` makes the current folder as the working directory for the task.
-
- |Available Nodes |Command Options
- |:--- |:---
- |FPGA Compile Time |`qsub -l nodes=1:fpga_compile:ppn=2 -d .`
- |FPGA Runtime (Stratix 10) |`qsub -l nodes=1:fpga_runtime:stratix10:ppn=2 -d .`
- |GPU |`qsub -l nodes=1:gpu:ppn=2 -d .`
- |CPU |`qsub -l nodes=1:xeon:ppn=2 -d .`
-
->**Note**: For more information on how to specify compute nodes read, *[Launch and manage jobs](https://devcloud.intel.com/oneapi/documentation/job-submission/)* in the Intel® DevCloud for oneAPI Documentation.
-
-Only `fpga_compile` nodes support compiling to FPGA. When compiling for FPGA hardware, increase the job timeout to **12 hours**.
-
-Executing programs on FPGA hardware is only supported on `fpga_runtime` nodes of the appropriate type, such as `fpga_runtime:stratix10`.
-
-Neither compiling nor executing programs on FPGA hardware are supported on the login nodes. For more information, see the Intel® DevCloud for oneAPI *[Intel® oneAPI Base Toolkit Get Started](https://devcloud.intel.com/oneapi/get_started/)* page.
-
## Example Output
The following results were obtained on a system with the following specification.
diff --git a/DirectProgramming/C++SYCL_FPGA/Tutorials/DesignPatterns/buffered_host_streaming/src/CMakeLists.txt b/DirectProgramming/C++SYCL_FPGA/Tutorials/DesignPatterns/buffered_host_streaming/src/CMakeLists.txt
index 91871286b6..b6c8d1ac41 100755
--- a/DirectProgramming/C++SYCL_FPGA/Tutorials/DesignPatterns/buffered_host_streaming/src/CMakeLists.txt
+++ b/DirectProgramming/C++SYCL_FPGA/Tutorials/DesignPatterns/buffered_host_streaming/src/CMakeLists.txt
@@ -1,6 +1,7 @@
set(SOURCE_FILE buffered_host_streaming.cpp)
set(TARGET_NAME buffered_host_streaming)
set(EMULATOR_TARGET ${TARGET_NAME}.fpga_emu)
+set(SIMULATOR_TARGET ${TARGET_NAME}.fpga_sim)
set(FPGA_TARGET ${TARGET_NAME}.fpga)
set(REPORTS_TARGET ${TARGET_NAME}_report)
@@ -35,9 +36,11 @@ endif()
# 1. The "compile" stage compiles the device code to an intermediate representation (SPIR-V).
# 2. The "link" stage invokes the compiler's FPGA backend before linking.
# For this reason, FPGA backend flags must be passed as link flags in CMake.
-set(EMULATOR_COMPILE_FLAGS "-Wall ${WIN_FLAG} -fsycl -fintelfpga -DFPGA_EMULATOR")
+set(EMULATOR_COMPILE_FLAGS "-fsycl -fintelfpga -Wall ${WIN_FLAG} -DFPGA_EMULATOR")
set(EMULATOR_LINK_FLAGS "-fsycl -fintelfpga ${THREAD_FLAG}")
-set(HARDWARE_COMPILE_FLAGS "-Wall ${WIN_FLAG} -fsycl -fintelfpga")
+set(SIMULATOR_COMPILE_FLAGS "-fsycl -fintelfpga -Wall ${WIN_FLAG} -Xssimulation -DFPGA_SIMULATOR")
+set(SIMULATOR_LINK_FLAGS "-fsycl -fintelfpga -Xssimulation -Xsghdl -Xstarget=${FPGA_DEVICE} ${USER_HARDWARE_FLAGS} ${THREAD_FLAG}")
+set(HARDWARE_COMPILE_FLAGS "-fsycl -fintelfpga -Wall ${WIN_FLAG} -DFPGA_HARDWARE")
set(HARDWARE_LINK_FLAGS "-fsycl -fintelfpga ${THREAD_FLAG} -Xshardware -Xstarget=${FPGA_DEVICE} ${USER_HARDWARE_FLAGS}")
# use cmake -D USER_HARDWARE_FLAGS=<flags> to set extra flags for FPGA backend compilation
@@ -55,6 +58,20 @@ set_target_properties(${EMULATOR_TARGET} PROPERTIES COMPILE_FLAGS "${EMULATOR_CO
set_target_properties(${EMULATOR_TARGET} PROPERTIES LINK_FLAGS "${EMULATOR_LINK_FLAGS}")
add_custom_target(fpga_emu DEPENDS ${EMULATOR_TARGET})
+###############################################################################
+### FPGA Simulator
+###############################################################################
+# To compile in a single command:
+# icpx -fsycl -fintelfpga -Xssimulation -Xsghdl -Xstarget=<FPGA_DEVICE> -DFPGA_SIMULATOR <file>.cpp -o <file>.fpga_sim
+# CMake executes:
+# [compile] icpx -fsycl -fintelfpga -Xssimulation -DFPGA_SIMULATOR -o <file>.cpp.o -c <file>.cpp
+# [link] icpx -fsycl -fintelfpga -Xssimulation -Xsghdl -Xstarget=<FPGA_DEVICE> <file>.cpp.o -o <file>.fpga_sim
+add_executable(${SIMULATOR_TARGET} ${SOURCE_FILE})
+target_include_directories(${SIMULATOR_TARGET} PRIVATE ../../../../include)
+set_target_properties(${SIMULATOR_TARGET} PROPERTIES COMPILE_FLAGS "${SIMULATOR_COMPILE_FLAGS}")
+set_target_properties(${SIMULATOR_TARGET} PROPERTIES LINK_FLAGS "${SIMULATOR_LINK_FLAGS}")
+add_custom_target(fpga_sim DEPENDS ${SIMULATOR_TARGET})
+
###############################################################################
### Generate Report
###############################################################################
diff --git a/DirectProgramming/C++SYCL_FPGA/Tutorials/DesignPatterns/buffered_host_streaming/src/buffered_host_streaming.cpp b/DirectProgramming/C++SYCL_FPGA/Tutorials/DesignPatterns/buffered_host_streaming/src/buffered_host_streaming.cpp
index 510806efaf..14601fb769 100644
--- a/DirectProgramming/C++SYCL_FPGA/Tutorials/DesignPatterns/buffered_host_streaming/src/buffered_host_streaming.cpp
+++ b/DirectProgramming/C++SYCL_FPGA/Tutorials/DesignPatterns/buffered_host_streaming/src/buffered_host_streaming.cpp
@@ -36,6 +36,10 @@ int main(int argc, char* argv[]) {
size_t reps = 20;
size_t buffer_count = 1 << 12; // 4096
size_t iterations = 2;
+#elif defined(FPGA_SIMULATOR)
+ size_t reps = 2;
+ size_t buffer_count = 1 << 8; // 256
+ size_t iterations = 2;
#else
size_t reps = 200;
size_t buffer_count = 1 << 19; // 524288
@@ -123,11 +127,12 @@ int main(int argc, char* argv[]) {
bool passed = true;
try {
- // device selector
-#if defined(FPGA_EMULATOR)
- ext::intel::fpga_emulator_selector selector;
-#else
- ext::intel::fpga_selector selector;
+#if FPGA_SIMULATOR
+ auto selector = sycl::ext::intel::fpga_simulator_selector_v;
+#elif FPGA_HARDWARE
+ auto selector = sycl::ext::intel::fpga_selector_v;
+#else // #if FPGA_EMULATOR
+ auto selector = sycl::ext::intel::fpga_emulator_selector_v;
#endif
// queue properties to enable profiling
@@ -137,13 +142,17 @@ int main(int argc, char* argv[]) {
queue q(selector, fpga_tools::exception_handler, prop_list);
// make sure the device supports USM host allocations
- device d = q.get_device();
- if (!d.get_info<info::device::usm_host_allocations>()) {
+ auto device = q.get_device();
+ if (!device.get_info<info::device::usm_host_allocations>()) {
std::cerr << "ERROR: The selected device does not support USM host"
<< " allocations\n";
std::terminate();
}
+ std::cout << "Running on device: "
+ << device.get_info<sycl::info::device::name>().c_str()
+ << std::endl;
+
///////////////////////////////////////////////////////////////////////////
// find the bandwidth of each processing component in our design
std::cout << "Running the roofline analysis\n";
diff --git a/DirectProgramming/C++SYCL_FPGA/Tutorials/DesignPatterns/compute_units/README.md b/DirectProgramming/C++SYCL_FPGA/Tutorials/DesignPatterns/compute_units/README.md
index 2bf83cdd88..1eca0d48ca 100644
--- a/DirectProgramming/C++SYCL_FPGA/Tutorials/DesignPatterns/compute_units/README.md
+++ b/DirectProgramming/C++SYCL_FPGA/Tutorials/DesignPatterns/compute_units/README.md
@@ -17,6 +17,27 @@ This tutorial provides a header file that defines an abstraction for making mult
## Prerequisites
+This sample is part of the FPGA code samples.
+It is categorized as a Tier 3 sample that demonstrates a design pattern.
+
+```mermaid
+flowchart LR
+ tier1("Tier 1: Get Started")
+ tier2("Tier 2: Explore the Fundamentals")
+ tier3("Tier 3: Explore the Advanced Techniques")
+ tier4("Tier 4: Explore the Reference Designs")
+
+ tier1 --> tier2 --> tier3 --> tier4
+
+ style tier1 fill:#0071c1,stroke:#0071c1,stroke-width:1px,color:#fff
+ style tier2 fill:#0071c1,stroke:#0071c1,stroke-width:1px,color:#fff
+ style tier3 fill:#f96,stroke:#333,stroke-width:1px,color:#fff
+ style tier4 fill:#0071c1,stroke:#0071c1,stroke-width:1px,color:#fff
+```
+
+Find more information about how to navigate this part of the code samples in the [FPGA top-level README.md](/DirectProgramming/C++SYCL_FPGA/README.md).
+You can also find more information about [troubleshooting build errors](/DirectProgramming/C++SYCL_FPGA/README.md#troubleshooting), [running the sample on the Intel® DevCloud](/DirectProgramming/C++SYCL_FPGA/README.md#build-and-run-the-samples-on-intel-devcloud-optional), [using Visual Studio Code with the code samples](/DirectProgramming/C++SYCL_FPGA/README.md#use-visual-studio-code-vs-code-optional), [links to selected documentation](/DirectProgramming/C++SYCL_FPGA/README.md#documentation), etc.
+
| Optimized for | Description
|:--- |:---
| OS | Ubuntu* 18.04/20.04
RHEL*/CentOS* 8
SUSE* 15
Windows* 10
@@ -25,7 +46,7 @@ This tutorial provides a header file that defines an abstraction for making mult
> **Note**: Even though the Intel DPC++/C++ OneAPI compiler is enough to compile for emulation, generating reports and generating RTL, there are extra software requirements for the simulation flow and FPGA compiles.
>
-> For using the simulator flow, one of the following simulators must be installed and accessible through your PATH:
+> For using the simulator flow, Intel® Quartus® Prime Pro Edition and one of the following simulators must be installed and accessible through your PATH:
> - Questa*-Intel® FPGA Edition
> - Questa*-Intel® FPGA Starter Edition
> - ModelSim® SE
@@ -34,12 +55,6 @@ This tutorial provides a header file that defines an abstraction for making mult
>**Note**: Intel® FPGA PAC hardware is only compatible with Ubuntu 18.04*.
-### Additional Documentation
-
-- *[Explore SYCL* Through Intel® FPGA Code Samples](https://software.intel.com/content/www/us/en/develop/articles/explore-dpcpp-through-intel-fpga-code-samples.html)* helps you to navigate the samples and build your knowledge of FPGAs and SYCL.
-- *[FPGA Optimization Guide for Intel® oneAPI Toolkits](https://software.intel.com/content/www/us/en/develop/documentation/oneapi-fpga-optimization-guide)* helps you understand how to target FPGAs using SYCL and Intel® oneAPI Toolkits.
-- *[Intel® oneAPI Programming Guide](https://software.intel.com/en-us/oneapi-programming-guide)* helps you understand target-independent, SYCL-compliant programming using Intel® oneAPI Toolkits.
-
## Key Implementation Details
The code in this sample is a design pattern to generate multiple compute units using SYCL-compliant template metaprogramming.
@@ -99,14 +114,11 @@ SubmitComputeUnits(q, [=](auto ID) {
Each compute unit in the chain from `Source` to `Sink` must read from a unique pipe and write to the next pipe. As seen above, each compute unit knows its ID; therefore, its behavior can depend on this ID. Each compute unit in the chain will read from pipe `ID` and write to pipe `ID + 1`.
-## Set Environment Variables
-
-When working with the command-line interface (CLI), you should configure the oneAPI toolkits using environment variables. Set up your CLI environment by sourcing the `setvars` script every time you open a new terminal window. This practice ensures that your compiler, libraries, and tools are ready for development.
-
## Build the `Compute Units` Sample
-> **Note**: If you have not already done so, set up your CLI
-> environment by sourcing the `setvars` script in the root of your oneAPI installation.
+> **Note**: When working with the command-line interface (CLI), you should configure the oneAPI toolkits using environment variables.
+> Set up your CLI environment by sourcing the `setvars` script located in the root of your oneAPI installation every time you open a new terminal window.
+> This practice ensures that your compiler, libraries, and tools are ready for development.
>
> Linux*:
> - For system wide installations: `. /opt/intel/oneapi/setvars.sh`
@@ -114,23 +126,10 @@ When working with the command-line interface (CLI), you should configure the one
> - For non-POSIX shells, like csh, use the following command: `bash -c 'source /setvars.sh ; exec csh'`
>
> Windows*:
-> - `C:\Program Files (x86)\Intel\oneAPI\setvars.bat`
+> - `C:\Program Files (x86)\Intel\oneAPI\setvars.bat`
> - Windows PowerShell*, use the following command: `cmd.exe "/K" '"C:\Program Files (x86)\Intel\oneAPI\setvars.bat" && powershell'`
>
-> For more information on configuring environment variables, see *[Use the setvars Script with Linux* or macOS*](https://www.intel.com/content/www/us/en/develop/documentation/oneapi-programming-guide/top/oneapi-development-environment-setup/use-the-setvars-script-with-linux-or-macos.html)* or *[Use the setvars Script with Windows*](https://www.intel.com/content/www/us/en/develop/documentation/oneapi-programming-guide/top/oneapi-development-environment-setup/use-the-setvars-script-with-windows.html)*.
-
-### Use Visual Studio Code* (VS Code) (Optional)
-
-You can use Visual Studio Code* (VS Code) extensions to set your environment,
-create launch configurations, and browse and download samples.
-
-The basic steps to build and run a sample using VS Code include:
- 1. Configure the oneAPI environment with the extension **Environment Configurator for Intel® oneAPI Toolkits**.
- 2. Download a sample using the extension **Code Sample Browser for Intel® oneAPI Toolkits**.
- 3. Open a terminal in VS Code (**Terminal > New Terminal**).
- 4. Run the sample in the VS Code terminal using the instructions below.
-
-To learn more about the extensions and how to configure the oneAPI environment, see the *[Using Visual Studio Code with Intel® oneAPI Toolkits User Guide](https://www.intel.com/content/www/us/en/develop/documentation/using-vs-code-with-intel-oneapi/top.html)*.
+> For more information on configuring environment variables, see [Use the setvars Script with Linux* or macOS*](https://www.intel.com/content/www/us/en/develop/documentation/oneapi-programming-guide/top/oneapi-development-environment-setup/use-the-setvars-script-with-linux-or-macos.html) or [Use the setvars Script with Windows*](https://www.intel.com/content/www/us/en/develop/documentation/oneapi-programming-guide/top/oneapi-development-environment-setup/use-the-setvars-script-with-windows.html).
### On Linux*
@@ -157,13 +156,17 @@ To learn more about the extensions and how to configure the oneAPI environment,
```
make fpga_emu
```
- 2. Generate HTML performance report.
+ 2. Compile for simulation (fast compile time, targets simulator FPGA device):
+ ```
+ make fpga_sim
+ ```
+ 3. Generate HTML performance report.
```
make report
```
The report resides at `compute_units_report.prj/reports/report.html`. You can visualize the kernels and pipes generated by looking at the "System Viewer" section of the report. Note that each compute unit is shown as a unique kernel in the reports, with names `ChainComputeUnit<0>`, `ChainComputeUnit<1>`, and so on.
- 3. Compile for FPGA hardware (longer compile time, targets FPGA device).
+ 4. Compile for FPGA hardware (longer compile time, targets FPGA device).
```
make fpga
```
@@ -197,29 +200,23 @@ To learn more about the extensions and how to configure the oneAPI environment,
```
nmake fpga_emu
```
- 2. Generate HTML performance report.
+ 2. Compile for simulation (fast compile time, targets simulator FPGA device):
+ ```
+ nmake fpga_sim
+ ```
+ 3. Generate HTML performance report.
```
nmake report
```
The report resides at `compute_units_report.prj.a/reports/report.html`. You can visualize the kernels and pipes generated by looking at the "System Viewer" section of the report. Note that each compute unit is shown as a unique kernel in the reports, with names `ChainComputeUnit<0>`, `ChainComputeUnit<1>`, and so on.
- 3. Compile for FPGA hardware (longer compile time, targets FPGA device).
+ 4. Compile for FPGA hardware (longer compile time, targets FPGA device).
```
nmake fpga
```
>**Note**: If you encounter any issues with long paths when compiling under Windows*, you may have to create your `build` directory in a shorter path, for example `C:\samples\build`. You can then build the sample in the new location, but you must specify the full path to the build files.
-#### Troubleshooting
-
-If an error occurs, you can get more details by running `make` with
-the `VERBOSE=1` argument:
-```
-make VERBOSE=1
-```
-If you receive an error message, troubleshoot the problem using the **Diagnostics Utility for Intel® oneAPI Toolkits**. The diagnostic utility provides configuration and system checks to help find missing dependencies, permissions errors, and other issues. See the [Diagnostics Utility for Intel® oneAPI Toolkits User Guide](https://www.intel.com/content/www/us/en/develop/documentation/diagnostic-utility-user-guide/top.html) for more information on using the utility.
-
-
## Run the `Compute Units` Sample
### On Linux
@@ -228,7 +225,11 @@ If you receive an error message, troubleshoot the problem using the **Diagnostic
```
./compute_units.fpga_emu
```
-2. Run the sample on the FPGA device.
+2. Run the sample on the FPGA simulator device:
+ ```
+ CL_CONTEXT_MPSIM_DEVICE_INTELFPGA=1 ./compute_units.fpga_sim
+ ```
+3. Run the sample on the FPGA device.
```
./compute_units.fpga
```
@@ -238,53 +239,24 @@ If you receive an error message, troubleshoot the problem using the **Diagnostic
```
compute_units.fpga_emu.exe
```
-2. Run the sample on the FPGA device.
+2. Run the sample on the FPGA simulator device:
+ ```
+ set CL_CONTEXT_MPSIM_DEVICE_INTELFPGA=1
+ compute_units.fpga_sim.exe
+ set CL_CONTEXT_MPSIM_DEVICE_INTELFPGA=
+ ```
+3. Run the sample on the FPGA device.
```
compute_units.fpga.exe
```
-### Build and Run the Samples on Intel® DevCloud (Optional)
-
-When running a sample in the Intel® DevCloud, you must specify the compute node (CPU, GPU, FPGA) and whether to run in batch or interactive mode.
-
->**Note**: Since Intel® DevCloud for oneAPI includes the appropriate development environment already configured, you do not need to set environment variables.
-
-
-Use the Linux instructions to build and run the program.
-
-You can specify an FPGA runtime node using a single line script similar to the following example.
-
-```
-qsub -I -l nodes=1:fpga_runtime:ppn=2 -d .
-```
-
-- `-I` (upper case I) requests an interactive session.
-- `-l nodes=1:fpga_runtime:ppn=2` (lower case L) assigns one full node.
-- `-d .` makes the current folder as the working directory for the task.
-
- |Available Nodes |Command Options
- |:--- |:---
- |FPGA Compile Time |`qsub -l nodes=1:fpga_compile:ppn=2 -d .`
- |FPGA Runtime (Arria 10) |`qsub -l nodes=1:fpga_runtime:arria10:ppn=2 -d .`
- |FPGA Runtime (Stratix 10) |`qsub -l nodes=1:fpga_runtime:stratix10:ppn=2 -d .`
- |GPU |`qsub -l nodes=1:gpu:ppn=2 -d .`
- |CPU |`qsub -l nodes=1:xeon:ppn=2 -d .`
-
->**Note**: For more information on how to specify compute nodes read, [Launch and manage jobs](https://devcloud.intel.com/oneapi/documentation/job-submission/) in the Intel® DevCloud for oneAPI Documentation.
-
-Only `fpga_compile` nodes support compiling to FPGA. When compiling for FPGA hardware, increase the job timeout to **12 hours**.
-
-Executing programs on FPGA hardware is only supported on `fpga_runtime` nodes of the appropriate type, such as `fpga_runtime:arria10` or `fpga_runtime:stratix10`.
-
-Neither compiling nor executing programs on FPGA hardware are supported on the login nodes. For more information, see the Intel® DevCloud for oneAPI [*Intel® oneAPI Base Toolkit Get Started*](https://devcloud.intel.com/oneapi/get_started/) page.
-
## Example Output
```
PASSED: The results are correct
```
## License
-Code samples are licensed under the MIT license. See
-[License.txt](https://github.com/oneapi-src/oneAPI-samples/blob/master/License.txt) for details.
-Third party program Licenses can be found here: [third-party-programs.txt](https://github.com/oneapi-src/oneAPI-samples/blob/master/third-party-programs.txt).
+Code samples are licensed under the MIT license. See [License.txt](https://github.com/oneapi-src/oneAPI-samples/blob/master/License.txt) for details.
+
+Third-party program Licenses can be found here: [third-party-programs.txt](https://github.com/oneapi-src/oneAPI-samples/blob/master/third-party-programs.txt).
diff --git a/DirectProgramming/C++SYCL_FPGA/Tutorials/DesignPatterns/compute_units/src/CMakeLists.txt b/DirectProgramming/C++SYCL_FPGA/Tutorials/DesignPatterns/compute_units/src/CMakeLists.txt
index a13a63c39f..dfb0ca6cf9 100644
--- a/DirectProgramming/C++SYCL_FPGA/Tutorials/DesignPatterns/compute_units/src/CMakeLists.txt
+++ b/DirectProgramming/C++SYCL_FPGA/Tutorials/DesignPatterns/compute_units/src/CMakeLists.txt
@@ -1,6 +1,7 @@
set(SOURCE_FILE compute_units.cpp)
set(TARGET_NAME compute_units)
set(EMULATOR_TARGET ${TARGET_NAME}.fpga_emu)
+set(SIMULATOR_TARGET ${TARGET_NAME}.fpga_sim)
set(FPGA_TARGET ${TARGET_NAME}.fpga)
# FPGA board selection
@@ -22,9 +23,11 @@ endif()
# 1. The "compile" stage compiles the device code to an intermediate representation (SPIR-V).
# 2. The "link" stage invokes the compiler's FPGA backend before linking.
# For this reason, FPGA backend flags must be passed as link flags in CMake.
-set(EMULATOR_COMPILE_FLAGS "-Wall ${WIN_FLAG} -fsycl -fintelfpga -DFPGA_EMULATOR")
+set(EMULATOR_COMPILE_FLAGS "-fsycl -fintelfpga -Wall ${WIN_FLAG} -DFPGA_EMULATOR")
set(EMULATOR_LINK_FLAGS "-fsycl -fintelfpga")
-set(HARDWARE_COMPILE_FLAGS "-Wall ${WIN_FLAG} -fsycl -fintelfpga")
+set(SIMULATOR_COMPILE_FLAGS "-fsycl -fintelfpga -Wall ${WIN_FLAG} -Xssimulation -DFPGA_SIMULATOR")
+set(SIMULATOR_LINK_FLAGS "-fsycl -fintelfpga -Xssimulation -Xsghdl -Xstarget=${FPGA_DEVICE} ${USER_HARDWARE_FLAGS}")
+set(HARDWARE_COMPILE_FLAGS "-fsycl -fintelfpga -Wall ${WIN_FLAG} -DFPGA_HARDWARE")
set(HARDWARE_LINK_FLAGS "-fsycl -fintelfpga -Xshardware -Xstarget=${FPGA_DEVICE} ${USER_HARDWARE_FLAGS}")
# use cmake -D USER_HARDWARE_FLAGS= to set extra flags for FPGA backend compilation
@@ -38,11 +41,24 @@ set(HARDWARE_LINK_FLAGS "-fsycl -fintelfpga -Xshardware -Xstarget=${FPGA_DEVICE}
# [link] icpx -fsycl -fintelfpga compute_units.cpp.o -o compute_units.fpga_emu
add_executable(${EMULATOR_TARGET} ${SOURCE_FILE}) # CMake automatically adds #include'd headers to the dependency list
target_include_directories(${EMULATOR_TARGET} PRIVATE ../../../../include)
-target_include_directories(${EMULATOR_TARGET} PRIVATE ../../../../include)
set_target_properties(${EMULATOR_TARGET} PROPERTIES COMPILE_FLAGS "${EMULATOR_COMPILE_FLAGS}")
set_target_properties(${EMULATOR_TARGET} PROPERTIES LINK_FLAGS "${EMULATOR_LINK_FLAGS}")
add_custom_target(fpga_emu DEPENDS ${EMULATOR_TARGET})
+###############################################################################
+### FPGA Simulator
+###############################################################################
+# To compile in a single command:
+# icpx -fsycl -fintelfpga -Xssimulation -Xsghdl -Xstarget=<FPGA_DEVICE> -DFPGA_SIMULATOR compute_units.cpp -o compute_units.fpga_sim
+# CMake executes:
+# [compile] icpx -fsycl -fintelfpga -Xssimulation -DFPGA_SIMULATOR -o compute_units.cpp.o -c compute_units.cpp
+# [link] icpx -fsycl -fintelfpga -Xssimulation -Xsghdl -Xstarget=<FPGA_DEVICE> compute_units.cpp.o -o compute_units.fpga_sim
+add_executable(${SIMULATOR_TARGET} ${SOURCE_FILE})
+target_include_directories(${SIMULATOR_TARGET} PRIVATE ../../../../include)
+set_target_properties(${SIMULATOR_TARGET} PROPERTIES COMPILE_FLAGS "${SIMULATOR_COMPILE_FLAGS}")
+set_target_properties(${SIMULATOR_TARGET} PROPERTIES LINK_FLAGS "${SIMULATOR_LINK_FLAGS}")
+add_custom_target(fpga_sim DEPENDS ${SIMULATOR_TARGET})
+
###############################################################################
### Generate Report
###############################################################################
@@ -52,7 +68,6 @@ set(FPGA_EARLY_IMAGE ${TARGET_NAME}_report.a)
# The compile output is not an executable, but an intermediate compilation result unique to SYCL.
add_executable(${FPGA_EARLY_IMAGE} ${SOURCE_FILE})
target_include_directories(${FPGA_EARLY_IMAGE} PRIVATE ../../../../include)
-target_include_directories(${FPGA_EARLY_IMAGE} PRIVATE ../../../../include)
add_custom_target(report DEPENDS ${FPGA_EARLY_IMAGE})
set_target_properties(${FPGA_EARLY_IMAGE} PROPERTIES COMPILE_FLAGS "${HARDWARE_COMPILE_FLAGS}")
set_target_properties(${FPGA_EARLY_IMAGE} PROPERTIES LINK_FLAGS "${HARDWARE_LINK_FLAGS} -fsycl-link=early")
@@ -68,7 +83,6 @@ set_target_properties(${FPGA_EARLY_IMAGE} PROPERTIES LINK_FLAGS "${HARDWARE_LINK
# [link] icpx -fsycl -fintelfpga -Xshardware -Xstarget= compute_units.cpp.o -o compute_units.fpga
add_executable(${FPGA_TARGET} EXCLUDE_FROM_ALL ${SOURCE_FILE})
target_include_directories(${FPGA_TARGET} PRIVATE ../../../../include)
-target_include_directories(${FPGA_TARGET} PRIVATE ../../../../include)
add_custom_target(fpga DEPENDS ${FPGA_TARGET})
set_target_properties(${FPGA_TARGET} PROPERTIES COMPILE_FLAGS "${HARDWARE_COMPILE_FLAGS}")
set_target_properties(${FPGA_TARGET} PROPERTIES LINK_FLAGS "${HARDWARE_LINK_FLAGS} -reuse-exe=${CMAKE_BINARY_DIR}/${FPGA_TARGET}")
diff --git a/DirectProgramming/C++SYCL_FPGA/Tutorials/DesignPatterns/compute_units/src/compute_units.cpp b/DirectProgramming/C++SYCL_FPGA/Tutorials/DesignPatterns/compute_units/src/compute_units.cpp
index 247bf51086..f679e56314 100644
--- a/DirectProgramming/C++SYCL_FPGA/Tutorials/DesignPatterns/compute_units/src/compute_units.cpp
+++ b/DirectProgramming/C++SYCL_FPGA/Tutorials/DesignPatterns/compute_units/src/compute_units.cpp
@@ -43,16 +43,24 @@ void SinkKernel(queue &q, float &out_data) {
int main() {
-#if defined(FPGA_EMULATOR)
- ext::intel::fpga_emulator_selector device_selector;
-#else
- ext::intel::fpga_selector device_selector;
+#if FPGA_SIMULATOR
+ auto selector = sycl::ext::intel::fpga_simulator_selector_v;
+#elif FPGA_HARDWARE
+ auto selector = sycl::ext::intel::fpga_selector_v;
+#else // #if FPGA_EMULATOR
+ auto selector = sycl::ext::intel::fpga_emulator_selector_v;
#endif
float out_data = 0;
try {
- queue q(device_selector, fpga_tools::exception_handler);
+ queue q(selector, fpga_tools::exception_handler);
+
+ sycl::device device = q.get_device();
+
+ std::cout << "Running on device: "
+ << device.get_info().c_str()
+ << std::endl;
// Enqueue the Source kernel
SourceKernel(q, kTestData);
diff --git a/DirectProgramming/C++SYCL_FPGA/Tutorials/DesignPatterns/double_buffering/README.md b/DirectProgramming/C++SYCL_FPGA/Tutorials/DesignPatterns/double_buffering/README.md
index 07f4b30b4e..8d992f4108 100755
--- a/DirectProgramming/C++SYCL_FPGA/Tutorials/DesignPatterns/double_buffering/README.md
+++ b/DirectProgramming/C++SYCL_FPGA/Tutorials/DesignPatterns/double_buffering/README.md
@@ -18,6 +18,27 @@ This sample demonstrates double buffering to overlap kernel execution with buffe
## Prerequisites
+This sample is part of the FPGA code samples.
+It is categorized as a Tier 2 sample that demonstrates a design pattern.
+
+```mermaid
+flowchart LR
+ tier1("Tier 1: Get Started")
+ tier2("Tier 2: Explore the Fundamentals")
+ tier3("Tier 3: Explore the Advanced Techniques")
+ tier4("Tier 4: Explore the Reference Designs")
+
+ tier1 --> tier2 --> tier3 --> tier4
+
+ style tier1 fill:#0071c1,stroke:#0071c1,stroke-width:1px,color:#fff
+ style tier2 fill:#f96,stroke:#333,stroke-width:1px,color:#fff
+ style tier3 fill:#0071c1,stroke:#0071c1,stroke-width:1px,color:#fff
+ style tier4 fill:#0071c1,stroke:#0071c1,stroke-width:1px,color:#fff
+```
+
+Find more information about how to navigate this part of the code samples in the [FPGA top-level README.md](/DirectProgramming/C++SYCL_FPGA/README.md).
+You can also find more information about [troubleshooting build errors](/DirectProgramming/C++SYCL_FPGA/README.md#troubleshooting), [running the sample on the Intel® DevCloud](/DirectProgramming/C++SYCL_FPGA/README.md#build-and-run-the-samples-on-intel-devcloud-optional), [using Visual Studio Code with the code samples](/DirectProgramming/C++SYCL_FPGA/README.md#use-visual-studio-code-vs-code-optional), [links to selected documentation](/DirectProgramming/C++SYCL_FPGA/README.md#documentation), etc.
+
| Optimized for | Description
|:--- |:---
| OS | Ubuntu* 18.04/20.04
RHEL*/CentOS* 8
SUSE* 15
Windows* 10
@@ -26,7 +47,7 @@ This sample demonstrates double buffering to overlap kernel execution with buffe
> **Note**: Even though the Intel DPC++/C++ OneAPI compiler is enough to compile for emulation, generating reports and generating RTL, there are extra software requirements for the simulation flow and FPGA compiles.
>
-> For using the simulator flow, one of the following simulators must be installed and accessible through your PATH:
+> For using the simulator flow, Intel® Quartus® Prime Pro Edition and one of the following simulators must be installed and accessible through your PATH:
> - Questa*-Intel® FPGA Edition
> - Questa*-Intel® FPGA Starter Edition
> - ModelSim® SE
@@ -35,12 +56,6 @@ This sample demonstrates double buffering to overlap kernel execution with buffe
>**Note**: Intel® FPGA PAC hardware is only compatible with Ubuntu 18.04*.
-### Additional Documentation
-
-- *[Explore SYCL* Through Intel® FPGA Code Samples](https://software.intel.com/content/www/us/en/develop/articles/explore-dpcpp-through-intel-fpga-code-samples.html)* helps you to navigate the samples and build your knowledge of FPGAs and SYCL.
-- *[FPGA Optimization Guide for Intel® oneAPI Toolkits](https://software.intel.com/content/www/us/en/develop/documentation/oneapi-fpga-optimization-guide)* helps you understand how to target FPGAs using SYCL and Intel® oneAPI Toolkits.
-- *[Intel® oneAPI Programming Guide](https://software.intel.com/en-us/oneapi-programming-guide)* helps you understand target-independent, SYCL-compliant programming using Intel® oneAPI Toolkits.
-
## Key Implementation Details
The key concepts discussed in this sample are as followed:
@@ -49,14 +64,11 @@ The key concepts discussed in this sample are as followed:
- Determining when double buffering is beneficial
- How to measure the impact of double buffering
-## Set Environment Variables
-
-When working with the command-line interface (CLI), you should configure the oneAPI toolkits using environment variables. Set up your CLI environment by sourcing the `setvars` script every time you open a new terminal window. This practice ensures that your compiler, libraries, and tools are ready for development.
-
## Build the `Double Buffering` Sample
-> **Note**: If you have not already done so, set up your CLI
-> environment by sourcing the `setvars` script in the root of your oneAPI installation.
+> **Note**: When working with the command-line interface (CLI), you should configure the oneAPI toolkits using environment variables.
+> Set up your CLI environment by sourcing the `setvars` script located in the root of your oneAPI installation every time you open a new terminal window.
+> This practice ensures that your compiler, libraries, and tools are ready for development.
>
> Linux*:
> - For system wide installations: `. /opt/intel/oneapi/setvars.sh`
@@ -64,23 +76,10 @@ When working with the command-line interface (CLI), you should configure the one
> - For non-POSIX shells, like csh, use the following command: `bash -c 'source /setvars.sh ; exec csh'`
>
> Windows*:
-> - `C:\Program Files (x86)\Intel\oneAPI\setvars.bat`
+> - `C:\Program Files (x86)\Intel\oneAPI\setvars.bat`
> - Windows PowerShell*, use the following command: `cmd.exe "/K" '"C:\Program Files (x86)\Intel\oneAPI\setvars.bat" && powershell'`
>
-> For more information on configuring environment variables, see *[Use the setvars Script with Linux* or macOS*](https://www.intel.com/content/www/us/en/develop/documentation/oneapi-programming-guide/top/oneapi-development-environment-setup/use-the-setvars-script-with-linux-or-macos.html)* or *[Use the setvars Script with Windows*](https://www.intel.com/content/www/us/en/develop/documentation/oneapi-programming-guide/top/oneapi-development-environment-setup/use-the-setvars-script-with-windows.html)*.
-
-### Use Visual Studio Code* (VS Code) (Optional)
-
-You can use Visual Studio Code* (VS Code) extensions to set your environment,
-create launch configurations, and browse and download samples.
-
-The basic steps to build and run a sample using VS Code include:
- 1. Configure the oneAPI environment with the extension **Environment Configurator for Intel® oneAPI Toolkits**.
- 2. Download a sample using the extension **Code Sample Browser for Intel® oneAPI Toolkits**.
- 3. Open a terminal in VS Code (**Terminal > New Terminal**).
- 4. Run the sample in the VS Code terminal using the instructions below.
-
-To learn more about the extensions and how to configure the oneAPI environment, see the *[Using Visual Studio Code with Intel® oneAPI Toolkits User Guide](https://www.intel.com/content/www/us/en/develop/documentation/using-vs-code-with-intel-oneapi/top.html)*.
+> For more information on configuring environment variables, see [Use the setvars Script with Linux* or macOS*](https://www.intel.com/content/www/us/en/develop/documentation/oneapi-programming-guide/top/oneapi-development-environment-setup/use-the-setvars-script-with-linux-or-macos.html) or [Use the setvars Script with Windows*](https://www.intel.com/content/www/us/en/develop/documentation/oneapi-programming-guide/top/oneapi-development-environment-setup/use-the-setvars-script-with-windows.html).
### On Linux*
@@ -189,7 +188,7 @@ If you receive an error message, troubleshoot the problem using the **Diagnostic
```
2. Run the sample on the FPGA simulator device.
```
- ./double_buffering.fpga_sim
+ CL_CONTEXT_MPSIM_DEVICE_INTELFPGA=1 ./double_buffering.fpga_sim
```
3. Run the sample on the FPGA device.
```
@@ -204,49 +203,15 @@ If you receive an error message, troubleshoot the problem using the **Diagnostic
```
2. Run the sample on the FPGA simulator device.
```
+ set CL_CONTEXT_MPSIM_DEVICE_INTELFPGA=1
double_buffering.fpga_sim.exe
+ set CL_CONTEXT_MPSIM_DEVICE_INTELFPGA=
```
3. Run the sample on the FPGA device.
```
double_buffering.fpga.exe
```
-### Build and Run the Samples on Intel® DevCloud (Optional)
-
-When running a sample in the Intel® DevCloud, you must specify the compute node (CPU, GPU, FPGA) and whether to run in batch or interactive mode.
-
->**Note**: Since Intel® DevCloud for oneAPI includes the appropriate development environment already configured, you do not need to set environment variables.
-
-
-Use the Linux instructions to build and run the program.
-
-You can specify an FPGA runtime node using a single line script similar to the following example.
-
-```
-qsub -I -l nodes=1:fpga_runtime:ppn=2 -d .
-```
-
-- `-I` (upper case I) requests an interactive session.
-- `-l nodes=1:fpga_runtime:ppn=2` (lower case L) assigns one full node.
-- `-d .` makes the current folder as the working directory for the task.
-
- |Available Nodes |Command Options
- |:--- |:---
- |FPGA Compile Time |`qsub -l nodes=1:fpga_compile:ppn=2 -d .`
- |FPGA Runtime (Arria 10) |`qsub -l nodes=1:fpga_runtime:arria10:ppn=2 -d .`
- |FPGA Runtime (Stratix 10) |`qsub -l nodes=1:fpga_runtime:stratix10:ppn=2 -d .`
- |GPU |`qsub -l nodes=1:gpu:ppn=2 -d .`
- |CPU |`qsub -l nodes=1:xeon:ppn=2 -d .`
-
->**Note**: For more information on how to specify compute nodes read, [Launch and manage jobs](https://devcloud.intel.com/oneapi/documentation/job-submission/) in the Intel® DevCloud for oneAPI Documentation.
-
-Only `fpga_compile` nodes support compiling to FPGA. When compiling for FPGA hardware, increase the job timeout to **12 hours**.
-
-Executing programs on FPGA hardware is only supported on `fpga_runtime` nodes of the appropriate type, such as `fpga_runtime:arria10` or `fpga_runtime:stratix10`.
-
-Neither compiling nor executing programs on FPGA hardware are supported on the login nodes. For more information, see the Intel® DevCloud for oneAPI [*Intel® oneAPI Base Toolkit Get Started*](https://devcloud.intel.com/oneapi/get_started/) page.
-
-
## Example Output
### Example Output for an FPGA Device
@@ -382,7 +347,6 @@ In both runs, the total kernel execution time is similar as expected; however, w
## License
-Code samples are licensed under the MIT license. See
-[License.txt](https://github.com/oneapi-src/oneAPI-samples/blob/master/License.txt) for details.
+Code samples are licensed under the MIT license. See [License.txt](https://github.com/oneapi-src/oneAPI-samples/blob/master/License.txt) for details.
-Third party program Licenses can be found here: [third-party-programs.txt](https://github.com/oneapi-src/oneAPI-samples/blob/master/third-party-programs.txt).
+Third-party program Licenses can be found here: [third-party-programs.txt](https://github.com/oneapi-src/oneAPI-samples/blob/master/third-party-programs.txt).
diff --git a/DirectProgramming/C++SYCL_FPGA/Tutorials/DesignPatterns/double_buffering/src/CMakeLists.txt b/DirectProgramming/C++SYCL_FPGA/Tutorials/DesignPatterns/double_buffering/src/CMakeLists.txt
index f1ed949f0f..cd4b6e57de 100755
--- a/DirectProgramming/C++SYCL_FPGA/Tutorials/DesignPatterns/double_buffering/src/CMakeLists.txt
+++ b/DirectProgramming/C++SYCL_FPGA/Tutorials/DesignPatterns/double_buffering/src/CMakeLists.txt
@@ -23,11 +23,11 @@ endif()
# 1. The "compile" stage compiles the device code to an intermediate representation (SPIR-V).
# 2. The "link" stage invokes the compiler's FPGA backend before linking.
# For this reason, FPGA backend flags must be passed as link flags in CMake.
-set(EMULATOR_COMPILE_FLAGS "-Wall ${WIN_FLAG} -fsycl -fintelfpga -DFPGA_EMULATOR ${MATH_FLAGS}")
+set(EMULATOR_COMPILE_FLAGS "-fsycl -fintelfpga -Wall ${WIN_FLAG} -DFPGA_EMULATOR ${MATH_FLAGS}")
set(EMULATOR_LINK_FLAGS "-fsycl -fintelfpga")
-set(SIMULATOR_COMPILE_FLAGS "-Wall ${WIN_FLAG} -fsycl -fintelfpga -Xssimulation -DFPGA_SIMULATOR ${MATH_FLAGS}")
+set(SIMULATOR_COMPILE_FLAGS "-fsycl -fintelfpga -Wall ${WIN_FLAG} -Xssimulation -DFPGA_SIMULATOR ${MATH_FLAGS}")
set(SIMULATOR_LINK_FLAGS "-fsycl -fintelfpga -Xssimulation -Xstarget=${FPGA_DEVICE} ${USER_HARDWARE_FLAGS}")
-set(HARDWARE_COMPILE_FLAGS "-Wall ${WIN_FLAG} -fsycl -fintelfpga ${MATH_FLAGS}")
+set(HARDWARE_COMPILE_FLAGS "-fsycl -fintelfpga -Wall ${WIN_FLAG} ${MATH_FLAGS} -DFPGA_HARDWARE")
set(HARDWARE_LINK_FLAGS "-fsycl -fintelfpga -Xshardware -Xstarget=${FPGA_DEVICE} ${USER_HARDWARE_FLAGS}")
# use cmake -D USER_HARDWARE_FLAGS= to set extra flags for FPGA simulator and backend compilation
diff --git a/DirectProgramming/C++SYCL_FPGA/Tutorials/DesignPatterns/double_buffering/src/double_buffering.cpp b/DirectProgramming/C++SYCL_FPGA/Tutorials/DesignPatterns/double_buffering/src/double_buffering.cpp
index 10ee9d1230..5d120e5ae0 100644
--- a/DirectProgramming/C++SYCL_FPGA/Tutorials/DesignPatterns/double_buffering/src/double_buffering.cpp
+++ b/DirectProgramming/C++SYCL_FPGA/Tutorials/DesignPatterns/double_buffering/src/double_buffering.cpp
@@ -203,34 +203,36 @@ void ProcessInput(buffer &buf) {
}
int main() {
-// Create queue, get platform and device
-#if defined(FPGA_EMULATOR)
- ext::intel::fpga_emulator_selector device_selector;
- std::cout << "\nEmulator output does not demonstrate true hardware "
- "performance. The design may need to run on actual hardware "
- "to observe the performance benefit of the optimization "
- "exemplified in this tutorial.\n\n";
-#elif defined(FPGA_SIMULATOR)
- ext::intel::fpga_simulator_selector device_selector;
- std::cout << "\nSimulator output does not demonstrate true hardware "
- "performance. The design may need to run on actual hardware "
- "to observe the performance benefit of the optimization "
- "exemplified in this tutorial.\n\n";
-#else
- ext::intel::fpga_selector device_selector;
+
+#if FPGA_SIMULATOR
+ auto selector = sycl::ext::intel::fpga_simulator_selector_v;
+#elif FPGA_HARDWARE
+ auto selector = sycl::ext::intel::fpga_selector_v;
+#else // #if FPGA_EMULATOR
+ auto selector = sycl::ext::intel::fpga_emulator_selector_v;
+#endif
+
+#ifndef FPGA_HARDWARE
+ std::cout << "\nEmulator and simulator outputs do not demonstrate "
+ "true hardware performance. The design may need to run "
+ "on actual hardware to observe the performance benefit "
+ "of the optimization exemplified in this tutorial.\n\n";
#endif
try {
auto prop_list = property_list{property::queue::enable_profiling()};
- sycl::queue q(device_selector, fpga_tools::exception_handler, prop_list);
+ sycl::queue q(selector, fpga_tools::exception_handler, prop_list);
platform platform = q.get_context().get_platform();
device device = q.get_device();
+
std::cout << "Platform name: "
<< platform.get_info().c_str() << "\n";
- std::cout << "Device name: "
- << device.get_info().c_str() << "\n\n\n";
+
+ std::cout << "Running on device: "
+ << device.get_info().c_str()
+ << std::endl;
std::cout << "Executing kernel " << kTimes << " times in each round.\n\n";
diff --git a/DirectProgramming/C++SYCL_FPGA/Tutorials/DesignPatterns/explicit_data_movement/README.md b/DirectProgramming/C++SYCL_FPGA/Tutorials/DesignPatterns/explicit_data_movement/README.md
index 42d81acdb1..0d6ddbf035 100755
--- a/DirectProgramming/C++SYCL_FPGA/Tutorials/DesignPatterns/explicit_data_movement/README.md
+++ b/DirectProgramming/C++SYCL_FPGA/Tutorials/DesignPatterns/explicit_data_movement/README.md
@@ -14,6 +14,27 @@ The purpose of this tutorial is to demonstrate an alternative coding style that
## Prerequisites
+This sample is part of the FPGA code samples.
+It is categorized as a Tier 2 sample that demonstrates a design pattern.
+
+```mermaid
+flowchart LR
+ tier1("Tier 1: Get Started")
+ tier2("Tier 2: Explore the Fundamentals")
+ tier3("Tier 3: Explore the Advanced Techniques")
+ tier4("Tier 4: Explore the Reference Designs")
+
+ tier1 --> tier2 --> tier3 --> tier4
+
+ style tier1 fill:#0071c1,stroke:#0071c1,stroke-width:1px,color:#fff
+ style tier2 fill:#f96,stroke:#333,stroke-width:1px,color:#fff
+ style tier3 fill:#0071c1,stroke:#0071c1,stroke-width:1px,color:#fff
+ style tier4 fill:#0071c1,stroke:#0071c1,stroke-width:1px,color:#fff
+```
+
+Find more information about how to navigate this part of the code samples in the [FPGA top-level README.md](/DirectProgramming/C++SYCL_FPGA/README.md).
+You can also find more information about [troubleshooting build errors](/DirectProgramming/C++SYCL_FPGA/README.md#troubleshooting), [running the sample on the Intel® DevCloud](/DirectProgramming/C++SYCL_FPGA/README.md#build-and-run-the-samples-on-intel-devcloud-optional), [using Visual Studio Code with the code samples](/DirectProgramming/C++SYCL_FPGA/README.md#use-visual-studio-code-vs-code-optional), [links to selected documentation](/DirectProgramming/C++SYCL_FPGA/README.md#documentation), etc.
+
| Optimized for | Description
|:--- |:---
| OS | Ubuntu* 18.04/20.04
RHEL*/CentOS* 8
SUSE* 15
Windows* 10
@@ -22,7 +43,7 @@ The purpose of this tutorial is to demonstrate an alternative coding style that
> **Note**: Even though the Intel DPC++/C++ OneAPI compiler is enough to compile for emulation, generating reports and generating RTL, there are extra software requirements for the simulation flow and FPGA compiles.
>
-> For using the simulator flow, one of the following simulators must be installed and accessible through your PATH:
+> For using the simulator flow, Intel® Quartus® Prime Pro Edition and one of the following simulators must be installed and accessible through your PATH:
> - Questa*-Intel® FPGA Edition
> - Questa*-Intel® FPGA Starter Edition
> - ModelSim® SE
@@ -31,12 +52,6 @@ The purpose of this tutorial is to demonstrate an alternative coding style that
>**Note**: Intel® FPGA PAC hardware is only compatible with Ubuntu 18.04*.
-### Additional Documentation
-
-- *[Explore SYCL* Through Intel® FPGA Code Samples](https://software.intel.com/content/www/us/en/develop/articles/explore-dpcpp-through-intel-fpga-code-samples.html)* helps you to navigate the samples and build your knowledge of FPGAs and SYCL.
-- *[FPGA Optimization Guide for Intel® oneAPI Toolkits](https://software.intel.com/content/www/us/en/develop/documentation/oneapi-fpga-optimization-guide)* helps you understand how to target FPGAs using SYCL and Intel® oneAPI Toolkits.
-- *[Intel® oneAPI Programming Guide](https://software.intel.com/en-us/oneapi-programming-guide)* helps you understand target-independent, SYCL-compliant programming using Intel® oneAPI Toolkits.
-
## Key Implementation Details
### Implicit and Explicit Data Movement
@@ -72,15 +87,11 @@ Choosing a data movement strategy largely depends on the specific application an
Alternatively, there is a hybrid approach that uses some implicit data movement and some explicit data movement. This technique, demonstrated in the **Double Buffering** (double_buffering) and **N-Way Buffering** (n_way_buffering) tutorials, uses implicit data movement for some buffers where the control does not affect performance, and explicit data movement for buffers whose movement has a substantial effect on performance. In this hybrid approach, we do **not** use device allocations but rather specific `buffer` API calls (e.g., `update_host`) to trigger the movement of data.
-
-## Set Environment Variables
-
-When working with the command-line interface (CLI), you should configure the oneAPI toolkits using environment variables. Set up your CLI environment by sourcing the `setvars` script every time you open a new terminal window. This practice ensures that your compiler, libraries, and tools are ready for development.
-
## Build the `Explicit Data Movement` Sample
-> **Note**: If you have not already done so, set up your CLI
-> environment by sourcing the `setvars` script in the root of your oneAPI installation.
+> **Note**: When working with the command-line interface (CLI), you should configure the oneAPI toolkits using environment variables.
+> Set up your CLI environment by sourcing the `setvars` script located in the root of your oneAPI installation every time you open a new terminal window.
+> This practice ensures that your compiler, libraries, and tools are ready for development.
>
> Linux*:
> - For system wide installations: `. /opt/intel/oneapi/setvars.sh`
@@ -88,23 +99,10 @@ When working with the command-line interface (CLI), you should configure the one
> - For non-POSIX shells, like csh, use the following command: `bash -c 'source /setvars.sh ; exec csh'`
>
> Windows*:
-> - `C:\Program Files (x86)\Intel\oneAPI\setvars.bat`
+> - `C:\Program Files (x86)\Intel\oneAPI\setvars.bat`
> - Windows PowerShell*, use the following command: `cmd.exe "/K" '"C:\Program Files (x86)\Intel\oneAPI\setvars.bat" && powershell'`
>
-> For more information on configuring environment variables, see *[Use the setvars Script with Linux* or macOS*](https://www.intel.com/content/www/us/en/develop/documentation/oneapi-programming-guide/top/oneapi-development-environment-setup/use-the-setvars-script-with-linux-or-macos.html)* or *[Use the setvars Script with Windows*](https://www.intel.com/content/www/us/en/develop/documentation/oneapi-programming-guide/top/oneapi-development-environment-setup/use-the-setvars-script-with-windows.html)*.
-
-### Use Visual Studio Code* (VS Code) (Optional)
-
-You can use Visual Studio Code* (VS Code) extensions to set your environment,
-create launch configurations, and browse and download samples.
-
-The basic steps to build and run a sample using VS Code include:
- 1. Configure the oneAPI environment with the extension **Environment Configurator for Intel® oneAPI Toolkits**.
- 2. Download a sample using the extension **Code Sample Browser for Intel® oneAPI Toolkits**.
- 3. Open a terminal in VS Code (**Terminal > New Terminal**).
- 4. Run the sample in the VS Code terminal using the instructions below.
-
-To learn more about the extensions and how to configure the oneAPI environment, see the *[Using Visual Studio Code with Intel® oneAPI Toolkits User Guide](https://www.intel.com/content/www/us/en/develop/documentation/using-vs-code-with-intel-oneapi/top.html)*.
+> For more information on configuring environment variables, see [Use the setvars Script with Linux* or macOS*](https://www.intel.com/content/www/us/en/develop/documentation/oneapi-programming-guide/top/oneapi-development-environment-setup/use-the-setvars-script-with-linux-or-macos.html) or [Use the setvars Script with Windows*](https://www.intel.com/content/www/us/en/develop/documentation/oneapi-programming-guide/top/oneapi-development-environment-setup/use-the-setvars-script-with-windows.html).
### On Linux*
@@ -132,13 +130,17 @@ To learn more about the extensions and how to configure the oneAPI environment,
```
make fpga_emu
```
- 2. Generate HTML performance report.
+ 2. Compile for simulation (fast compile time, targets simulator FPGA device):
+ ```
+ make fpga_sim
+ ```
+ 3. Generate HTML performance report.
```
make report
```
The report resides at `explicit_data_movement.prj/reports/report.html`. Note that because the optimization occurs at the *runtime* level, the FPGA compiler report will not show a difference between the optimized and unoptimized cases.
- 3. Compile for FPGA hardware (longer compile time, targets FPGA device).
+ 4. Compile for FPGA hardware (longer compile time, targets FPGA device).
```
make fpga
```
@@ -172,28 +174,23 @@ To learn more about the extensions and how to configure the oneAPI environment,
```
nmake fpga_emu
```
- 2. Generate HTML performance report.
+ 2. Compile for simulation (fast compile time, targets simulator FPGA device):
+ ```
+ nmake fpga_sim
+ ```
+ 3. Generate HTML performance report.
```
nmake report
```
The report resides at `explicit_data_movement.prj.a/reports/report.html`.
- 3. Compile for FPGA hardware (longer compile time, targets FPGA device).
+ 4. Compile for FPGA hardware (longer compile time, targets FPGA device).
```
nmake fpga
```
>**Note**: If you encounter any issues with long paths when compiling under Windows*, you may have to create your `build` directory in a shorter path, for example `C:\samples\build`. You can then build the sample in the new location, but you must specify the full path to the build files.
-#### Troubleshooting
-
-If an error occurs, you can get more details by running `make` with
-the `VERBOSE=1` argument:
-```
-make VERBOSE=1
-```
-If you receive an error message, troubleshoot the problem using the **Diagnostics Utility for Intel® oneAPI Toolkits**. The diagnostic utility provides configuration and system checks to help find missing dependencies, permissions errors, and other issues. See the *[Diagnostics Utility for Intel® oneAPI Toolkits User Guide](https://www.intel.com/content/www/us/en/develop/documentation/diagnostic-utility-user-guide/top.html)* for more information on using the utility.
-
## Run the `Explicit Data Movement` Sample
### On Linux
@@ -202,7 +199,11 @@ If you receive an error message, troubleshoot the problem using the **Diagnostic
```
./explicit_data_movement.fpga_emu
```
-2. Run the sample on the FPGA device.
+2. Run the sample on the FPGA simulator device:
+ ```
+ CL_CONTEXT_MPSIM_DEVICE_INTELFPGA=1 ./explicit_data_movement.fpga_sim
+ ```
+3. Run the sample on the FPGA device.
```
./explicit_data_movement.fpga
```
@@ -213,47 +214,17 @@ If you receive an error message, troubleshoot the problem using the **Diagnostic
```
explicit_data_movement.fpga_emu.exe
```
-2. Run the sample on the FPGA device.
+2. Run the sample on the FPGA simulator device:
+ ```
+ set CL_CONTEXT_MPSIM_DEVICE_INTELFPGA=1
+ explicit_data_movement.fpga_sim.exe
+ set CL_CONTEXT_MPSIM_DEVICE_INTELFPGA=
+ ```
+3. Run the sample on the FPGA device.
```
explicit_data_movement.fpga.exe
```
-### Build and Run the Samples on Intel® DevCloud (Optional)
-
-When running a sample in the Intel® DevCloud, you must specify the compute node (CPU, GPU, FPGA) and whether to run in batch or interactive mode.
-
->**Note**: Since Intel® DevCloud for oneAPI includes the appropriate development environment already configured, you do not need to set environment variables.
-
-
-Use the Linux instructions to build and run the program.
-
-You can specify an FPGA runtime node using a single line script similar to the following example.
-
-```
-qsub -I -l nodes=1:fpga_runtime:ppn=2 -d .
-```
-
-- `-I` (upper case I) requests an interactive session.
-- `-l nodes=1:fpga_runtime:ppn=2` (lower case L) assigns one full node.
-- `-d .` makes the current folder as the working directory for the task.
-
- |Available Nodes |Command Options
- |:--- |:---
- |FPGA Compile Time |`qsub -l nodes=1:fpga_compile:ppn=2 -d .`
- |FPGA Runtime (Arria 10) |`qsub -l nodes=1:fpga_runtime:arria10:ppn=2 -d .`
- |FPGA Runtime (Stratix 10) |`qsub -l nodes=1:fpga_runtime:stratix10:ppn=2 -d .`
- |GPU |`qsub -l nodes=1:gpu:ppn=2 -d .`
- |CPU |`qsub -l nodes=1:xeon:ppn=2 -d .`
-
->**Note**: For more information on how to specify compute nodes read, [Launch and manage jobs](https://devcloud.intel.com/oneapi/documentation/job-submission/) in the Intel® DevCloud for oneAPI Documentation.
-
-Only `fpga_compile` nodes support compiling to FPGA. When compiling for FPGA hardware, increase the job timeout to **12 hours**.
-
-Executing programs on FPGA hardware is only supported on `fpga_runtime` nodes of the appropriate type, such as `fpga_runtime:arria10` or `fpga_runtime:stratix10`.
-
-Neither compiling nor executing programs on FPGA hardware are supported on the login nodes. For more information, see the Intel® DevCloud for oneAPI [*Intel® oneAPI Base Toolkit Get Started*](https://devcloud.intel.com/oneapi/get_started/) page.
-
-
## Example Output
### Output Example for FPGA Emulator
@@ -276,7 +247,6 @@ PASSED
## License
-Code samples are licensed under the MIT license. See
-[License.txt](https://github.com/oneapi-src/oneAPI-samples/blob/master/License.txt) for details.
+Code samples are licensed under the MIT license. See [License.txt](https://github.com/oneapi-src/oneAPI-samples/blob/master/License.txt) for details.
-Third party program Licenses can be found here: [third-party-programs.txt](https://github.com/oneapi-src/oneAPI-samples/blob/master/third-party-programs.txt).
+Third-party program Licenses can be found here: [third-party-programs.txt](https://github.com/oneapi-src/oneAPI-samples/blob/master/third-party-programs.txt).
diff --git a/DirectProgramming/C++SYCL_FPGA/Tutorials/DesignPatterns/explicit_data_movement/src/CMakeLists.txt b/DirectProgramming/C++SYCL_FPGA/Tutorials/DesignPatterns/explicit_data_movement/src/CMakeLists.txt
index dc99f3ab37..83c3f1a58a 100755
--- a/DirectProgramming/C++SYCL_FPGA/Tutorials/DesignPatterns/explicit_data_movement/src/CMakeLists.txt
+++ b/DirectProgramming/C++SYCL_FPGA/Tutorials/DesignPatterns/explicit_data_movement/src/CMakeLists.txt
@@ -1,6 +1,7 @@
set(SOURCE_FILE explicit_data_movement.cpp)
set(TARGET_NAME explicit_data_movement)
set(EMULATOR_TARGET ${TARGET_NAME}.fpga_emu)
+set(SIMULATOR_TARGET ${TARGET_NAME}.fpga_sim)
set(FPGA_TARGET ${TARGET_NAME}.fpga)
# FPGA board selection
@@ -22,9 +23,11 @@ endif()
# 1. The "compile" stage compiles the device code to an intermediate representation (SPIR-V).
# 2. The "link" stage invokes the compiler's FPGA backend before linking.
# For this reason, FPGA backend flags must be passed as link flags in CMake.
-set(EMULATOR_COMPILE_FLAGS "-Wall ${WIN_FLAG} -fsycl -fintelfpga -DFPGA_EMULATOR")
+set(EMULATOR_COMPILE_FLAGS "-fsycl -fintelfpga -Wall ${WIN_FLAG} -DFPGA_EMULATOR")
set(EMULATOR_LINK_FLAGS "-fsycl -fintelfpga")
-set(HARDWARE_COMPILE_FLAGS "-Wall ${WIN_FLAG} -fsycl -fintelfpga")
+set(SIMULATOR_COMPILE_FLAGS "-fsycl -fintelfpga -Wall ${WIN_FLAG} -Xssimulation -DFPGA_SIMULATOR")
+set(SIMULATOR_LINK_FLAGS "-fsycl -fintelfpga -Xssimulation -Xsghdl -Xstarget=${FPGA_DEVICE} ${USER_HARDWARE_FLAGS}")
+set(HARDWARE_COMPILE_FLAGS "-fsycl -fintelfpga -Wall ${WIN_FLAG} -DFPGA_HARDWARE")
set(HARDWARE_LINK_FLAGS "-fsycl -fintelfpga -Xshardware -Xstarget=${FPGA_DEVICE} ${USER_HARDWARE_FLAGS}")
# use cmake -D USER_HARDWARE_FLAGS= to set extra flags for FPGA backend compilation
@@ -42,6 +45,20 @@ set_target_properties(${EMULATOR_TARGET} PROPERTIES COMPILE_FLAGS "${EMULATOR_CO
set_target_properties(${EMULATOR_TARGET} PROPERTIES LINK_FLAGS "${EMULATOR_LINK_FLAGS}")
add_custom_target(fpga_emu DEPENDS ${EMULATOR_TARGET})
+###############################################################################
+### FPGA Simulator
+###############################################################################
+# To compile in a single command:
+# icpx -fsycl -fintelfpga -Xssimulation -Xsghdl -Xstarget= -DFPGA_SIMULATOR .cpp -o .fpga_sim
+# CMake executes:
+# [compile] icpx -fsycl -fintelfpga -Xssimulation -DFPGA_SIMULATOR -o .cpp.o -c .cpp
+# [link] icpx -fsycl -fintelfpga -Xssimulation -Xsghdl -Xstarget= .cpp.o -o .fpga_sim
+add_executable(${SIMULATOR_TARGET} ${SOURCE_FILE})
+target_include_directories(${SIMULATOR_TARGET} PRIVATE ../../../../include)
+set_target_properties(${SIMULATOR_TARGET} PROPERTIES COMPILE_FLAGS "${SIMULATOR_COMPILE_FLAGS}")
+set_target_properties(${SIMULATOR_TARGET} PROPERTIES LINK_FLAGS "${SIMULATOR_LINK_FLAGS}")
+add_custom_target(fpga_sim DEPENDS ${SIMULATOR_TARGET})
+
###############################################################################
### Generate Report
###############################################################################
diff --git a/DirectProgramming/C++SYCL_FPGA/Tutorials/DesignPatterns/explicit_data_movement/src/explicit_data_movement.cpp b/DirectProgramming/C++SYCL_FPGA/Tutorials/DesignPatterns/explicit_data_movement/src/explicit_data_movement.cpp
index c63ee1186f..18c2dafbe2 100644
--- a/DirectProgramming/C++SYCL_FPGA/Tutorials/DesignPatterns/explicit_data_movement/src/explicit_data_movement.cpp
+++ b/DirectProgramming/C++SYCL_FPGA/Tutorials/DesignPatterns/explicit_data_movement/src/explicit_data_movement.cpp
@@ -142,6 +142,9 @@ int main(int argc, char *argv[]) {
#if defined(FPGA_EMULATOR)
size_t size = 10000;
size_t iters = 1;
+#elif defined(FPGA_SIMULATOR)
+ size_t size = 100;
+ size_t iters = 1;
#else
size_t size = 100000000;
size_t iters = 5;
@@ -159,11 +162,13 @@ int main(int argc, char *argv[]) {
}
try {
- // device selector
-#if defined(FPGA_EMULATOR)
- ext::intel::fpga_emulator_selector selector;
-#else
- ext::intel::fpga_selector selector;
+
+#if FPGA_SIMULATOR
+ auto selector = sycl::ext::intel::fpga_simulator_selector_v;
+#elif FPGA_HARDWARE
+ auto selector = sycl::ext::intel::fpga_selector_v;
+#else // #if FPGA_EMULATOR
+ auto selector = sycl::ext::intel::fpga_emulator_selector_v;
#endif
// queue properties to enable profiling
@@ -173,13 +178,17 @@ int main(int argc, char *argv[]) {
queue q(selector, fpga_tools::exception_handler, prop_list);
// make sure the device supports USM device allocations
- device d = q.get_device();
- if (!d.get_info()) {
+ auto device = q.get_device();
+ if (!device.get_info()) {
std::cerr << "ERROR: The selected device does not support USM device"
<< " allocations\n";
return 1;
}
+ std::cout << "Running on device: "
+ << device.get_info().c_str()
+ << std::endl;
+
// input and output data
std::vector in(size);
std::vector out_gold(size), out_implicit(size), out_explicit(size);
@@ -236,7 +245,7 @@ int main(int argc, char *argv[]) {
if (passed) {
// The emulator does not accurately represent real hardware performance.
// Therefore, we don't show performance results when running in emulation.
-#ifndef FPGA_EMULATOR
+#if !defined(FPGA_EMULATOR) && !defined(FPGA_SIMULATOR)
double implicit_avg_lat =
std::accumulate(implicit_kernel_latency.begin() + 1,
implicit_kernel_latency.end(), 0.0)
diff --git a/DirectProgramming/C++SYCL_FPGA/Tutorials/DesignPatterns/io_streaming/README.md b/DirectProgramming/C++SYCL_FPGA/Tutorials/DesignPatterns/io_streaming/README.md
index 4762b9be2f..b74d5d1994 100755
--- a/DirectProgramming/C++SYCL_FPGA/Tutorials/DesignPatterns/io_streaming/README.md
+++ b/DirectProgramming/C++SYCL_FPGA/Tutorials/DesignPatterns/io_streaming/README.md
@@ -14,15 +14,36 @@ The purpose of this code sample is to demonstrate how to do trivial I/O streamin
## Prerequisites
+This sample is part of the FPGA code samples.
+It is categorized as a Tier 3 sample that demonstrates a design pattern.
+
+```mermaid
+flowchart LR
+ tier1("Tier 1: Get Started")
+ tier2("Tier 2: Explore the Fundamentals")
+ tier3("Tier 3: Explore the Advanced Techniques")
+ tier4("Tier 4: Explore the Reference Designs")
+
+ tier1 --> tier2 --> tier3 --> tier4
+
+ style tier1 fill:#0071c1,stroke:#0071c1,stroke-width:1px,color:#fff
+ style tier2 fill:#0071c1,stroke:#0071c1,stroke-width:1px,color:#fff
+ style tier3 fill:#f96,stroke:#333,stroke-width:1px,color:#fff
+ style tier4 fill:#0071c1,stroke:#0071c1,stroke-width:1px,color:#fff
+```
+
+Find more information about how to navigate this part of the code samples in the [FPGA top-level README.md](/DirectProgramming/C++SYCL_FPGA/README.md).
+You can also find more information about [troubleshooting build errors](/DirectProgramming/C++SYCL_FPGA/README.md#troubleshooting), [running the sample on the Intel® DevCloud](/DirectProgramming/C++SYCL_FPGA/README.md#build-and-run-the-samples-on-intel-devcloud-optional), [using Visual Studio Code with the code samples](/DirectProgramming/C++SYCL_FPGA/README.md#use-visual-studio-code-vs-code-optional), [links to selected documentation](/DirectProgramming/C++SYCL_FPGA/README.md#documentation), etc.
+
| Optimized for | Description
|:--- |:---
| OS | Ubuntu* 18.04/20.04
RHEL*/CentOS* 8
SUSE* 15
Windows* 10
| Hardware | Intel® Programmable Acceleration Card (PAC) with Intel Arria® 10 GX FPGA
FPGA Programmable Acceleration Card (PAC) D5005 (with Intel Stratix® 10 SX)
FPGA third-party/custom platforms with oneAPI support
-| Software | Intel® oneAPI DPC++/C++ Compiler
Intel® FPGA Add-On for oneAPI Base Toolkit
+| Software | Intel® oneAPI DPC++/C++ Compiler
> **Note**: Even though the Intel DPC++/C++ OneAPI compiler is enough to compile for emulation, generating reports and generating RTL, there are extra software requirements for the simulation flow and FPGA compiles.
>
-> For using the simulator flow, one of the following simulators must be installed and accessible through your PATH:
+> For using the simulator flow, Intel® Quartus® Prime Pro Edition and one of the following simulators must be installed and accessible through your PATH:
> - Questa*-Intel® FPGA Edition
> - Questa*-Intel® FPGA Starter Edition
> - ModelSim® SE
@@ -31,13 +52,6 @@ The purpose of this code sample is to demonstrate how to do trivial I/O streamin
>**Note**: Intel® FPGA PAC hardware is only compatible with Ubuntu 18.04*.
-### Additional Documentation
-
-- *[Explore SYCL* Through Intel® FPGA Code Samples](https://software.intel.com/content/www/us/en/develop/articles/explore-dpcpp-through-intel-fpga-code-samples.html)* helps you to navigate the samples and build your knowledge of FPGAs and SYCL.
-- *[FPGA Optimization Guide for Intel® oneAPI Toolkits](https://software.intel.com/content/www/us/en/develop/documentation/oneapi-fpga-optimization-guide)* helps you understand how to target FPGAs using SYCL and Intel® oneAPI Toolkits.
-- *[Intel® oneAPI Programming Guide](https://software.intel.com/en-us/oneapi-programming-guide)* helps you understand target-independent, SYCL-compliant programming using Intel® oneAPI Toolkits.
-
-
## Key Implementation Details
The following sections will describe I/O streaming and I/O pipes in more detail and conclude with a description of the trivial design used to demonstrate these concepts and features.
@@ -104,8 +118,9 @@ Notice that the main kernel in the `SubmitSideChannelKernels` function in *src/S
## Build the `IO Streaming with SYCL IO Pipes` Sample
-> **Note**: If you have not already done so, set up your CLI
-> environment by sourcing the `setvars` script in the root of your oneAPI installation.
+> **Note**: When working with the command-line interface (CLI), you should configure the oneAPI toolkits using environment variables.
+> Set up your CLI environment by sourcing the `setvars` script located in the root of your oneAPI installation every time you open a new terminal window.
+> This practice ensures that your compiler, libraries, and tools are ready for development.
>
> Linux*:
> - For system wide installations: `. /opt/intel/oneapi/setvars.sh`
@@ -113,23 +128,10 @@ Notice that the main kernel in the `SubmitSideChannelKernels` function in *src/S
> - For non-POSIX shells, like csh, use the following command: `bash -c 'source /setvars.sh ; exec csh'`
>
> Windows*:
-> - `C:\Program Files (x86)\Intel\oneAPI\setvars.bat`
+> - `C:\Program Files (x86)\Intel\oneAPI\setvars.bat`
> - Windows PowerShell*, use the following command: `cmd.exe "/K" '"C:\Program Files (x86)\Intel\oneAPI\setvars.bat" && powershell'`
>
-> For more information on configuring environment variables, see *[Use the setvars Script with Linux* or macOS*](https://www.intel.com/content/www/us/en/develop/documentation/oneapi-programming-guide/top/oneapi-development-environment-setup/use-the-setvars-script-with-linux-or-macos.html)* or *[Use the setvars Script with Windows*](https://www.intel.com/content/www/us/en/develop/documentation/oneapi-programming-guide/top/oneapi-development-environment-setup/use-the-setvars-script-with-windows.html)*.
-
-### Use Visual Studio Code* (VS Code) (Optional)
-
-You can use Visual Studio Code* (VS Code) extensions to set your environment,
-create launch configurations, and browse and download samples.
-
-The basic steps to build and run a sample using VS Code include:
- 1. Configure the oneAPI environment with the extension **Environment Configurator for Intel® oneAPI Toolkits**.
- 2. Download a sample using the extension **Code Sample Browser for Intel® oneAPI Toolkits**.
- 3. Open a terminal in VS Code (**Terminal > New Terminal**).
- 4. Run the sample in the VS Code terminal using the instructions below.
-
-To learn more about the extensions and how to configure the oneAPI environment, see the *[Using Visual Studio Code with Intel® oneAPI Toolkits User Guide](https://www.intel.com/content/www/us/en/develop/documentation/using-vs-code-with-intel-oneapi/top.html)*.
+> For more information on configuring environment variables, see [Use the setvars Script with Linux* or macOS*](https://www.intel.com/content/www/us/en/develop/documentation/oneapi-programming-guide/top/oneapi-development-environment-setup/use-the-setvars-script-with-linux-or-macos.html) or [Use the setvars Script with Windows*](https://www.intel.com/content/www/us/en/develop/documentation/oneapi-programming-guide/top/oneapi-development-environment-setup/use-the-setvars-script-with-windows.html).
### On Linux*
@@ -156,13 +158,17 @@ To learn more about the extensions and how to configure the oneAPI environment,
```
make fpga_emu
```
- 2. Generate HTML performance report.
+ 2. Compile for simulation (fast compile time, targets simulator FPGA device):
+ ```
+ make fpga_sim
+ ```
+ 3. Generate HTML performance report.
```
make report
```
The report resides at `io_streaming_report.prj/reports/report.html`.
- 3. Compile for FPGA hardware (longer compile time, targets FPGA device).
+ 4. Compile for FPGA hardware (longer compile time, targets FPGA device).
```
make fpga
```
@@ -195,28 +201,23 @@ To learn more about the extensions and how to configure the oneAPI environment,
```
nmake fpga_emu
```
- 2. Generate HTML performance report.
+ 2. Compile for simulation (fast compile time, targets simulator FPGA device):
+ ```
+ nmake fpga_sim
+ ```
+ 3. Generate HTML performance report.
```
nmake report
```
The report resides at `io_streaming_report.prj.a/reports/report.html`.
- 3. Compile for FPGA hardware (longer compile time, targets FPGA device).
+ 4. Compile for FPGA hardware (longer compile time, targets FPGA device).
```
nmake fpga
```
>**Note**: If you encounter any issues with long paths when compiling under Windows*, you may have to create your `build` directory in a shorter path, for example `C:\samples\build`. You can then build the sample in the new location, but you must specify the full path to the build files.
-#### Troubleshooting
-
-If an error occurs, you can get more details by running `make` with
-the `VERBOSE=1` argument:
-```
-make VERBOSE=1
-```
-If you receive an error message, troubleshoot the problem using the **Diagnostics Utility for Intel® oneAPI Toolkits**. The diagnostic utility provides configuration and system checks to help find missing dependencies, permissions errors, and other issues. See the *[Diagnostics Utility for Intel® oneAPI Toolkits User Guide](https://www.intel.com/content/www/us/en/develop/documentation/diagnostic-utility-user-guide/top.html)* for more information on using the utility.
-
## Run the `IO Streaming with SYCL IO Pipes` Sample
### On Linux
@@ -225,7 +226,11 @@ If you receive an error message, troubleshoot the problem using the **Diagnostic
```
./io_streaming.fpga_emu
```
-2. Run the sample on the FPGA device.
+2. Run the sample on the FPGA simulator device:
+ ```
+ CL_CONTEXT_MPSIM_DEVICE_INTELFPGA=1 ./io_streaming.fpga_sim
+ ```
+3. Run the sample on the FPGA device.
```
./io_streaming.fpga
```
@@ -236,46 +241,17 @@ If you receive an error message, troubleshoot the problem using the **Diagnostic
```
io_streaming.fpga_emu.exe
```
-2. Run the sample on the FPGA device.
+2. Run the sample on the FPGA simulator device:
+ ```
+ set CL_CONTEXT_MPSIM_DEVICE_INTELFPGA=1
+ io_streaming.fpga_sim.exe
+ set CL_CONTEXT_MPSIM_DEVICE_INTELFPGA=
+ ```
+3. Run the sample on the FPGA device.
```
io_streaming.fpga.exe
```
-### Build and Run the Samples on Intel® DevCloud (Optional)
-
-When running a sample in the Intel® DevCloud, you must specify the compute node (CPU, GPU, FPGA) and whether to run in batch or interactive mode.
-
->**Note**: Since Intel® DevCloud for oneAPI includes the appropriate development environment already configured, you do not need to set environment variables.
-
-Use the Linux instructions to build and run the program.
-
-You can specify an FPGA runtime node using a single line script similar to the following example.
-
-```
-qsub -I -l nodes=1:fpga_runtime:ppn=2 -d .
-```
-
-- `-I` (upper case I) requests an interactive session.
-- `-l nodes=1:fpga_runtime:ppn=2` (lower case L) assigns one full node.
-- `-d .` makes the current folder as the working directory for the task.
-
- |Available Nodes |Command Options
- |:--- |:---
- |FPGA Compile Time |`qsub -l nodes=1:fpga_compile:ppn=2 -d .`
- |FPGA Runtime (Arria 10) |`qsub -l nodes=1:fpga_runtime:arria10:ppn=2 -d .`
- |FPGA Runtime (Stratix 10) |`qsub -l nodes=1:fpga_runtime:stratix10:ppn=2 -d .`
- |GPU |`qsub -l nodes=1:gpu:ppn=2 -d .`
- |CPU |`qsub -l nodes=1:xeon:ppn=2 -d .`
-
->**Note**: For more information on how to specify compute nodes read, [Launch and manage jobs](https://devcloud.intel.com/oneapi/documentation/job-submission/) in the Intel® DevCloud for oneAPI Documentation.
-
-Only `fpga_compile` nodes support compiling to FPGA. When compiling for FPGA hardware, increase the job timeout to **12 hours**.
-
-Executing programs on FPGA hardware is only supported on `fpga_runtime` nodes of the appropriate type, such as `fpga_runtime:arria10` or `fpga_runtime:stratix10`.
-
-Neither compiling nor executing programs on FPGA hardware are supported on the login nodes. For more information, see the Intel® DevCloud for oneAPI [*Intel® oneAPI Base Toolkit Get Started*](https://devcloud.intel.com/oneapi/get_started/) page.
-
-
## Example Output
```
@@ -289,7 +265,6 @@ PASSED
## License
-Code samples are licensed under the MIT license. See
-[License.txt](https://github.com/oneapi-src/oneAPI-samples/blob/master/License.txt) for details.
+Code samples are licensed under the MIT license. See [License.txt](https://github.com/oneapi-src/oneAPI-samples/blob/master/License.txt) for details.
-Third party program Licenses can be found here: [third-party-programs.txt](https://github.com/oneapi-src/oneAPI-samples/blob/master/third-party-programs.txt).
+Third-party program Licenses can be found here: [third-party-programs.txt](https://github.com/oneapi-src/oneAPI-samples/blob/master/third-party-programs.txt).
diff --git a/DirectProgramming/C++SYCL_FPGA/Tutorials/DesignPatterns/io_streaming/src/CMakeLists.txt b/DirectProgramming/C++SYCL_FPGA/Tutorials/DesignPatterns/io_streaming/src/CMakeLists.txt
index 42ec6d83e3..2e88ff0ff1 100755
--- a/DirectProgramming/C++SYCL_FPGA/Tutorials/DesignPatterns/io_streaming/src/CMakeLists.txt
+++ b/DirectProgramming/C++SYCL_FPGA/Tutorials/DesignPatterns/io_streaming/src/CMakeLists.txt
@@ -1,6 +1,7 @@
set(SOURCE_FILE io_streaming.cpp)
set(TARGET_NAME io_streaming)
set(EMULATOR_TARGET ${TARGET_NAME}.fpga_emu)
+set(SIMULATOR_TARGET ${TARGET_NAME}.fpga_sim)
set(FPGA_TARGET ${TARGET_NAME}.fpga)
# FPGA board selection
@@ -28,9 +29,11 @@ endif()
# 1. The "compile" stage compiles the device code to an intermediate representation (SPIR-V).
# 2. The "link" stage invokes the compiler's FPGA backend before linking.
# For this reason, FPGA backend flags must be passed as link flags in CMake.
-set(EMULATOR_COMPILE_FLAGS "-Wall ${WIN_FLAG} -fsycl -fintelfpga -DFPGA_EMULATOR ${USM_HOST_ALLOCATIONS}")
+set(EMULATOR_COMPILE_FLAGS "-fsycl -fintelfpga -Wall ${WIN_FLAG} -DFPGA_EMULATOR ${USM_HOST_ALLOCATIONS}")
set(EMULATOR_LINK_FLAGS "-fsycl -fintelfpga")
-set(HARDWARE_COMPILE_FLAGS "-Wall ${WIN_FLAG} -fsycl -fintelfpga ${USM_HOST_ALLOCATIONS}")
+set(SIMULATOR_COMPILE_FLAGS "-fsycl -fintelfpga -Wall ${WIN_FLAG} -Xssimulation -DFPGA_SIMULATOR ${USM_HOST_ALLOCATIONS}")
+set(SIMULATOR_LINK_FLAGS "-fsycl -fintelfpga -Xssimulation -Xsghdl -Xstarget=${FPGA_DEVICE} ${USER_HARDWARE_FLAGS}")
+set(HARDWARE_COMPILE_FLAGS "-fsycl -fintelfpga -Wall ${WIN_FLAG} ${USM_HOST_ALLOCATIONS} -DFPGA_HARDWARE")
set(HARDWARE_LINK_FLAGS "-fsycl -fintelfpga -Xshardware -Xstarget=${FPGA_DEVICE} ${USER_HARDWARE_FLAGS}")
# use cmake -D USER_HARDWARE_FLAGS= to set extra flags for FPGA backend compilation
@@ -48,6 +51,20 @@ set_target_properties(${EMULATOR_TARGET} PROPERTIES COMPILE_FLAGS "${EMULATOR_CO
set_target_properties(${EMULATOR_TARGET} PROPERTIES LINK_FLAGS "${EMULATOR_LINK_FLAGS}")
add_custom_target(fpga_emu DEPENDS ${EMULATOR_TARGET})
+###############################################################################
+### FPGA Simulator
+###############################################################################
+# To compile in a single command:
+# icpx -fsycl -fintelfpga -Xssimulation -Xsghdl -Xstarget=<FPGA_DEVICE> -DFPGA_SIMULATOR <file>.cpp -o <file>.fpga_sim
+# CMake executes:
+# [compile] icpx -fsycl -fintelfpga -Xssimulation -DFPGA_SIMULATOR -o <file>.cpp.o -c <file>.cpp
+# [link] icpx -fsycl -fintelfpga -Xssimulation -Xsghdl -Xstarget=<FPGA_DEVICE> <file>.cpp.o -o <file>.fpga_sim
+add_executable(${SIMULATOR_TARGET} ${SOURCE_FILE})
+target_include_directories(${SIMULATOR_TARGET} PRIVATE ../../../../include)
+set_target_properties(${SIMULATOR_TARGET} PROPERTIES COMPILE_FLAGS "${SIMULATOR_COMPILE_FLAGS}")
+set_target_properties(${SIMULATOR_TARGET} PROPERTIES LINK_FLAGS "${SIMULATOR_LINK_FLAGS}")
+add_custom_target(fpga_sim DEPENDS ${SIMULATOR_TARGET})
+
###############################################################################
### Generate Report
###############################################################################
diff --git a/DirectProgramming/C++SYCL_FPGA/Tutorials/DesignPatterns/io_streaming/src/io_streaming.cpp b/DirectProgramming/C++SYCL_FPGA/Tutorials/DesignPatterns/io_streaming/src/io_streaming.cpp
index 54b2287f9d..c57207dd0c 100644
--- a/DirectProgramming/C++SYCL_FPGA/Tutorials/DesignPatterns/io_streaming/src/io_streaming.cpp
+++ b/DirectProgramming/C++SYCL_FPGA/Tutorials/DesignPatterns/io_streaming/src/io_streaming.cpp
@@ -32,16 +32,20 @@ int main() {
#if defined(FPGA_EMULATOR)
size_t count = 1 << 12;
+#elif defined(FPGA_SIMULATOR)
+ size_t count = 1 << 5;
#else
size_t count = 1 << 24;
#endif
try {
// device selector
-#if defined(FPGA_EMULATOR)
- ext::intel::fpga_emulator_selector selector;
-#else
- ext::intel::fpga_selector selector;
+#if FPGA_SIMULATOR
+ auto selector = sycl::ext::intel::fpga_simulator_selector_v;
+#elif FPGA_HARDWARE
+ auto selector = sycl::ext::intel::fpga_selector_v;
+#else // #if FPGA_EMULATOR
+ auto selector = sycl::ext::intel::fpga_emulator_selector_v;
#endif
// queue properties to enable SYCL profiling of kernels
@@ -50,6 +54,12 @@ int main() {
// create the device queue
queue q(selector, fpga_tools::exception_handler, prop_list);
+ auto device = q.get_device();
+
+ std::cout << "Running on device: "
+ << device.get_info().c_str()
+ << std::endl;
+
// run the loopback example system
// see 'LoopbackTest.hpp'
std::cout << "Running loopback test\n";
diff --git a/DirectProgramming/C++SYCL_FPGA/Tutorials/DesignPatterns/loop_carried_dependency/README.md b/DirectProgramming/C++SYCL_FPGA/Tutorials/DesignPatterns/loop_carried_dependency/README.md
index 5ec6a722ea..049ddc4b9a 100755
--- a/DirectProgramming/C++SYCL_FPGA/Tutorials/DesignPatterns/loop_carried_dependency/README.md
+++ b/DirectProgramming/C++SYCL_FPGA/Tutorials/DesignPatterns/loop_carried_dependency/README.md
@@ -17,6 +17,27 @@ This tutorial sample demonstrates the following concepts:
## Prerequisites
+This sample is part of the FPGA code samples.
+It is categorized as a Tier 3 sample that demonstrates a design pattern.
+
+```mermaid
+flowchart LR
+ tier1("Tier 1: Get Started")
+ tier2("Tier 2: Explore the Fundamentals")
+ tier3("Tier 3: Explore the Advanced Techniques")
+ tier4("Tier 4: Explore the Reference Designs")
+
+ tier1 --> tier2 --> tier3 --> tier4
+
+ style tier1 fill:#0071c1,stroke:#0071c1,stroke-width:1px,color:#fff
+ style tier2 fill:#0071c1,stroke:#0071c1,stroke-width:1px,color:#fff
+ style tier3 fill:#f96,stroke:#333,stroke-width:1px,color:#fff
+ style tier4 fill:#0071c1,stroke:#0071c1,stroke-width:1px,color:#fff
+```
+
+Find more information about how to navigate this part of the code samples in the [FPGA top-level README.md](/DirectProgramming/C++SYCL_FPGA/README.md).
+You can also find more information about [troubleshooting build errors](/DirectProgramming/C++SYCL_FPGA/README.md#troubleshooting), [running the sample on the Intel® DevCloud](/DirectProgramming/C++SYCL_FPGA/README.md#build-and-run-the-samples-on-intel-devcloud-optional), [using Visual Studio Code with the code samples](/DirectProgramming/C++SYCL_FPGA/README.md#use-visual-studio-code-vs-code-optional), [links to selected documentation](/DirectProgramming/C++SYCL_FPGA/README.md#documentation), etc.
+
| Optimized for | Description
|:--- |:---
| OS | Ubuntu* 18.04/20.04
RHEL*/CentOS* 8
SUSE* 15
Windows* 10
@@ -25,19 +46,13 @@ This tutorial sample demonstrates the following concepts:
> **Note**: Even though the Intel® DPC++/C++ OneAPI compiler is enough to compile for emulation, generating reports and generating RTL, there are extra software requirements for the simulation flow and FPGA compiles.
>
-> For using the simulator flow, one of the following simulators must be installed and accessible through your PATH:
+> For using the simulator flow, Intel® Quartus® Prime Pro Edition and one of the following simulators must be installed and accessible through your PATH:
> - Questa*-Intel® FPGA Edition
> - Questa*-Intel® FPGA Starter Edition
> - ModelSim® SE
>
> When using the hardware compile flow, Intel® Quartus® Prime Pro Edition must be installed and accessible through your PATH.
-### Additional Documentation
-
-- *[Explore SYCL* Through Intel® FPGA Code Samples](https://software.intel.com/content/www/us/en/develop/articles/explore-dpcpp-through-intel-fpga-code-samples.html)* helps you to navigate the samples and build your knowledge of FPGAs and SYCL.
-- *[FPGA Optimization Guide for Intel® oneAPI Toolkits](https://software.intel.com/content/www/us/en/develop/documentation/oneapi-fpga-optimization-guide)* helps you understand how to target FPGAs using SYCL and Intel® oneAPI Toolkits.
-- *[Intel® oneAPI Programming Guide](https://software.intel.com/en-us/oneapi-programming-guide)* helps you understand target-independent, SYCL-compliant programming using Intel® oneAPI Toolkits.
-
## Key Implementation Details
This tutorial demonstrates how to remove a loop-carried dependency in FPGA device code.
@@ -89,14 +104,11 @@ Look at the _Compiler Report > Throughput Analysis > Loop Analysis_ section in t
* sum (_filename:line_)
```
-## Set Environment Variables
-
-When working with the command-line interface (CLI), you should configure the oneAPI toolkits using environment variables. Set up your CLI environment by sourcing the `setvars` script every time you open a new terminal window. This practice ensures that your compiler, libraries, and tools are ready for development.
-
## Build the `Remove Loop Carried Dependency` Sample
-> **Note**: If you have not already done so, set up your CLI
-> environment by sourcing the `setvars` script in the root of your oneAPI installation.
+> **Note**: When working with the command-line interface (CLI), you should configure the oneAPI toolkits using environment variables.
+> Set up your CLI environment by sourcing the `setvars` script located in the root of your oneAPI installation every time you open a new terminal window.
+> This practice ensures that your compiler, libraries, and tools are ready for development.
>
> Linux*:
> - For system wide installations: `. /opt/intel/oneapi/setvars.sh`
@@ -104,25 +116,10 @@ When working with the command-line interface (CLI), you should configure the one
> - For non-POSIX shells, like csh, use the following command: `bash -c 'source /setvars.sh ; exec csh'`
>
> Windows*:
-> - `C:\Program Files (x86)\Intel\oneAPI\setvars.bat`
+> - `C:\Program Files (x86)\Intel\oneAPI\setvars.bat`
> - Windows PowerShell*, use the following command: `cmd.exe "/K" '"C:\Program Files (x86)\Intel\oneAPI\setvars.bat" && powershell'`
>
-> For more information on configuring environment variables, see *[Use the setvars Script with Linux* or macOS*](https://www.intel.com/content/www/us/en/develop/documentation/oneapi-programming-guide/top/oneapi-development-environment-setup/use-the-setvars-script-with-linux-or-macos.html)* or *[Use the setvars Script with Windows*](https://www.intel.com/content/www/us/en/develop/documentation/oneapi-programming-guide/top/oneapi-development-environment-setup/use-the-setvars-script-with-windows.html)*.
-
-### Use Visual Studio Code* (VS Code) (Optional)
-
-You can use Visual Studio Code* (VS Code) extensions to set your environment,
-create launch configurations, and browse and download samples.
-
-The basic steps to build and run a sample using VS Code include:
- 1. Configure the oneAPI environment with the extension **Environment Configurator for Intel® oneAPI Toolkits**.
- 2. Download a sample using the extension **Code Sample Browser for Intel® oneAPI Toolkits**.
- 3. Open a terminal in VS Code (**Terminal > New Terminal**).
- 4. Run the sample in the VS Code terminal using the instructions below.
-
-To learn more about the extensions and how to configure the oneAPI environment, see the *[Using Visual Studio Code with Intel® oneAPI Toolkits User Guide](https://www.intel.com/content/www/us/en/develop/documentation/using-vs-code-with-intel-oneapi/top.html)*.
-
-### On Linux*
+> For more information on configuring environment variables, see [Use the setvars Script with Linux* or macOS*](https://www.intel.com/content/www/us/en/develop/documentation/oneapi-programming-guide/top/oneapi-development-environment-setup/use-the-setvars-script-with-linux-or-macos.html) or [Use the setvars Script with Windows*](https://www.intel.com/content/www/us/en/develop/documentation/oneapi-programming-guide/top/oneapi-development-environment-setup/use-the-setvars-script-with-windows.html).
1. Change to the sample directory.
2. Build the program for **Intel® PAC with Intel Arria® 10 GX FPGA**, which is the default.
@@ -224,7 +221,7 @@ If you receive an error message, troubleshoot the problem using the **Diagnostic
```
2. Run the sample on the FPGA simulator device.
```
- ./loop_carried_dependency.fpga_sim
+ CL_CONTEXT_MPSIM_DEVICE_INTELFPGA=1 ./loop_carried_dependency.fpga_sim
```
3. Run the sample on the FPGA device.
```
@@ -239,46 +236,15 @@ If you receive an error message, troubleshoot the problem using the **Diagnostic
```
2. Run the sample on the FPGA simulator device.
```
+ set CL_CONTEXT_MPSIM_DEVICE_INTELFPGA=1
loop_carried_dependency.fpga_sim.exe
+ set CL_CONTEXT_MPSIM_DEVICE_INTELFPGA=
```
3. Run the sample on the FPGA device.
```
loop_carried_dependency.fpga.exe
```
-### Build and Run the Samples on Intel® DevCloud (Optional)
-
-When running a sample in the Intel® DevCloud, you must specify the compute node (CPU, GPU, FPGA) and whether to run in batch or interactive mode.
-
->**Note**: Since Intel® DevCloud for oneAPI includes the appropriate development environment already configured, you do not need to set environment variables.
-
-Use the Linux instructions to build and run the program.
-
-You can specify an FPGA runtime node using a single line script similar to the following example.
-
-```
-qsub -I -l nodes=1:fpga_runtime:ppn=2 -d .
-```
-
-- `-I` (upper case I) requests an interactive session.
-- `-l nodes=1:fpga_runtime:ppn=2` (lower case L) assigns one full node.
-- `-d .` makes the current folder as the working directory for the task.
-
- |Available Nodes |Command Options
- |:--- |:---
- |FPGA Compile Time |`qsub -I -l nodes=1:fpga_compile:ppn=2 -d .`
- |FPGA Runtime (Stratix 10) |`qsub -I -l nodes=1:fpga_runtime:stratix10:ppn=2 -d .`
- |GPU |`qsub -I -l nodes=1:gpu:ppn=2 -d .`
- |CPU |`qsub -I -l nodes=1:xeon:ppn=2 -d .`
-
->**Note**: For more information on how to specify compute nodes read, *[Launch and manage jobs](https://devcloud.intel.com/oneapi/documentation/job-submission/)* in the Intel® DevCloud for oneAPI Documentation.
-
-Only `fpga_compile` nodes support compiling to FPGA. When compiling for FPGA hardware, increase the job timeout to **12 hours**.
-
-Executing programs on FPGA hardware is only supported on `fpga_runtime` nodes of the appropriate type, such as `fpga_runtime:stratix10`.
-
-Neither compiling nor executing programs on FPGA hardware are supported on the login nodes. For more information, see the Intel® DevCloud for oneAPI *[Intel® oneAPI Base Toolkit Get Started](https://devcloud.intel.com/oneapi/get_started/)* page.
-
## Example Output
### Example Output on FPGA Device
@@ -313,4 +279,4 @@ PASSED
Code samples are licensed under the MIT license. See
[License.txt](https://github.com/oneapi-src/oneAPI-samples/blob/master/License.txt) for details.
-Third party program Licenses can be found here: [third-party-programs.txt](https://github.com/oneapi-src/oneAPI-samples/blob/master/third-party-programs.txt).
\ No newline at end of file
+Third party program Licenses can be found here: [third-party-programs.txt](https://github.com/oneapi-src/oneAPI-samples/blob/master/third-party-programs.txt).
diff --git a/DirectProgramming/C++SYCL_FPGA/Tutorials/DesignPatterns/loop_carried_dependency/src/CMakeLists.txt b/DirectProgramming/C++SYCL_FPGA/Tutorials/DesignPatterns/loop_carried_dependency/src/CMakeLists.txt
index b766d30e77..3d52bdaf17 100644
--- a/DirectProgramming/C++SYCL_FPGA/Tutorials/DesignPatterns/loop_carried_dependency/src/CMakeLists.txt
+++ b/DirectProgramming/C++SYCL_FPGA/Tutorials/DesignPatterns/loop_carried_dependency/src/CMakeLists.txt
@@ -23,11 +23,11 @@ endif()
# 1. The "compile" stage compiles the device code to an intermediate representation (SPIR-V).
# 2. The "link" stage invokes the compiler's FPGA backend before linking.
# For this reason, FPGA backend flags must be passed as link flags in CMake.
-set(EMULATOR_COMPILE_FLAGS "-Wall ${WIN_FLAG} -fsycl -fintelfpga -DFPGA_EMULATOR")
+set(EMULATOR_COMPILE_FLAGS "-fsycl -fintelfpga -Wall ${WIN_FLAG} -DFPGA_EMULATOR")
set(EMULATOR_LINK_FLAGS "-fsycl -fintelfpga")
-set(SIMULATOR_COMPILE_FLAGS "-Wall -fsycl -fintelfpga ${WIN_FLAG} -Xssimulation -DFPGA_SIMULATOR")
+set(SIMULATOR_COMPILE_FLAGS "-fsycl -fintelfpga -Wall ${WIN_FLAG} -Xssimulation -DFPGA_SIMULATOR")
set(SIMULATOR_LINK_FLAGS "-fsycl -fintelfpga -Xssimulation -Xstarget=${FPGA_DEVICE} ${USER_HARDWARE_FLAGS}")
-set(HARDWARE_COMPILE_FLAGS "-Wall ${WIN_FLAG} -fsycl -fintelfpga")
+set(HARDWARE_COMPILE_FLAGS "-fsycl -fintelfpga -Wall ${WIN_FLAG} -DFPGA_HARDWARE")
set(HARDWARE_LINK_FLAGS "-fsycl -fintelfpga -Xshardware -Xstarget=${FPGA_DEVICE} ${USER_HARDWARE_FLAGS}")
# use cmake -D USER_HARDWARE_FLAGS= to set extra flags for FPGA backend compilation
diff --git a/DirectProgramming/C++SYCL_FPGA/Tutorials/DesignPatterns/loop_carried_dependency/src/loop_carried_dependency.cpp b/DirectProgramming/C++SYCL_FPGA/Tutorials/DesignPatterns/loop_carried_dependency/src/loop_carried_dependency.cpp
index bae49a1fe6..fb2844cf5e 100644
--- a/DirectProgramming/C++SYCL_FPGA/Tutorials/DesignPatterns/loop_carried_dependency/src/loop_carried_dependency.cpp
+++ b/DirectProgramming/C++SYCL_FPGA/Tutorials/DesignPatterns/loop_carried_dependency/src/loop_carried_dependency.cpp
@@ -128,16 +128,19 @@ int main(int argc, char *argv[]) {
// Initialize queue with device selector and enabling profiling
// Create queue, get platform and device
-#if defined(FPGA_EMULATOR)
- ext::intel::fpga_emulator_selector selector;
- cout << "\nEmulator output does not demonstrate true hardware "
- "performance. The design may need to run on actual hardware "
- "to observe the performance benefit of the optimization "
+#if FPGA_SIMULATOR
+ auto selector = sycl::ext::intel::fpga_simulator_selector_v;
+#elif FPGA_HARDWARE
+ auto selector = sycl::ext::intel::fpga_selector_v;
+#else // #if FPGA_EMULATOR
+ auto selector = sycl::ext::intel::fpga_emulator_selector_v;
+#endif
+
+#ifndef FPGA_HARDWARE
+ cout << "\nEmulator and simulator outputs do not demonstrate true "
+ "hardware performance. The design may need to run on actual "
+ "hardware to observe the performance benefit of the optimization "
"exemplified in this tutorial.\n\n";
-#elif defined(FPGA_SIMULATOR)
- ext::intel::fpga_simulator_selector selector;
-#else
- ext::intel::fpga_selector selector;
#endif
double unopt_sum = -1, opt_sum = -1;
@@ -147,6 +150,12 @@ int main(int argc, char *argv[]) {
queue q(selector, fpga_tools::exception_handler,
property::queue::enable_profiling{});
+ auto device = q.get_device();
+
+ std::cout << "Running on device: "
+ << device.get_info().c_str()
+ << std::endl;
+
// compute result on device
PrintTime(Unoptimized(q, vec_a, vec_b, unopt_sum, n), q, "Unoptimized");
PrintTime(Optimized(q, vec_a, vec_b, opt_sum, n), q, "Optimized");
diff --git a/DirectProgramming/C++SYCL_FPGA/Tutorials/DesignPatterns/n_way_buffering/README.md b/DirectProgramming/C++SYCL_FPGA/Tutorials/DesignPatterns/n_way_buffering/README.md
index 6d73b43dc6..7cf4383846 100755
--- a/DirectProgramming/C++SYCL_FPGA/Tutorials/DesignPatterns/n_way_buffering/README.md
+++ b/DirectProgramming/C++SYCL_FPGA/Tutorials/DesignPatterns/n_way_buffering/README.md
@@ -15,6 +15,27 @@ This system-level optimization enables kernel execution to occur in parallel wit
## Prerequisites
+This sample is part of the FPGA code samples.
+It is categorized as a Tier 3 sample that demonstrates a design pattern.
+
+```mermaid
+flowchart LR
+ tier1("Tier 1: Get Started")
+ tier2("Tier 2: Explore the Fundamentals")
+ tier3("Tier 3: Explore the Advanced Techniques")
+ tier4("Tier 4: Explore the Reference Designs")
+
+ tier1 --> tier2 --> tier3 --> tier4
+
+ style tier1 fill:#0071c1,stroke:#0071c1,stroke-width:1px,color:#fff
+ style tier2 fill:#0071c1,stroke:#0071c1,stroke-width:1px,color:#fff
+ style tier3 fill:#f96,stroke:#333,stroke-width:1px,color:#fff
+ style tier4 fill:#0071c1,stroke:#0071c1,stroke-width:1px,color:#fff
+```
+
+Find more information about how to navigate this part of the code samples in the [FPGA top-level README.md](/DirectProgramming/C++SYCL_FPGA/README.md).
+You can also find more information about [troubleshooting build errors](/DirectProgramming/C++SYCL_FPGA/README.md#troubleshooting), [running the sample on the Intel® DevCloud](/DirectProgramming/C++SYCL_FPGA/README.md#build-and-run-the-samples-on-intel-devcloud-optional), [using Visual Studio Code with the code samples](/DirectProgramming/C++SYCL_FPGA/README.md#use-visual-studio-code-vs-code-optional), [links to selected documentation](/DirectProgramming/C++SYCL_FPGA/README.md#documentation), etc.
+
| Optimized for | Description
|:--- |:---
| OS | Ubuntu* 18.04/20.04
RHEL*/CentOS* 8
SUSE* 15
Windows* 10
@@ -23,19 +44,13 @@ This system-level optimization enables kernel execution to occur in parallel wit
> **Note**: Even though the Intel DPC++/C++ OneAPI compiler is enough to compile for emulation, generating reports and generating RTL, there are extra software requirements for the simulation flow and FPGA compiles.
>
-> For using the simulator flow, one of the following simulators must be installed and accessible through your PATH:
+> For using the simulator flow, Intel® Quartus® Prime Pro Edition and one of the following simulators must be installed and accessible through your PATH:
> - Questa*-Intel® FPGA Edition
> - Questa*-Intel® FPGA Starter Edition
> - ModelSim® SE
>
> When using the hardware compile flow, Intel® Quartus® Prime Pro Edition must be installed and accessible through your PATH.
-### Additional Documentation
-
-- *[Explore SYCL* Through Intel® FPGA Code Samples](https://software.intel.com/content/www/us/en/develop/articles/explore-dpcpp-through-intel-fpga-code-samples.html)* helps you to navigate the samples and build your knowledge of FPGAs and SYCL.
-- *[FPGA Optimization Guide for Intel® oneAPI Toolkits](https://software.intel.com/content/www/us/en/develop/documentation/oneapi-fpga-optimization-guide)* helps you understand how to target FPGAs using SYCL and Intel® oneAPI Toolkits.
-- *[Intel® oneAPI Programming Guide](https://software.intel.com/en-us/oneapi-programming-guide)* helps you understand target-independent, SYCL-compliant programming using Intel® oneAPI Toolkits.
-
## Key Implementation Details
This sample covers the following key concepts:
@@ -127,14 +142,11 @@ It is useful to think of the execution space as having **N** slots where the slo
After each kernel is launched, the host-side operations (that occur *after* the kernel in that slot completes) are launched immediately from the `main()` program. They block until the kernel execution for that slot completes (this is enforced by the runtime).
-## Set Environment Variables
-
-When working with the command-line interface (CLI), you should configure the oneAPI toolkits using environment variables. Set up your CLI environment by sourcing the `setvars` script every time you open a new terminal window. This practice ensures that your compiler, libraries, and tools are ready for development.
-
## Build the `N-Way Buffering` Sample
-> **Note**: If you have not already done so, set up your CLI
-> environment by sourcing the `setvars` script in the root of your oneAPI installation.
+> **Note**: When working with the command-line interface (CLI), you should configure the oneAPI toolkits using environment variables.
+> Set up your CLI environment by sourcing the `setvars` script located in the root of your oneAPI installation every time you open a new terminal window.
+> This practice ensures that your compiler, libraries, and tools are ready for development.
>
> Linux*:
> - For system wide installations: `. /opt/intel/oneapi/setvars.sh`
@@ -142,24 +154,10 @@ When working with the command-line interface (CLI), you should configure the one
> - For non-POSIX shells, like csh, use the following command: `bash -c 'source /setvars.sh ; exec csh'`
>
> Windows*:
-> - `C:\Program Files (x86)\Intel\oneAPI\setvars.bat`
+> - `C:\Program Files (x86)\Intel\oneAPI\setvars.bat`
> - Windows PowerShell*, use the following command: `cmd.exe "/K" '"C:\Program Files (x86)\Intel\oneAPI\setvars.bat" && powershell'`
>
-> For more information on configuring environment variables, see *[Use the setvars Script with Linux* or macOS*](https://www.intel.com/content/www/us/en/develop/documentation/oneapi-programming-guide/top/oneapi-development-environment-setup/use-the-setvars-script-with-linux-or-macos.html)* or *[Use the setvars Script with Windows*](https://www.intel.com/content/www/us/en/develop/documentation/oneapi-programming-guide/top/oneapi-development-environment-setup/use-the-setvars-script-with-windows.html)*.
-
-
-### Use Visual Studio Code* (VS Code) (Optional)
-
-You can use Visual Studio Code* (VS Code) extensions to set your environment,
-create launch configurations, and browse and download samples.
-
-The basic steps to build and run a sample using VS Code include:
- 1. Configure the oneAPI environment with the extension **Environment Configurator for Intel® oneAPI Toolkits**.
- 2. Download a sample using the extension **Code Sample Browser for Intel® oneAPI Toolkits**.
- 3. Open a terminal in VS Code (**Terminal > New Terminal**).
- 4. Run the sample in the VS Code terminal using the instructions below.
-
-To learn more about the extensions and how to configure the oneAPI environment, see the *[Using Visual Studio Code with Intel® oneAPI Toolkits User Guide](https://www.intel.com/content/www/us/en/develop/documentation/using-vs-code-with-intel-oneapi/top.html)*.
+> For more information on configuring environment variables, see [Use the setvars Script with Linux* or macOS*](https://www.intel.com/content/www/us/en/develop/documentation/oneapi-programming-guide/top/oneapi-development-environment-setup/use-the-setvars-script-with-linux-or-macos.html) or [Use the setvars Script with Windows*](https://www.intel.com/content/www/us/en/develop/documentation/oneapi-programming-guide/top/oneapi-development-environment-setup/use-the-setvars-script-with-windows.html).
### On Linux*
@@ -251,18 +249,6 @@ To learn more about the extensions and how to configure the oneAPI environment,
nmake fpga
```
->**Note**: If you encounter any issues with long paths when compiling under Windows*, you may have to create your `build` directory in a shorter path, for example `C:\samples\build`. You can then build the sample in the new location, but you must specify the full path to the build files.
-
-#### Troubleshooting
-
-If an error occurs, you can get more details by running `make` with
-the `VERBOSE=1` argument:
-```
-make VERBOSE=1
-```
-If you receive an error message, troubleshoot the problem using the **Diagnostics Utility for Intel® oneAPI Toolkits**. The diagnostic utility provides configuration and system checks to help find missing dependencies, permissions errors, and other issues. See the *[Diagnostics Utility for Intel® oneAPI Toolkits User Guide](https://www.intel.com/content/www/us/en/develop/documentation/diagnostic-utility-user-guide/top.html)* for more information on using the utility.
-
-
## Run the `N-Way Buffering` Sample
### On Linux
@@ -273,7 +259,7 @@ If you receive an error message, troubleshoot the problem using the **Diagnostic
```
2. Run the sample on the FPGA emulator (the kernel executes on the CPU).
```
- ./n_way_buffering.fpga_sim
+ CL_CONTEXT_MPSIM_DEVICE_INTELFPGA=1 ./n_way_buffering.fpga_sim
```
3. Run the sample on the FPGA device.
```
@@ -288,47 +274,15 @@ If you receive an error message, troubleshoot the problem using the **Diagnostic
```
2. Run the sample on the FPGA emulator (the kernel executes on the CPU).
```
+ set CL_CONTEXT_MPSIM_DEVICE_INTELFPGA=1
n_way_buffering.fpga_sim.exe
+ set CL_CONTEXT_MPSIM_DEVICE_INTELFPGA=
```
3. Run the sample on the FPGA device.
```
n_way_buffering.fpga.exe
```
-### Build and Run the Samples on Intel® DevCloud (Optional)
-
-When running a sample in the Intel® DevCloud, you must specify the compute node (CPU, GPU, FPGA) and whether to run in batch or interactive mode.
-
->**Note**: Since Intel® DevCloud for oneAPI includes the appropriate development environment already configured, you do not need to set environment variables.
-
-Use the Linux instructions to build and run the program.
-
-You can specify an FPGA runtime node using a single line script similar to the following example.
-
-```
-qsub -I -l nodes=1:fpga_runtime:ppn=2 -d .
-```
-
-- `-I` (upper case I) requests an interactive session.
-- `-l nodes=1:fpga_runtime:ppn=2` (lower case L) assigns one full node.
-- `-d .` makes the current folder as the working directory for the task.
-
- |Available Nodes |Command Options
- |:--- |:---
- |FPGA Compile Time |`qsub -I -l nodes=1:fpga_compile:ppn=2 -d .`
- |FPGA Runtime (Stratix 10) |`qsub -I -l nodes=1:fpga_runtime:stratix10:ppn=2 -d .`
- |GPU |`qsub -I -l nodes=1:gpu:ppn=2 -d .`
- |CPU |`qsub -I -l nodes=1:xeon:ppn=2 -d .`
-
->**Note**: For more information on how to specify compute nodes read, *[Launch and manage jobs](https://DevCloud.intel.com/oneapi/documentation/job-submission/)* in the Intel® DevCloud for oneAPI Documentation.
-
-Only `fpga_compile` nodes support compiling to FPGA. When compiling for FPGA hardware, increase the job timeout to **12 hours**.
-
-Executing programs on FPGA hardware is only supported on `fpga_runtime` nodes of the appropriate type, such as `fpga_runtime:stratix10`.
-
-Neither compiling nor executing programs on FPGA hardware are supported on the login nodes. For more information, see the Intel® DevCloud for oneAPI *[Intel® oneAPI Base Toolkit Get Started](https://DevCloud.intel.com/oneapi/get_started/)* page.
-
-
## Example Output
### Example Output on FPGA Device
diff --git a/DirectProgramming/C++SYCL_FPGA/Tutorials/DesignPatterns/n_way_buffering/src/CMakeLists.txt b/DirectProgramming/C++SYCL_FPGA/Tutorials/DesignPatterns/n_way_buffering/src/CMakeLists.txt
index 65b6d22b58..73cb4c3657 100755
--- a/DirectProgramming/C++SYCL_FPGA/Tutorials/DesignPatterns/n_way_buffering/src/CMakeLists.txt
+++ b/DirectProgramming/C++SYCL_FPGA/Tutorials/DesignPatterns/n_way_buffering/src/CMakeLists.txt
@@ -29,12 +29,12 @@ endif()
# 1. The "compile" stage compiles the device code to an intermediate representation (SPIR-V).
# 2. The "link" stage invokes the compiler's FPGA backend before linking.
# For this reason, FPGA backend flags must be passed as link flags in CMake.
-set(EMULATOR_COMPILE_FLAGS "-Wall ${WIN_FLAG} -fsycl -fintelfpga -DFPGA_EMULATOR")
-set(EMULATOR_LINK_FLAGS "${THREAD_LIB} -fsycl -fintelfpga")
-set(SIMULATOR_COMPILE_FLAGS "-Wall ${WIN_FLAG} -fsycl -fintelfpga -Xssimulation -DFPGA_SIMULATOR")
-set(SIMULATOR_LINK_FLAGS "${THREAD_LIB} -fsycl -fintelfpga -Xssimulation -Xstarget=${FPGA_DEVICE} ${USER_HARDWARE_FLAGS}")
-set(HARDWARE_COMPILE_FLAGS "-Wall ${WIN_FLAG} -fsycl -fintelfpga")
-set(HARDWARE_LINK_FLAGS "${THREAD_LIB} -fsycl -fintelfpga -Xshardware -Xstarget=${FPGA_DEVICE} ${USER_HARDWARE_FLAGS}")
+set(EMULATOR_COMPILE_FLAGS "-fsycl -fintelfpga -Wall ${WIN_FLAG} -DFPGA_EMULATOR")
+set(EMULATOR_LINK_FLAGS "-fsycl -fintelfpga ${THREAD_LIB}")
+set(SIMULATOR_COMPILE_FLAGS "-fsycl -fintelfpga -Wall ${WIN_FLAG} -Xssimulation -DFPGA_SIMULATOR")
+set(SIMULATOR_LINK_FLAGS "-fsycl -fintelfpga ${THREAD_LIB} -Xssimulation -Xstarget=${FPGA_DEVICE} ${USER_HARDWARE_FLAGS}")
+set(HARDWARE_COMPILE_FLAGS "-fsycl -fintelfpga -Wall ${WIN_FLAG} -DFPGA_HARDWARE")
+set(HARDWARE_LINK_FLAGS "-fsycl -fintelfpga ${THREAD_LIB} -Xshardware -Xstarget=${FPGA_DEVICE} ${USER_HARDWARE_FLAGS}")
# use cmake -D USER_HARDWARE_FLAGS= to set extra flags for FPGA simulator and backend compilation
###############################################################################
diff --git a/DirectProgramming/C++SYCL_FPGA/Tutorials/DesignPatterns/n_way_buffering/src/n_way_buffering.cpp b/DirectProgramming/C++SYCL_FPGA/Tutorials/DesignPatterns/n_way_buffering/src/n_way_buffering.cpp
index 891e901935..f6ef224450 100644
--- a/DirectProgramming/C++SYCL_FPGA/Tutorials/DesignPatterns/n_way_buffering/src/n_way_buffering.cpp
+++ b/DirectProgramming/C++SYCL_FPGA/Tutorials/DesignPatterns/n_way_buffering/src/n_way_buffering.cpp
@@ -214,33 +214,33 @@ void ProcessInput(buffer &buf, std::vector ©) {
int main() {
// Create queue, get platform and device
-#if defined(FPGA_EMULATOR)
- ext::intel::fpga_emulator_selector device_selector;
- std::cout << "\nEmulator output does not demonstrate true hardware "
- "performance. The design may need to run on actual hardware "
- "to observe the performance benefit of the optimization "
- "exemplified in this tutorial.\n\n";
-#elif defined(FPGA_SIMULATOR)
- ext::intel::fpga_simulator_selector device_selector;
- std::cout << "\nSimulator output does not demonstrate true hardware "
- "performance. The design may need to run on actual hardware "
- "to observe the performance benefit of the optimization "
- "exemplified in this tutorial.\n\n";
-#else
- ext::intel::fpga_selector device_selector;
+#if FPGA_SIMULATOR
+ auto selector = sycl::ext::intel::fpga_simulator_selector_v;
+#elif FPGA_HARDWARE
+ auto selector = sycl::ext::intel::fpga_selector_v;
+#else // #if FPGA_EMULATOR
+ auto selector = sycl::ext::intel::fpga_emulator_selector_v;
+#endif
+
+#ifndef FPGA_HARDWARE
+ std::cout << "\nEmulator and simulator outputs do not demonstrate "
+ "true hardware performance. The design may need to run "
+ "on actual hardware to observe the performance benefit "
+ "of the optimization exemplified in this tutorial.\n\n";
#endif
try {
auto prop_list = property_list{property::queue::enable_profiling()};
- sycl::queue q(device_selector, fpga_tools::exception_handler, prop_list);
+ sycl::queue q(selector, fpga_tools::exception_handler, prop_list);
platform platform = q.get_context().get_platform();
device device = q.get_device();
std::cout << "Platform name: "
<< platform.get_info().c_str() << "\n";
- std::cout << "Device name: "
- << device.get_info().c_str() << "\n\n\n";
+ std::cout << "Running on device: "
+ << device.get_info().c_str()
+ << std::endl;
std::cout << "Executing kernel " << kTimes << " times in each round.\n\n";
diff --git a/DirectProgramming/C++SYCL_FPGA/Tutorials/DesignPatterns/onchip_memory_cache/README.md b/DirectProgramming/C++SYCL_FPGA/Tutorials/DesignPatterns/onchip_memory_cache/README.md
index c6511459a0..36e5700fe4 100755
--- a/DirectProgramming/C++SYCL_FPGA/Tutorials/DesignPatterns/onchip_memory_cache/README.md
+++ b/DirectProgramming/C++SYCL_FPGA/Tutorials/DesignPatterns/onchip_memory_cache/README.md
@@ -11,14 +11,35 @@ This FPGA tutorial demonstrates how to build a simple cache (implemented in FPGA
> **Note**: Even though the Intel DPC++/C++ OneAPI compiler is enough to compile for emulation, generating reports and generating RTL, there are extra software requirements for the simulation flow and FPGA compiles.
>
-> For using the simulator flow, one of the following simulators must be installed and accessible through your PATH:
+> For using the simulator flow, Intel® Quartus® Prime Pro Edition and one of the following simulators must be installed and accessible through your PATH:
> - Questa*-Intel® FPGA Edition
> - Questa*-Intel® FPGA Starter Edition
> - ModelSim® SE
>
> When using the hardware compile flow, Intel® Quartus® Prime Pro Edition must be installed and accessible through your PATH.
+## Prerequisites
+
+This sample is part of the FPGA code samples.
+It is categorized as a Tier 3 sample that demonstrates a design pattern.
+
+```mermaid
+flowchart LR
+ tier1("Tier 1: Get Started")
+ tier2("Tier 2: Explore the Fundamentals")
+ tier3("Tier 3: Explore the Advanced Techniques")
+ tier4("Tier 4: Explore the Reference Designs")
+
+ tier1 --> tier2 --> tier3 --> tier4
+
+ style tier1 fill:#0071c1,stroke:#0071c1,stroke-width:1px,color:#fff
+ style tier2 fill:#0071c1,stroke:#0071c1,stroke-width:1px,color:#fff
+ style tier3 fill:#f96,stroke:#333,stroke-width:1px,color:#fff
+ style tier4 fill:#0071c1,stroke:#0071c1,stroke-width:1px,color:#fff
+```
+Find more information about how to navigate this part of the code samples in the [FPGA top-level README.md](/DirectProgramming/C++SYCL_FPGA/README.md).
+You can also find more information about [troubleshooting build errors](/DirectProgramming/C++SYCL_FPGA/README.md#troubleshooting), [running the sample on the Intel® DevCloud](/DirectProgramming/C++SYCL_FPGA/README.md#build-and-run-the-samples-on-intel-devcloud-optional), [using Visual Studio Code with the code samples](/DirectProgramming/C++SYCL_FPGA/README.md#use-visual-studio-code-vs-code-optional), [links to selected documentation](/DirectProgramming/C++SYCL_FPGA/README.md#documentation), etc.
## Purpose
In SYCL* task kernels for FPGA, our objective is to achieve an initiation interval (II) of 1 on performance-critical loops. This means that a new loop iteration is launched on every clock cycle, maximizing the loop's throughput.
@@ -41,7 +62,6 @@ In a design with II=1 critical loops but lower than desired fMAX, the
To check whether this is the case for a given design, view the "Kernel Memory Viewer" section of the optimization report. Select the on-chip memory of interest from the Kernel Memory List, and mouse over the load operation "LD" to check its latency. If the latency of the load operation is 1, this is a clear sign that the compiler has attempted to sacrifice fMAX to improve loop II.
-
### Implementing the on-chip memory with cache technique
The tutorial demonstrates the technique using a program that computes a histogram. The histogram operation accepts an input vector of values, separates the values into groups, and counts the number of values per group. For each input value, an output group is determined, and the count for that group is incremented. This count is stored in the on-chip memory, and the increment operation requires reading from memory, performing the increment, and storing the result. This read-modify-write operation is the critical path that can result in II > 1.
@@ -56,11 +76,6 @@ For user designs, each iteration takes only a few moments to compile the reports
This tutorial creates multiple kernels sweeping across different cache depths within a single design. This allows a single compile of the reports to determine the optimal cache depth.
-### Additional Documentation
-- [Explore SYCL* Through Intel® FPGA Code Samples](https://software.intel.com/content/www/us/en/develop/articles/explore-dpcpp-through-intel-fpga-code-samples.html) helps you to navigate the samples and build your knowledge of FPGAs and SYCL.
-- [FPGA Optimization Guide for Intel® oneAPI Toolkits](https://software.intel.com/content/www/us/en/develop/documentation/oneapi-fpga-optimization-guide) helps you understand how to target FPGAs using SYCL and Intel® oneAPI Toolkits.
-- [Intel® oneAPI Programming Guide](https://software.intel.com/en-us/oneapi-programming-guide) helps you understand target-independent, SYCL-compliant programming using Intel® oneAPI Toolkits.
-
## Key Concepts
* How to implement the on-chip memory cache optimization technique
* The scenarios in which this technique benefits performance
@@ -68,41 +83,20 @@ This tutorial creates multiple kernels sweeping across different cache depths wi
## Building the `onchip_memory_cache` Tutorial
-> **Note**: If you have not already done so, set up your CLI
-> environment by sourcing the `setvars` script located in
-> the root of your oneAPI installation.
+> **Note**: When working with the command-line interface (CLI), you should configure the oneAPI toolkits using environment variables.
+> Set up your CLI environment by sourcing the `setvars` script located in the root of your oneAPI installation every time you open a new terminal window.
+> This practice ensures that your compiler, libraries, and tools are ready for development.
>
> Linux*:
> - For system wide installations: `. /opt/intel/oneapi/setvars.sh`
-> - For private installations: `. ~/intel/oneapi/setvars.sh`
+> - For private installations: `. ~/intel/oneapi/setvars.sh`
+> - For non-POSIX shells, like csh, use the following command: `bash -c 'source <install-dir>/setvars.sh ; exec csh'`
>
> Windows*:
> - `C:\Program Files(x86)\Intel\oneAPI\setvars.bat`
+> - For Windows PowerShell*, use the following command: `cmd.exe "/K" '"C:\Program Files (x86)\Intel\oneAPI\setvars.bat" && powershell'`
>
->For more information on environment variables, see **Use the setvars Script** for [Linux or macOS](https://www.intel.com/content/www/us/en/develop/documentation/oneapi-programming-guide/top/oneapi-development-environment-setup/use-the-setvars-script-with-linux-or-macos.html), or [Windows](https://www.intel.com/content/www/us/en/develop/documentation/oneapi-programming-guide/top/oneapi-development-environment-setup/use-the-setvars-script-with-windows.html).
-
-### Include Files
-The included headers `onchip_memory_with_cache.hpp` and `unrolled_loop.hpp` are located in the same Code Samples GIT repo as this tutorial.
-
-### Running Samples in Intel® DevCloud
-If running a sample in the Intel® DevCloud, remember that you must specify the type of compute node and whether to run in batch or interactive mode. Compiles to FPGA are only supported on fpga_compile nodes. Executing programs on FPGA hardware is only supported on fpga_runtime nodes of the appropriate type, such as fpga_runtime:arria10 or fpga_runtime:stratix10. Neither compiling nor executing programs on FPGA hardware are supported on the login nodes. For more information, see the Intel® oneAPI Base Toolkit Get Started Guide ([https://devcloud.intel.com/oneapi/documentation/base-toolkit/](https://devcloud.intel.com/oneapi/documentation/base-toolkit/)).
-
-When compiling for FPGA hardware, it is recommended to increase the job timeout to 12h.
-
-
-### Using Visual Studio Code* (Optional)
-
-You can use Visual Studio Code (VS Code) extensions to set your environment, create launch configurations,
-and browse and download samples.
-
-The basic steps to build and run a sample using VS Code include:
- - Download a sample using the extension **Code Sample Browser for Intel® oneAPI Toolkits**.
- - Configure the oneAPI environment with the extension **Environment Configurator for Intel® oneAPI Toolkits**.
- - Open a Terminal in VS Code (**Terminal>New Terminal**).
- - Run the sample in the VS Code terminal using the instructions below.
-
-To learn more about the extensions and how to configure the oneAPI environment, see the
-[Using Visual Studio Code with Intel® oneAPI Toolkits User Guide](https://software.intel.com/content/www/us/en/develop/documentation/using-vs-code-with-intel-oneapi/top.html).
+> For more information on configuring environment variables, see [Use the setvars Script with Linux* or macOS*](https://www.intel.com/content/www/us/en/develop/documentation/oneapi-programming-guide/top/oneapi-development-environment-setup/use-the-setvars-script-with-linux-or-macos.html) or [Use the setvars Script with Windows*](https://www.intel.com/content/www/us/en/develop/documentation/oneapi-programming-guide/top/oneapi-development-environment-setup/use-the-setvars-script-with-windows.html).
### On a Linux* System
@@ -127,22 +121,22 @@ To learn more about the extensions and how to configure the oneAPI environment,
2. Compile the design through the generated `Makefile`. The following build targets are provided, matching the recommended development flow:
- * Compile for emulation (fast compile time, targets emulated FPGA device):
- ```
- make fpga_emu
- ```
- * Generate the optimization report:
- ```
- make report
- ```
- * Compile for simulation (fast compile time, targets simulated FPGA device, reduced data size):
- ```
- make fpga_sim
- ```
- * Compile for FPGA hardware (longer compile time, targets FPGA device):
- ```
- make fpga
- ```
+ * Compile for emulation (fast compile time, targets emulated FPGA device):
+ ```
+ make fpga_emu
+ ```
+ * Generate the optimization report:
+ ```
+ make report
+ ```
+ * Compile for simulation (fast compile time, targets simulated FPGA device, reduced data size):
+ ```
+ make fpga_sim
+ ```
+ * Compile for FPGA hardware (longer compile time, targets FPGA device):
+ ```
+ make fpga
+ ```
3. (Optional) As the above hardware compile may take several hours to complete, FPGA precompiled binaries (compatible with Linux* Ubuntu* 18.04) can be downloaded here.
### On a Windows* System
@@ -189,22 +183,6 @@ To learn more about the extensions and how to configure the oneAPI environment,
> **Note**: If you encounter any issues with long paths when compiling under Windows*, you may have to create your ‘build’ directory in a shorter path, for example c:\samples\build. You can then run cmake from that directory, and provide cmake with the full path to your sample directory.
-### Troubleshooting
-
-If an error occurs, you can get more details by running `make` with
-the `VERBOSE=1` argument:
-``make VERBOSE=1``
-For more comprehensive troubleshooting, use the Diagnostics Utility for
-Intel® oneAPI Toolkits, which provides system checks to find missing
-dependencies and permissions errors.
-[Learn more](https://software.intel.com/content/www/us/en/develop/documentation/diagnostic-utility-user-guide/top.html).
-
-
- ### In Third-Party Integrated Development Environments (IDEs)
-
-You can compile and run this tutorial in the Eclipse* IDE (in Linux*) and the Visual Studio* IDE (in Windows*). For instructions, refer to the following link: [FPGA Workflows on Third-Party IDEs for Intel® oneAPI Toolkits](https://www.intel.com/content/www/us/en/developer/articles/technical/intel-oneapi-dpcpp-fpga-workflow-on-ide.html)
-
-
## Examining the Reports
Locate `report.html` in the `onchip_memory_cache_report.prj/reports/` directory. Open the report in any of Chrome*, Firefox*, Edge*, or Internet Explorer*.
@@ -212,24 +190,29 @@ Compare the Loop Analysis reports for kernels with various cache depths, as desc
Open the Kernel Memory viewer and compare the Load Latency on the loads from kernels with various cache depths, as describe in the "When is the on-chip memory cache technique applicable?" section. This will illustrate that a cache depth of at least 7 is required to achieve a load latency of > 1.
-
## Running the Sample
- 1. Run the sample on the FPGA emulator (the kernel executes on the CPU):
- ```
- ./onchip_memory_cache.fpga_emu (Linux)
- onchip_memory_cache.fpga_emu.exe (Windows)
- ```
+1. Run the sample on the FPGA emulator (the kernel executes on the CPU):
+ ```
+ ./onchip_memory_cache.fpga_emu (Linux)
+ onchip_memory_cache.fpga_emu.exe (Windows)
+ ```
2. Run the sample on the FPGA simulator device:
- ```
- ./onchip_memory_cache.fpga_sim (Linux)
- onchip_memory_cache.fpga_sim.exe (Windows)
- ```
+ * On Linux
+ ```
+ CL_CONTEXT_MPSIM_DEVICE_INTELFPGA=1 ./onchip_memory_cache.fpga_sim
+ ```
+ * On Windows
+ ```
+ set CL_CONTEXT_MPSIM_DEVICE_INTELFPGA=1
+ onchip_memory_cache.fpga_sim.exe
+ set CL_CONTEXT_MPSIM_DEVICE_INTELFPGA=
+ ```
3. Run the sample on the FPGA device:
- ```
- ./onchip_memory_cache.fpga (Linux)
- onchip_memory_cache.fpga.exe (Windows)
- ```
+ ```
+ ./onchip_memory_cache.fpga (Linux)
+ onchip_memory_cache.fpga.exe (Windows)
+ ```
### Example of Output
@@ -299,6 +282,7 @@ Because the fMAX of a design is determined by the slowest kernel, we
When caching is used, performance noticeably increases. As previously mentioned, this technique should result in an II reduction, which should lead to a throughput improvement. The technique can also improve fMAX if the compiler had previously implemented a latency=1 load operation, in which case the fMAX increase should result in a further throughput improvement.
## License
+
Code samples are licensed under the MIT license. See
[License.txt](https://github.com/oneapi-src/oneAPI-samples/blob/master/License.txt) for details.
diff --git a/DirectProgramming/C++SYCL_FPGA/Tutorials/DesignPatterns/onchip_memory_cache/src/CMakeLists.txt b/DirectProgramming/C++SYCL_FPGA/Tutorials/DesignPatterns/onchip_memory_cache/src/CMakeLists.txt
index a0bd515f2f..eafb0596e4 100755
--- a/DirectProgramming/C++SYCL_FPGA/Tutorials/DesignPatterns/onchip_memory_cache/src/CMakeLists.txt
+++ b/DirectProgramming/C++SYCL_FPGA/Tutorials/DesignPatterns/onchip_memory_cache/src/CMakeLists.txt
@@ -38,12 +38,12 @@ set(CACHE_DEPTH_FLAG "-DMAX_CACHE_DEPTH=${MAX_CACHE_DEPTH} -DMIN_CACHE_DEPTH=${M
# 1. The "compile" stage compiles the device code to an intermediate representation (SPIR-V).
# 2. The "link" stage invokes the compiler's FPGA backend before linking.
# For this reason, FPGA backend flags must be passed as link flags in CMake.
-set(EMULATOR_COMPILE_FLAGS "-Wall ${WIN_FLAG} ${CACHE_DEPTH_FLAG} ${AC_TYPES_FLAG} -fsycl -fintelfpga -DFPGA_EMULATOR")
-set(EMULATOR_LINK_FLAGS "${AC_TYPES_FLAG} -fsycl -fintelfpga")
+set(EMULATOR_COMPILE_FLAGS "-fsycl -fintelfpga -Wall ${WIN_FLAG} ${CACHE_DEPTH_FLAG} ${AC_TYPES_FLAG} -DFPGA_EMULATOR")
+set(EMULATOR_LINK_FLAGS "-fsycl -fintelfpga ${AC_TYPES_FLAG}")
set(REPORT_LINK_FLAGS "-fsycl -fintelfpga -Xshardware -Xstarget=${FPGA_DEVICE} ${CACHE_DEPTH_FLAG} ${USER_HARDWARE_FLAGS}")
-set(SIMULATOR_COMPILE_FLAGS "-Wall ${WIN_FLAG} ${CACHE_DEPTH_FLAG} ${AC_TYPES_FLAG} -fsycl -fintelfpga -Xssimulation -DFPGA_SIMULATOR")
+set(SIMULATOR_COMPILE_FLAGS "-fsycl -fintelfpga -Wall ${WIN_FLAG} ${CACHE_DEPTH_FLAG} ${AC_TYPES_FLAG} -Xssimulation -DFPGA_SIMULATOR")
set(SIMULATOR_LINK_FLAGS "-fsycl -fintelfpga -Xssimulation -Xstarget=${FPGA_DEVICE} ${CACHE_DEPTH_FLAG} ${USER_HARDWARE_FLAGS} ${AC_TYPES_FLAG}")
-set(HARDWARE_COMPILE_FLAGS "-Wall ${WIN_FLAG} ${CACHE_DEPTH_FLAG} ${AC_TYPES_FLAG} -fsycl -fintelfpga")
+set(HARDWARE_COMPILE_FLAGS "-fsycl -fintelfpga -Wall ${WIN_FLAG} ${CACHE_DEPTH_FLAG} ${AC_TYPES_FLAG} -DFPGA_HARDWARE")
set(HARDWARE_LINK_FLAGS "-fsycl -fintelfpga -Xshardware -Xstarget=${FPGA_DEVICE} ${CACHE_DEPTH_FLAG} ${USER_HARDWARE_FLAGS} ${AC_TYPES_FLAG}")
# use cmake -D USER_HARDWARE_FLAGS= to set extra flags for FPGA backend compilation
diff --git a/DirectProgramming/C++SYCL_FPGA/Tutorials/DesignPatterns/onchip_memory_cache/src/onchip_memory_cache.cpp b/DirectProgramming/C++SYCL_FPGA/Tutorials/DesignPatterns/onchip_memory_cache/src/onchip_memory_cache.cpp
index 9769cf2ca1..3efb2f4fe2 100644
--- a/DirectProgramming/C++SYCL_FPGA/Tutorials/DesignPatterns/onchip_memory_cache/src/onchip_memory_cache.cpp
+++ b/DirectProgramming/C++SYCL_FPGA/Tutorials/DesignPatterns/onchip_memory_cache/src/onchip_memory_cache.cpp
@@ -73,35 +73,35 @@ int main() {
double time_kernel;
// Create queue, get platform and device
-#if defined(FPGA_EMULATOR)
- sycl::ext::intel::fpga_emulator_selector device_selector;
- std::cout << "\nEmulator output does not demonstrate true hardware "
- "performance. The design may need to run on actual hardware "
- "to observe the performance benefit of the optimization "
- "exemplified in this tutorial.\n\n";
-#elif defined(FPGA_SIMULATOR)
- sycl::ext::intel::fpga_simulator_selector device_selector;
- std::cout << "\nSimulator output does not demonstrate true hardware "
- "performance. The design may need to run on actual hardware "
- "to observe the performance benefit of the optimization "
- "exemplified in this tutorial.\n\n";
-#else
- sycl::ext::intel::fpga_selector device_selector;
+#if FPGA_SIMULATOR
+ auto selector = sycl::ext::intel::fpga_simulator_selector_v;
+#elif FPGA_HARDWARE
+ auto selector = sycl::ext::intel::fpga_selector_v;
+#else // #if FPGA_EMULATOR
+ auto selector = sycl::ext::intel::fpga_emulator_selector_v;
+#endif
+
+#ifndef FPGA_HARDWARE
+ std::cout << "\nEmulator and simulator outputs do not demonstrate "
+ "true hardware performance. The design may need to run "
+ "on actual hardware to observe the performance benefit "
+ "of the optimization exemplified in this tutorial.\n\n";
#endif
+
try {
auto prop_list =
sycl::property_list{sycl::property::queue::enable_profiling()};
- sycl::queue q(device_selector, fpga_tools::exception_handler, prop_list);
+ sycl::queue q(selector, fpga_tools::exception_handler, prop_list);
sycl::platform platform = q.get_context().get_platform();
sycl::device device = q.get_device();
std::cout << "Platform name: "
<< platform.get_info().c_str()
<< "\n";
- std::cout << "Device name: "
- << device.get_info().c_str()
- << "\n\n\n";
+ std::cout << "Running on device: "
+ << device.get_info().c_str()
+ << std::endl;
std::cout << "\nNumber of inputs: " << kInitNumInputs << "\n";
std::cout << "Number of outputs: " << kNumOutputs << "\n\n";
diff --git a/DirectProgramming/C++SYCL_FPGA/Tutorials/DesignPatterns/optimize_inner_loop/README.md b/DirectProgramming/C++SYCL_FPGA/Tutorials/DesignPatterns/optimize_inner_loop/README.md
index 0a958d1ea1..fc594bceeb 100755
--- a/DirectProgramming/C++SYCL_FPGA/Tutorials/DesignPatterns/optimize_inner_loop/README.md
+++ b/DirectProgramming/C++SYCL_FPGA/Tutorials/DesignPatterns/optimize_inner_loop/README.md
@@ -11,13 +11,35 @@ This FPGA tutorial discusses optimizing the throughput of an inner loop with a l
> **Note**: Even though the Intel DPC++/C++ OneAPI compiler is enough to compile for emulation, generating reports and generating RTL, there are extra software requirements for the simulation flow and FPGA compiles.
>
-> For using the simulator flow, one of the following simulators must be installed and accessible through your PATH:
+> For using the simulator flow, Intel® Quartus® Prime Pro Edition and one of the following simulators must be installed and accessible through your PATH:
> - Questa*-Intel® FPGA Edition
> - Questa*-Intel® FPGA Starter Edition
> - ModelSim® SE
>
> When using the hardware compile flow, Intel® Quartus® Prime Pro Edition must be installed and accessible through your PATH.
+## Prerequisites
+
+This sample is part of the FPGA code samples.
+It is categorized as a Tier 3 sample that demonstrates a design pattern.
+
+```mermaid
+flowchart LR
+ tier1("Tier 1: Get Started")
+ tier2("Tier 2: Explore the Fundamentals")
+ tier3("Tier 3: Explore the Advanced Techniques")
+ tier4("Tier 4: Explore the Reference Designs")
+
+ tier1 --> tier2 --> tier3 --> tier4
+
+ style tier1 fill:#0071c1,stroke:#0071c1,stroke-width:1px,color:#fff
+ style tier2 fill:#0071c1,stroke:#0071c1,stroke-width:1px,color:#fff
+ style tier3 fill:#f96,stroke:#333,stroke-width:1px,color:#fff
+ style tier4 fill:#0071c1,stroke:#0071c1,stroke-width:1px,color:#fff
+```
+
+Find more information about how to navigate this part of the code samples in the [FPGA top-level README.md](/DirectProgramming/C++SYCL_FPGA/README.md).
+You can also find more information about [troubleshooting build errors](/DirectProgramming/C++SYCL_FPGA/README.md#troubleshooting), [running the sample on the Intel® DevCloud](/DirectProgramming/C++SYCL_FPGA/README.md#build-and-run-the-samples-on-intel-devcloud-optional), [using Visual Studio Code with the code samples](/DirectProgramming/C++SYCL_FPGA/README.md#use-visual-studio-code-vs-code-optional), [links to selected documentation](/DirectProgramming/C++SYCL_FPGA/README.md#documentation), etc.
## Purpose
This tutorial will show how to optimize the throughput of an inner loop with a low trip count. A *low* trip count is relative. In this tutorial, we will consider *low* to be on the order of 100 or fewer iterations.
@@ -95,50 +117,25 @@ while (Pipe::read()) {
}
```
-### Additional Documentation
-- [Explore SYCL* Through Intel® FPGA Code Samples](https://software.intel.com/content/www/us/en/develop/articles/explore-dpcpp-through-intel-fpga-code-samples.html) helps you to navigate the samples and build your knowledge of FPGAs and SYCL.
-- [FPGA Optimization Guide for Intel® oneAPI Toolkits](https://software.intel.com/content/www/us/en/develop/documentation/oneapi-fpga-optimization-guide) helps you understand how to target FPGAs using SYCL and Intel® oneAPI Toolkits.
-- [Intel® oneAPI Programming Guide](https://software.intel.com/en-us/oneapi-programming-guide) helps you understand target-independent, SYCL-compliant programming using Intel® oneAPI Toolkits.
-
## Key Concepts
* Optimizing the throughput of inner loops with low trip counts by using the `speculated_iterations` attribute and explicit loop bounding
-
## Building the `optimize_inner_loop` Tutorial
-> **Note**: If you have not already done so, set up your CLI
-> environment by sourcing the `setvars` script located in
-> the root of your oneAPI installation.
+> **Note**: When working with the command-line interface (CLI), you should configure the oneAPI toolkits using environment variables.
+> Set up your CLI environment by sourcing the `setvars` script located in the root of your oneAPI installation every time you open a new terminal window.
+> This practice ensures that your compiler, libraries, and tools are ready for development.
>
> Linux*:
> - For system wide installations: `. /opt/intel/oneapi/setvars.sh`
-> - For private installations: `. ~/intel/oneapi/setvars.sh`
+> - For private installations: `. ~/intel/oneapi/setvars.sh`
+> - For non-POSIX shells, like csh, use the following command: `bash -c 'source <install-dir>/setvars.sh ; exec csh'`
>
> Windows*:
> - `C:\Program Files(x86)\Intel\oneAPI\setvars.bat`
+> - For Windows PowerShell*, use the following command: `cmd.exe "/K" '"C:\Program Files (x86)\Intel\oneAPI\setvars.bat" && powershell'`
>
->For more information on environment variables, see **Use the setvars Script** for [Linux or macOS](https://www.intel.com/content/www/us/en/develop/documentation/oneapi-programming-guide/top/oneapi-development-environment-setup/use-the-setvars-script-with-linux-or-macos.html), or [Windows](https://www.intel.com/content/www/us/en/develop/documentation/oneapi-programming-guide/top/oneapi-development-environment-setup/use-the-setvars-script-with-windows.html).
-
-
-### Running Samples in Intel® DevCloud
-If running a sample in the Intel® DevCloud, remember that you must specify the type of compute node and whether to run in batch or interactive mode. Compiles to FPGA are only supported on fpga_compile nodes. Executing programs on FPGA hardware is only supported on fpga_runtime nodes of the appropriate type, such as fpga_runtime:arria10 or fpga_runtime:stratix10. Neither compiling nor executing programs on FPGA hardware are supported on the login nodes. For more information, see the Intel® oneAPI Base Toolkit Get Started Guide ([https://devcloud.intel.com/oneapi/documentation/base-toolkit/](https://devcloud.intel.com/oneapi/documentation/base-toolkit/)).
-
-When compiling for FPGA hardware, it is recommended to increase the job timeout to 12h.
-
-
-### Using Visual Studio Code* (Optional)
-
-You can use Visual Studio Code (VS Code) extensions to set your environment, create launch configurations,
-and browse and download samples.
-
-The basic steps to build and run a sample using VS Code include:
- - Download a sample using the extension **Code Sample Browser for Intel oneAPI Toolkits**.
- - Configure the oneAPI environment with the extension **Environment Configurator for Intel oneAPI Toolkits**.
- - Open a Terminal in VS Code (**Terminal>New Terminal**).
- - Run the sample in the VS Code terminal using the instructions below.
-
-To learn more about the extensions and how to configure the oneAPI environment, see the
-[Using Visual Studio Code with Intel® oneAPI Toolkits User Guide](https://software.intel.com/content/www/us/en/develop/documentation/using-vs-code-with-intel-oneapi/top.html).
+> For more information on configuring environment variables, see [Use the setvars Script with Linux* or macOS*](https://www.intel.com/content/www/us/en/develop/documentation/oneapi-programming-guide/top/oneapi-development-environment-setup/use-the-setvars-script-with-linux-or-macos.html) or [Use the setvars Script with Windows*](https://www.intel.com/content/www/us/en/develop/documentation/oneapi-programming-guide/top/oneapi-development-environment-setup/use-the-setvars-script-with-windows.html).
### On a Linux* System
@@ -224,19 +221,6 @@ To learn more about the extensions and how to configure the oneAPI environment,
*Note:* The Intel® PAC with Intel Arria® 10 GX FPGA and Intel® FPGA PAC D5005 (with Intel Stratix® 10 SX) do not support Windows*. Compiling to FPGA hardware on Windows* requires a third-party or custom Board Support Package (BSP) with Windows* support.
*Note:* If you encounter any issues with long paths when compiling under Windows*, you may have to create your ‘build’ directory in a shorter path, for example c:\samples\build. You can then run cmake from that directory, and provide cmake with the full path to your sample directory.
- ### Troubleshooting
-If an error occurs, you can get more details by running `make` with
-the `VERBOSE=1` argument:
-``make VERBOSE=1``
-For more comprehensive troubleshooting, use the Diagnostics Utility for
-Intel® oneAPI Toolkits, which provides system checks to find missing
-dependencies and permissions errors.
-[Learn more](https://software.intel.com/content/www/us/en/develop/documentation/diagnostic-utility-user-guide/top.html).
-
-### In Third-Party Integrated Development Environments (IDEs)
-
-You can compile and run this tutorial in the Eclipse* IDE (in Linux*) and the Visual Studio* IDE (in Windows*). For instructions, refer to the following link: [FPGA Workflows on Third-Party IDEs for Intel® oneAPI Toolkits](https://www.intel.com/content/www/us/en/developer/articles/technical/intel-oneapi-dpcpp-fpga-workflow-on-ide.html).
-
## Examining the Reports
Locate `report.html` in the `optimize_inner_loop.prj/reports/` directory. Open the report in any of Chrome*, Firefox*, Edge*, or Internet Explorer*.
@@ -259,10 +243,16 @@ Version 2 of the kernel (`Producer<2>`) explicitly bounds the inner loop trip co
optimize_inner_loop.fpga_emu.exe (Windows)
```
2. Run the sample on the FPGA simulator device:
- ```
- ./loop_carried_dependency.fpga_sim (Linux)
- loop_carried_dependency.fpga_sim.exe (Windows)
- ```
+ * On Linux
+ ```
+ CL_CONTEXT_MPSIM_DEVICE_INTELFPGA=1 ./optimize_inner_loop.fpga_sim
+ ```
+ * On Windows
+ ```
+ set CL_CONTEXT_MPSIM_DEVICE_INTELFPGA=1
+ optimize_inner_loop.fpga_sim.exe
+ set CL_CONTEXT_MPSIM_DEVICE_INTELFPGA=
+ ```
3. Run the sample on the FPGA device:
```
./optimize_inner_loop.fpga (Linux)
@@ -295,6 +285,7 @@ You should see the following output in the console:
NOTE: These throughput numbers were collected using the Intel® PAC with Intel Arria® 10 GX FPGA.
## License
+
Code samples are licensed under the MIT license. See
[License.txt](https://github.com/oneapi-src/oneAPI-samples/blob/master/License.txt) for details.
diff --git a/DirectProgramming/C++SYCL_FPGA/Tutorials/DesignPatterns/optimize_inner_loop/src/CMakeLists.txt b/DirectProgramming/C++SYCL_FPGA/Tutorials/DesignPatterns/optimize_inner_loop/src/CMakeLists.txt
index 5318141de3..57737c59e6 100644
--- a/DirectProgramming/C++SYCL_FPGA/Tutorials/DesignPatterns/optimize_inner_loop/src/CMakeLists.txt
+++ b/DirectProgramming/C++SYCL_FPGA/Tutorials/DesignPatterns/optimize_inner_loop/src/CMakeLists.txt
@@ -28,7 +28,7 @@ set(EMULATOR_LINK_FLAGS "-fsycl -fintelfpga")
set(SIMULATOR_COMPILE_FLAGS "-fsycl -Wall -fintelfpga ${WIN_FLAG} -Xssimulation -DFPGA_SIMULATOR ${USER_SIMULATOR_FLAGS}")
set(SIMULATOR_LINK_FLAGS "-fsycl -fintelfpga -Xssimulation -Xstarget=${FPGA_DEVICE} ${USER_SIMULATOR_FLAGS}")
# use cmake -D USER_SIMULATOR_FLAGS= to set extra flags for FPGA simulator compilation
-set(HARDWARE_COMPILE_FLAGS "-fsycl -Wall -fintelfpga ${WIN_FLAG}")
+set(HARDWARE_COMPILE_FLAGS "-fsycl -Wall -fintelfpga ${WIN_FLAG} -DFPGA_HARDWARE")
set(HARDWARE_LINK_FLAGS "-fsycl -fintelfpga -Xshardware -Xstarget=${FPGA_DEVICE} ${USER_HARDWARE_FLAGS}")
# use cmake -D USER_HARDWARE_FLAGS= to set extra flags for FPGA backend compilation
diff --git a/DirectProgramming/C++SYCL_FPGA/Tutorials/DesignPatterns/optimize_inner_loop/src/optimize_inner_loop.cpp b/DirectProgramming/C++SYCL_FPGA/Tutorials/DesignPatterns/optimize_inner_loop/src/optimize_inner_loop.cpp
index e27a8f3495..470f2b9d4e 100644
--- a/DirectProgramming/C++SYCL_FPGA/Tutorials/DesignPatterns/optimize_inner_loop/src/optimize_inner_loop.cpp
+++ b/DirectProgramming/C++SYCL_FPGA/Tutorials/DesignPatterns/optimize_inner_loop/src/optimize_inner_loop.cpp
@@ -60,12 +60,12 @@ void SubmitKernels(std::vector &in, int &res, double &kernel_time_ms) {
static_assert(spec_iters >= 0, "spec_iters must be positive");
// the device selector
-#if defined(FPGA_EMULATOR)
- ext::intel::fpga_emulator_selector selector;
-#elif defined(FPGA_SIMULATOR)
- ext::intel::fpga_simulator_selector selector;
-#else
- ext::intel::fpga_selector selector;
+#if FPGA_SIMULATOR
+ auto selector = sycl::ext::intel::fpga_simulator_selector_v;
+#elif FPGA_HARDWARE
+ auto selector = sycl::ext::intel::fpga_selector_v;
+#else // #if FPGA_EMULATOR
+ auto selector = sycl::ext::intel::fpga_emulator_selector_v;
#endif
// the pipe
@@ -79,6 +79,12 @@ void SubmitKernels(std::vector &in, int &res, double &kernel_time_ms) {
auto prop_list = property_list{property::queue::enable_profiling()};
queue q(selector, fpga_tools::exception_handler, prop_list);
+ auto device = q.get_device();
+
+ std::cout << "Running on device: "
+ << device.get_info().c_str()
+ << std::endl;
+
// The input data buffer
buffer in_buf(in);
diff --git a/DirectProgramming/C++SYCL_FPGA/Tutorials/DesignPatterns/pipe_array/README.md b/DirectProgramming/C++SYCL_FPGA/Tutorials/DesignPatterns/pipe_array/README.md
index 79e0dc7004..0f9046d6f1 100755
--- a/DirectProgramming/C++SYCL_FPGA/Tutorials/DesignPatterns/pipe_array/README.md
+++ b/DirectProgramming/C++SYCL_FPGA/Tutorials/DesignPatterns/pipe_array/README.md
@@ -12,14 +12,35 @@ This FPGA tutorial showcases a design pattern that makes it possible to create a
> **Note**: Even though the Intel DPC++/C++ OneAPI compiler is enough to compile for emulation, generating reports and generating RTL, there are extra software requirements for the simulation flow and FPGA compiles.
>
-> For using the simulator flow, one of the following simulators must be installed and accessible through your PATH:
+> For using the simulator flow, Intel® Quartus® Prime Pro Edition and one of the following simulators must be installed and accessible through your PATH:
> - Questa*-Intel® FPGA Edition
> - Questa*-Intel® FPGA Starter Edition
> - ModelSim® SE
>
> When using the hardware compile flow, Intel® Quartus® Prime Pro Edition must be installed and accessible through your PATH.
+## Prerequisites
+
+This sample is part of the FPGA code samples.
+It is categorized as a Tier 3 sample that demonstrates a design pattern.
+
+```mermaid
+flowchart LR
+ tier1("Tier 1: Get Started")
+ tier2("Tier 2: Explore the Fundamentals")
+ tier3("Tier 3: Explore the Advanced Techniques")
+ tier4("Tier 4: Explore the Reference Designs")
+
+ tier1 --> tier2 --> tier3 --> tier4
+
+ style tier1 fill:#0071c1,stroke:#0071c1,stroke-width:1px,color:#fff
+ style tier2 fill:#0071c1,stroke:#0071c1,stroke-width:1px,color:#fff
+ style tier3 fill:#f96,stroke:#333,stroke-width:1px,color:#fff
+ style tier4 fill:#0071c1,stroke:#0071c1,stroke-width:1px,color:#fff
+```
+Find more information about how to navigate this part of the code samples in the [FPGA top-level README.md](/DirectProgramming/C++SYCL_FPGA/README.md).
+You can also find more information about [troubleshooting build errors](/DirectProgramming/C++SYCL_FPGA/README.md#troubleshooting), [running the sample on the Intel® DevCloud](/DirectProgramming/C++SYCL_FPGA/README.md#build-and-run-the-samples-on-intel-devcloud-optional), [using Visual Studio Code with the code samples](/DirectProgramming/C++SYCL_FPGA/README.md#use-visual-studio-code-vs-code-optional), [links to selected documentation](/DirectProgramming/C++SYCL_FPGA/README.md#documentation), etc.
## Purpose
In certain situations, it is useful to create a collection of pipes that can be indexed like an array in a SYCL-compliant FPGA design. If you are not yet familiar with pipes, refer to the prerequisite tutorial "Data Transfers Using Pipes".
@@ -137,49 +158,26 @@ The host must thus enqueue the producer kernel and `kNumRows * kNumCols` separat
}
```
-### Additional Documentation
-- [Explore SYCL* Through Intel® FPGA Code Samples](https://software.intel.com/content/www/us/en/develop/articles/explore-dpcpp-through-intel-fpga-code-samples.html) helps you to navigate the samples and build your knowledge of FPGAs and SYCL.
-- [FPGA Optimization Guide for Intel® oneAPI Toolkits](https://software.intel.com/content/www/us/en/develop/documentation/oneapi-fpga-optimization-guide) helps you understand how to target FPGAs using SYCL and Intel® oneAPI Toolkits.
-- [Intel® oneAPI Programming Guide](https://software.intel.com/en-us/oneapi-programming-guide) helps you understand target-independent, SYCL-compliant programming using Intel® oneAPI Toolkits.
-
## Key Concepts
* A design pattern to generate an array of pipes.
* Static loop unrolling through template metaprogramming.
## Building the `pipe_array` Tutorial
-> **Note**: If you have not already done so, set up your CLI
-> environment by sourcing the `setvars` script located in
-> the root of your oneAPI installation.
+> **Note**: When working with the command-line interface (CLI), you should configure the oneAPI toolkits using environment variables.
+> Set up your CLI environment by sourcing the `setvars` script located in the root of your oneAPI installation every time you open a new terminal window.
+> This practice ensures that your compiler, libraries, and tools are ready for development.
>
> Linux*:
> - For system wide installations: `. /opt/intel/oneapi/setvars.sh`
-> - For private installations: `. ~/intel/oneapi/setvars.sh`
+> - For private installations: `. ~/intel/oneapi/setvars.sh`
+> - For non-POSIX shells, like csh, use the following command: `bash -c 'source <install-dir>/setvars.sh ; exec csh'`
>
> Windows*:
> - `C:\Program Files(x86)\Intel\oneAPI\setvars.bat`
+> - Windows PowerShell*, use the following command: `cmd.exe "/K" '"C:\Program Files (x86)\Intel\oneAPI\setvars.bat" && powershell'`
>
->For more information on environment variables, see **Use the setvars Script** for [Linux or macOS](https://www.intel.com/content/www/us/en/develop/documentation/oneapi-programming-guide/top/oneapi-development-environment-setup/use-the-setvars-script-with-linux-or-macos.html), or [Windows](https://www.intel.com/content/www/us/en/develop/documentation/oneapi-programming-guide/top/oneapi-development-environment-setup/use-the-setvars-script-with-windows.html).
-
-
-### Running Samples in Intel® DevCloud
-If running a sample in the Intel® DevCloud, remember that you must specify the type of compute node and whether to run in batch or interactive mode. Compiles to FPGA are only supported on fpga_compile nodes. Executing programs on FPGA hardware is only supported on fpga_runtime nodes of the appropriate type, such as fpga_runtime:arria10 or fpga_runtime:stratix10. Neither compiling nor executing programs on FPGA hardware are supported on the login nodes. For more information, see the Intel® oneAPI Base Toolkit Get Started Guide ([https://devcloud.intel.com/oneapi/documentation/base-toolkit/](https://devcloud.intel.com/oneapi/documentation/base-toolkit/)).
-
-When compiling for FPGA hardware, it is recommended to increase the job timeout to 12h.
-
-### Using Visual Studio Code* (Optional)
-
-You can use Visual Studio Code (VS Code) extensions to set your environment, create launch configurations,
-and browse and download samples.
-
-The basic steps to build and run a sample using VS Code include:
- - Download a sample using the extension **Code Sample Browser for Intel® oneAPI Toolkits**.
- - Configure the oneAPI environment with the extension **Environment Configurator for Intel® oneAPI Toolkits**.
- - Open a Terminal in VS Code (**Terminal>New Terminal**).
- - Run the sample in the VS Code terminal using the instructions below.
-
-To learn more about the extensions and how to configure the oneAPI environment, see the
-[Using Visual Studio Code with Intel® oneAPI Toolkits User Guide](https://software.intel.com/content/www/us/en/develop/documentation/using-vs-code-with-intel-oneapi/top.html).
+> For more information on configuring environment variables, see [Use the setvars Script with Linux* or macOS*](https://www.intel.com/content/www/us/en/develop/documentation/oneapi-programming-guide/top/oneapi-development-environment-setup/use-the-setvars-script-with-linux-or-macos.html) or [Use the setvars Script with Windows*](https://www.intel.com/content/www/us/en/develop/documentation/oneapi-programming-guide/top/oneapi-development-environment-setup/use-the-setvars-script-with-windows.html).
### On a Linux* System
@@ -266,22 +264,6 @@ To learn more about the extensions and how to configure the oneAPI environment,
> **Note**: If you encounter any issues with long paths when compiling under Windows*, you may have to create your ‘build’ directory in a shorter path, for example c:\samples\build. You can then run cmake from that directory, and provide cmake with the full path to your sample directory.
-### Troubleshooting
-
-If an error occurs, you can get more details by running `make` with
-the `VERBOSE=1` argument:
-``make VERBOSE=1``
-For more comprehensive troubleshooting, use the Diagnostics Utility for
-Intel® oneAPI Toolkits, which provides system checks to find missing
-dependencies and permissions errors.
-[Learn more](https://software.intel.com/content/www/us/en/develop/documentation/diagnostic-utility-user-guide/top.html).
-
-
- ### In Third-Party Integrated Development Environments (IDEs)
-
-You can compile and run this tutorial in the Eclipse* IDE (in Linux*) and the Visual Studio* IDE (in Windows*). For instructions, refer to the following link: [FPGA Workflows on Third-Party IDEs for Intel® oneAPI Toolkits](https://www.intel.com/content/www/us/en/developer/articles/technical/intel-oneapi-dpcpp-fpga-workflow-on-ide.html)
-
-
## Examining the Reports
Locate `report.html` in the `pipe_array_report.prj/reports/` directory. Open the report in any of Chrome*, Firefox*, Edge*, or Internet Explorer*.
@@ -295,10 +277,16 @@ You can visualize the kernels and pipes generated by looking at the "System View
pipe_array.fpga_emu.exe (Windows)
```
2. Run the sample on the FPGA simulator device:
- ```
- ./pipe_array.fpga_sim (Linux)
- pipe_array.fpga_sim.exe (Windows)
- ```
+ * On Linux
+ ```
+ CL_CONTEXT_MPSIM_DEVICE_INTELFPGA=1 ./pipe_array.fpga_sim
+ ```
+ * On Windows
+ ```
+ set CL_CONTEXT_MPSIM_DEVICE_INTELFPGA=1
+ pipe_array.fpga_sim.exe
+ set CL_CONTEXT_MPSIM_DEVICE_INTELFPGA=
+ ```
3. Run the sample on the FPGA device:
```
./pipe_array.fpga (Linux)
@@ -317,6 +305,7 @@ PASSED: The results are correct
```
## License
+
Code samples are licensed under the MIT license. See
[License.txt](https://github.com/oneapi-src/oneAPI-samples/blob/master/License.txt) for details.
diff --git a/DirectProgramming/C++SYCL_FPGA/Tutorials/DesignPatterns/pipe_array/src/CMakeLists.txt b/DirectProgramming/C++SYCL_FPGA/Tutorials/DesignPatterns/pipe_array/src/CMakeLists.txt
index c8044b3d2e..1616b37e26 100755
--- a/DirectProgramming/C++SYCL_FPGA/Tutorials/DesignPatterns/pipe_array/src/CMakeLists.txt
+++ b/DirectProgramming/C++SYCL_FPGA/Tutorials/DesignPatterns/pipe_array/src/CMakeLists.txt
@@ -23,11 +23,11 @@ endif()
# 1. The "compile" stage compiles the device code to an intermediate representation (SPIR-V).
# 2. The "link" stage invokes the compiler's FPGA backend before linking.
# For this reason, FPGA backend flags must be passed as link flags in CMake.
-set(EMULATOR_COMPILE_FLAGS "-Wall ${WIN_FLAG} -fsycl -fintelfpga -DFPGA_EMULATOR")
+set(EMULATOR_COMPILE_FLAGS "-fsycl -fintelfpga -Wall ${WIN_FLAG} -DFPGA_EMULATOR")
set(EMULATOR_LINK_FLAGS "-fsycl -fintelfpga")
-set(SIMULATOR_COMPILE_FLAGS "-Wall ${WIN_FLAG} -fsycl -fintelfpga -Xssimulation -DFPGA_SIMULATOR")
+set(SIMULATOR_COMPILE_FLAGS "-fsycl -fintelfpga -Wall ${WIN_FLAG} -Xssimulation -DFPGA_SIMULATOR")
set(SIMULATOR_LINK_FLAGS "-fsycl -fintelfpga -Xssimulation -Xstarget=${FPGA_DEVICE} ${USER_HARDWARE_FLAGS}")
-set(HARDWARE_COMPILE_FLAGS "-Wall ${WIN_FLAG} -fsycl -fintelfpga")
+set(HARDWARE_COMPILE_FLAGS "-fsycl -fintelfpga -Wall ${WIN_FLAG} -DFPGA_HARDWARE")
set(HARDWARE_LINK_FLAGS "-fsycl -fintelfpga -Xshardware -Xstarget=${FPGA_DEVICE} ${USER_HARDWARE_FLAGS}")
# use cmake -D USER_HARDWARE_FLAGS= to set extra flags for FPGA backend compilation
diff --git a/DirectProgramming/C++SYCL_FPGA/Tutorials/DesignPatterns/pipe_array/src/pipe_array.cpp b/DirectProgramming/C++SYCL_FPGA/Tutorials/DesignPatterns/pipe_array/src/pipe_array.cpp
index 76820f1655..2d91df08a4 100644
--- a/DirectProgramming/C++SYCL_FPGA/Tutorials/DesignPatterns/pipe_array/src/pipe_array.cpp
+++ b/DirectProgramming/C++SYCL_FPGA/Tutorials/DesignPatterns/pipe_array/src/pipe_array.cpp
@@ -119,16 +119,22 @@ int main(int argc, char *argv[]) {
for (size_t i = 0; i < array_size; i++)
producer_input[i] = i;
-#if defined(FPGA_EMULATOR)
- ext::intel::fpga_emulator_selector device_selector;
-#elif defined(FPGA_SIMULATOR)
- ext::intel::fpga_simulator_selector device_selector;
-#else
- ext::intel::fpga_selector device_selector;
+#if FPGA_SIMULATOR
+ auto selector = sycl::ext::intel::fpga_simulator_selector_v;
+#elif FPGA_HARDWARE
+ auto selector = sycl::ext::intel::fpga_selector_v;
+#else // #if FPGA_EMULATOR
+ auto selector = sycl::ext::intel::fpga_emulator_selector_v;
#endif
try {
- queue q(device_selector, fpga_tools::exception_handler);
+ queue q(selector, fpga_tools::exception_handler);
+
+ auto device = q.get_device();
+
+ std::cout << "Running on device: "
+ << device.get_info<sycl::info::device::name>().c_str()
+ << std::endl;
// Enqueue producer
buffer producer_buffer(producer_input);
diff --git a/DirectProgramming/C++SYCL_FPGA/Tutorials/DesignPatterns/shannonization/README.md b/DirectProgramming/C++SYCL_FPGA/Tutorials/DesignPatterns/shannonization/README.md
index 0508dd4d94..b698f45e2b 100644
--- a/DirectProgramming/C++SYCL_FPGA/Tutorials/DesignPatterns/shannonization/README.md
+++ b/DirectProgramming/C++SYCL_FPGA/Tutorials/DesignPatterns/shannonization/README.md
@@ -11,13 +11,36 @@ This tutorial describes the process of _Shannonization_ (named after [Claude Sha
> **Note**: Even though the Intel DPC++/C++ OneAPI compiler is enough to compile for emulation, generating reports and generating RTL, there are extra software requirements for the simulation flow and FPGA compiles.
>
-> For using the simulator flow, one of the following simulators must be installed and accessible through your PATH:
+> For using the simulator flow, Intel® Quartus® Prime Pro Edition and one of the following simulators must be installed and accessible through your PATH:
> - Questa*-Intel® FPGA Edition
> - Questa*-Intel® FPGA Starter Edition
> - ModelSim® SE
>
> When using the hardware compile flow, Intel® Quartus® Prime Pro Edition must be installed and accessible through your PATH.
+## Prerequisites
+
+This sample is part of the FPGA code samples.
+It is categorized as a Tier 3 sample that demonstrates a design pattern.
+
+```mermaid
+flowchart LR
+ tier1("Tier 1: Get Started")
+ tier2("Tier 2: Explore the Fundamentals")
+ tier3("Tier 3: Explore the Advanced Techniques")
+ tier4("Tier 4: Explore the Reference Designs")
+
+ tier1 --> tier2 --> tier3 --> tier4
+
+ style tier1 fill:#0071c1,stroke:#0071c1,stroke-width:1px,color:#fff
+ style tier2 fill:#0071c1,stroke:#0071c1,stroke-width:1px,color:#fff
+ style tier3 fill:#f96,stroke:#333,stroke-width:1px,color:#fff
+ style tier4 fill:#0071c1,stroke:#0071c1,stroke-width:1px,color:#fff
+```
+
+Find more information about how to navigate this part of the code samples in the [FPGA top-level README.md](/DirectProgramming/C++SYCL_FPGA/README.md).
+You can also find more information about [troubleshooting build errors](/DirectProgramming/C++SYCL_FPGA/README.md#troubleshooting), [running the sample on the Intel® DevCloud](/DirectProgramming/C++SYCL_FPGA/README.md#build-and-run-the-samples-on-intel-devcloud-optional), [using Visual Studio Code with the code samples](/DirectProgramming/C++SYCL_FPGA/README.md#use-visual-studio-code-vs-code-optional), [links to selected documentation](/DirectProgramming/C++SYCL_FPGA/README.md#documentation), etc.
+
## Purpose
Demonstrate a loop optimization to improve the fMAX/II of an FPGA design.
@@ -120,47 +143,22 @@ void intersection(int A_size, int B_size, int& intersection_size) {
To achieve an II of 1 for the main `while` loop in the FPGA code shown above, the compiler must schedule three 32-bit Compare Operations, a 32-bit Add Operation, a 32-bit Select Operation (i.e., a pipe read), and a 1-bit And Operation into a single cycle. This is necessary since the actions of the *next* iteration of the loop depend on the result of the loop's current iteration. More specifically, the current iteration must: compare the current values of `a` and `b`, compare the number of elements read from the pipes (i.e. `A_count < A_size` and `B_count < B_size`), increment `A_count` or `B_count`, and then update either `a` or `b` by reading the respective pipe before the next iteration of the loop can enter the same block of code. This creates a long critical path that requires a tradeoff in fMAX or II (i.e., either fMAX must decrease or II must increase). This tutorial will explain optimizations that remove these operations from the critical path (at the expense of some area) and improve the fMAX/II tradeoff and, therefore, the throughput.
-### Additional Documentation
-- [Explore SYCL* Through Intel® FPGA Code Samples](https://software.intel.com/content/www/us/en/develop/articles/explore-dpcpp-through-intel-fpga-code-samples.html) helps you to navigate the samples and build your knowledge of FPGAs and SYCL.
-- [FPGA Optimization Guide for Intel® oneAPI Toolkits](https://software.intel.com/content/www/us/en/develop/documentation/oneapi-fpga-optimization-guide) helps you understand how to target FPGAs using SYCL and Intel® oneAPI Toolkits.
-- [Intel® oneAPI Programming Guide](https://software.intel.com/en-us/oneapi-programming-guide) helps you understand target-independent, SYCL-compliant programming using Intel® oneAPI Toolkits.
-
## Building the `shannonization` Tutorial
-> **Note**: If you have not already done so, set up your CLI
-> environment by sourcing the `setvars` script located in
-> the root of your oneAPI installation.
+> **Note**: When working with the command-line interface (CLI), you should configure the oneAPI toolkits using environment variables.
+> Set up your CLI environment by sourcing the `setvars` script located in the root of your oneAPI installation every time you open a new terminal window.
+> This practice ensures that your compiler, libraries, and tools are ready for development.
>
> Linux*:
> - For system wide installations: `. /opt/intel/oneapi/setvars.sh`
-> - For private installations: `. ~/intel/oneapi/setvars.sh`
+> - For private installations: `. ~/intel/oneapi/setvars.sh`
+> - For non-POSIX shells, like csh, use the following command: `bash -c 'source <install-dir>/setvars.sh ; exec csh'`
>
> Windows*:
> - `C:\Program Files(x86)\Intel\oneAPI\setvars.bat`
+> - Windows PowerShell*, use the following command: `cmd.exe "/K" '"C:\Program Files (x86)\Intel\oneAPI\setvars.bat" && powershell'`
>
->For more information on environment variables, see **Use the setvars Script** for [Linux or macOS](https://www.intel.com/content/www/us/en/develop/documentation/oneapi-programming-guide/top/oneapi-development-environment-setup/use-the-setvars-script-with-linux-or-macos.html), or [Windows](https://www.intel.com/content/www/us/en/develop/documentation/oneapi-programming-guide/top/oneapi-development-environment-setup/use-the-setvars-script-with-windows.html).
-
-
-### Running Samples in Intel® DevCloud
-If running a sample in the Intel® DevCloud, remember that you must specify the type of compute node and whether to run in batch or interactive mode. Compiles to FPGA are only supported on fpga_compile nodes. Executing programs on FPGA hardware is only supported on fpga_runtime nodes of the appropriate type, such as fpga_runtime:arria10 or fpga_runtime:stratix10. Neither compiling nor executing programs on FPGA hardware are supported on the login nodes. For more information, see the Intel® oneAPI Base Toolkit Get Started Guide ([https://devcloud.intel.com/oneapi/documentation/base-toolkit/](https://devcloud.intel.com/oneapi/documentation/base-toolkit/)).
-
-When compiling for FPGA hardware, it is recommended to increase the job timeout to 12h.
-
-
-### Using Visual Studio Code* (Optional)
-
-You can use Visual Studio Code (VS Code) extensions to set your environment, create launch configurations,
-and browse and download samples.
-
-The basic steps to build and run a sample using VS Code include:
- - Download a sample using the extension **Code Sample Browser for Intel® oneAPI Toolkits**.
- - Configure the oneAPI environment with the extension **Environment Configurator for Intel® oneAPI Toolkits**.
- - Open a Terminal in VS Code (**Terminal>New Terminal**).
- - Run the sample in the VS Code terminal using the instructions below.
-
-To learn more about the extensions and how to configure the oneAPI environment, see the
-[Using Visual Studio Code with Intel® oneAPI Toolkits User Guide](https://software.intel.com/content/www/us/en/develop/documentation/using-vs-code-with-intel-oneapi/top.html).
-
+> For more information on configuring environment variables, see [Use the setvars Script with Linux* or macOS*](https://www.intel.com/content/www/us/en/develop/documentation/oneapi-programming-guide/top/oneapi-development-environment-setup/use-the-setvars-script-with-linux-or-macos.html) or [Use the setvars Script with Windows*](https://www.intel.com/content/www/us/en/develop/documentation/oneapi-programming-guide/top/oneapi-development-environment-setup/use-the-setvars-script-with-windows.html).
### On a Linux* System
1. Generate the `Makefile` by running `cmake`.
@@ -188,6 +186,10 @@ To learn more about the extensions and how to configure the oneAPI environment,
```
make fpga_emu
```
+ * Compile for simulation (fast compile time, targets simulator FPGA device):
+ ```
+ make fpga_sim
+ ```
* Generate the optimization report:
```
make report
@@ -225,6 +227,10 @@ To learn more about the extensions and how to configure the oneAPI environment,
```
nmake fpga_emu
```
+ * Compile for simulation (fast compile time, targets simulator FPGA device):
+ ```
+ nmake fpga_sim
+ ```
* Generate the optimization report:
```
nmake report
@@ -236,19 +242,6 @@ To learn more about the extensions and how to configure the oneAPI environment,
*Note:* The Intel® PAC with Intel Arria® 10 GX FPGA and Intel® FPGA PAC D5005 (with Intel Stratix® 10 SX) do not support Windows*. Compiling to FPGA hardware on Windows* requires a third-party or custom Board Support Package (BSP) with Windows* support.
-### Troubleshooting
-If an error occurs, you can get more details by running `make` with
-the `VERBOSE=1` argument:
-``make VERBOSE=1``
-For more comprehensive troubleshooting, use the Diagnostics Utility for
-Intel® oneAPI Toolkits, which provides system checks to find missing
-dependencies and permissions errors.
-[Learn more](https://software.intel.com/content/www/us/en/develop/documentation/diagnostic-utility-user-guide/top.html).
-
-### In Third-Party Integrated Development Environments (IDEs)
-
-You can compile and run this tutorial in the Eclipse* IDE (in Linux*) and the Visual Studio* IDE (in Windows*). For instructions, refer to the following link: [FPGA Workflows on Third-Party IDEs for Intel® oneAPI Toolkits](https://www.intel.com/content/www/us/en/developer/articles/technical/intel-oneapi-dpcpp-fpga-workflow-on-ide.html).
-
## Examining the Reports
This section will walk through how the HTML reports show the result of the optimizations we made in each version of the kernel, the definition of which can be found in `src/IntersectionKernel.hpp`. Start by locating `report.html` in the `shannonization_report.prj/reports/` directory. Open the report in Chrome*, Firefox*, Edge*, or Internet Explorer*. The fMAX numbers mentioned in these sections assume that the Arria® 10 GX FPGA is the target. However, the discussion is similar for the Stratix® 10 SX FPGA.
@@ -330,16 +323,27 @@ As a consequence of the fabric architecture of the Intel Stratix® 10 SX FPGA
## Running the Sample
- 1. Run the sample on the FPGA emulator (the kernel executes on the CPU):
- ```
- ./shannonization.fpga_emu (Linux)
- shannonization.fpga_emu.exe (Windows)
- ```
-2. Run the sample on the FPGA device:
- ```
- ./shannonization.fpga (Linux)
- shannonization.fpga.exe (Windows)
- ```
+1. Run the sample on the FPGA emulator (the kernel executes on the CPU):
+ ```
+ ./shannonization.fpga_emu (Linux)
+ shannonization.fpga_emu.exe (Windows)
+ ```
+2. Run the sample on the FPGA simulator device:
+ * On Linux
+ ```
+ CL_CONTEXT_MPSIM_DEVICE_INTELFPGA=1 ./shannonization.fpga_sim
+ ```
+ * On Windows
+ ```
+ set CL_CONTEXT_MPSIM_DEVICE_INTELFPGA=1
+ shannonization.fpga_sim.exe
+ set CL_CONTEXT_MPSIM_DEVICE_INTELFPGA=
+ ```
+3. Run the sample on the FPGA device:
+ ```
+ ./shannonization.fpga (Linux)
+ shannonization.fpga.exe (Windows)
+ ```
### Application Parameters
The following table explains the command line arguments that can be passed to the `shannonization` program.
@@ -380,6 +384,7 @@ You should see the following output in the console:
> **Note**: These throughput numbers were collected using the Intel® FPGA PAC D5005 (with Intel Stratix® 10 SX).
## License
+
Code samples are licensed under the MIT license. See
[License.txt](https://github.com/oneapi-src/oneAPI-samples/blob/master/License.txt) for details.
diff --git a/DirectProgramming/C++SYCL_FPGA/Tutorials/DesignPatterns/shannonization/src/CMakeLists.txt b/DirectProgramming/C++SYCL_FPGA/Tutorials/DesignPatterns/shannonization/src/CMakeLists.txt
index 2bef9c36e4..edbf970b18 100755
--- a/DirectProgramming/C++SYCL_FPGA/Tutorials/DesignPatterns/shannonization/src/CMakeLists.txt
+++ b/DirectProgramming/C++SYCL_FPGA/Tutorials/DesignPatterns/shannonization/src/CMakeLists.txt
@@ -1,6 +1,7 @@
set(SOURCE_FILE shannonization.cpp)
set(TARGET_NAME shannonization)
set(EMULATOR_TARGET ${TARGET_NAME}.fpga_emu)
+set(SIMULATOR_TARGET ${TARGET_NAME}.fpga_sim)
set(FPGA_TARGET ${TARGET_NAME}.fpga)
set(REPORTS_TARGET ${TARGET_NAME}_report)
@@ -36,14 +37,17 @@ endif()
# 1. The "compile" stage compiles the device code to an intermediate representation (SPIR-V).
# 2. The "link" stage invokes the compiler's FPGA backend before linking.
# For this reason, FPGA backend flags must be passed as link flags in CMake.
-set(EMULATOR_COMPILE_FLAGS "-Wall ${WIN_FLAG} -fsycl -fintelfpga -DFPGA_EMULATOR ${DEVICE_FLAG}")
+set(EMULATOR_COMPILE_FLAGS "-fsycl -fintelfpga -Wall ${WIN_FLAG} -DFPGA_EMULATOR ${DEVICE_FLAG}")
set(EMULATOR_LINK_FLAGS "-fsycl -fintelfpga")
-set(HARDWARE_COMPILE_FLAGS "-Wall ${WIN_FLAG} -fsycl -fintelfpga ${DEVICE_FLAG}")
+set(SIMULATOR_COMPILE_FLAGS "-fsycl -fintelfpga -Wall ${WIN_FLAG} -Xssimulation -DFPGA_SIMULATOR ${DEVICE_FLAG}")
+set(HARDWARE_COMPILE_FLAGS "-fsycl -fintelfpga -Wall ${WIN_FLAG} ${DEVICE_FLAG} -DFPGA_HARDWARE")
if(FPGA_DEVICE MATCHES ".s10.*")
# hyper-optimized-handshaking only applies to Intel Stratix® 10 FPGAs
set(HARDWARE_LINK_FLAGS "-fsycl -fintelfpga -Xshardware -Xshyper-optimized-handshaking=off -Xstarget=${FPGA_DEVICE} ${DEVICE_FLAG} ${USER_HARDWARE_FLAGS}")
+ set(SIMULATOR_LINK_FLAGS "-fsycl -fintelfpga -Xssimulation -Xsghdl -Xshyper-optimized-handshaking=off -Xstarget=${FPGA_DEVICE} ${DEVICE_FLAG} ${USER_HARDWARE_FLAGS}")
else()
set(HARDWARE_LINK_FLAGS "-fsycl -fintelfpga -Xshardware -Xstarget=${FPGA_DEVICE} ${DEVICE_FLAG} ${USER_HARDWARE_FLAGS}")
+ set(SIMULATOR_LINK_FLAGS "-fsycl -fintelfpga -Xssimulation -Xsghdl -Xstarget=${FPGA_DEVICE} ${DEVICE_FLAG} ${USER_HARDWARE_FLAGS}")
endif()
# use cmake -D USER_HARDWARE_FLAGS= to set extra flags for FPGA backend compilation
@@ -61,6 +65,20 @@ set_target_properties(${EMULATOR_TARGET} PROPERTIES COMPILE_FLAGS "${EMULATOR_CO
set_target_properties(${EMULATOR_TARGET} PROPERTIES LINK_FLAGS "${EMULATOR_LINK_FLAGS}")
add_custom_target(fpga_emu DEPENDS ${EMULATOR_TARGET})
+###############################################################################
+### FPGA Simulator
+###############################################################################
+# To compile in a single command:
+# icpx -fsycl -fintelfpga -Xssimulation -Xsghdl -Xstarget=<FPGA_DEVICE> -DFPGA_SIMULATOR <file>.cpp -o <file>.fpga_sim
+# CMake executes:
+# [compile] icpx -fsycl -fintelfpga -Xssimulation -DFPGA_SIMULATOR -o <file>.cpp.o -c <file>.cpp
+# [link] icpx -fsycl -fintelfpga -Xssimulation -Xsghdl -Xstarget=<FPGA_DEVICE> <file>.cpp.o -o <file>.fpga_sim
+add_executable(${SIMULATOR_TARGET} ${SOURCE_FILE})
+target_include_directories(${SIMULATOR_TARGET} PRIVATE ../../../../include)
+set_target_properties(${SIMULATOR_TARGET} PROPERTIES COMPILE_FLAGS "${SIMULATOR_COMPILE_FLAGS}")
+set_target_properties(${SIMULATOR_TARGET} PROPERTIES LINK_FLAGS "${SIMULATOR_LINK_FLAGS}")
+add_custom_target(fpga_sim DEPENDS ${SIMULATOR_TARGET})
+
###############################################################################
### Generate Report
###############################################################################
diff --git a/DirectProgramming/C++SYCL_FPGA/Tutorials/DesignPatterns/shannonization/src/shannonization.cpp b/DirectProgramming/C++SYCL_FPGA/Tutorials/DesignPatterns/shannonization/src/shannonization.cpp
index a65de9d1e5..23e7cabee9 100644
--- a/DirectProgramming/C++SYCL_FPGA/Tutorials/DesignPatterns/shannonization/src/shannonization.cpp
+++ b/DirectProgramming/C++SYCL_FPGA/Tutorials/DesignPatterns/shannonization/src/shannonization.cpp
@@ -124,7 +124,7 @@ bool Intersection(queue& q, std::vector& a,
// For emulation, just do a single iteration.
// For hardware, perform multiple iterations for a more
// accurate throughput measurement
-#if defined(FPGA_EMULATOR)
+#if defined(FPGA_EMULATOR) || defined(FPGA_SIMULATOR)
int iterations = 1;
#else
int iterations = 5;
@@ -163,7 +163,7 @@ bool Intersection(queue& q, std::vector& a,
// The FPGA emulator does not accurately represent the hardware performance
// so we don't print performance results when running with the emulator
if (success) {
-#ifndef FPGA_EMULATOR
+#if !defined(FPGA_EMULATOR) && !defined(FPGA_SIMULATOR)
// Compute the average throughput across all iterations.
// We use the first iteration as a 'warmup' for the FPGA,
// so we ignore its results.
@@ -187,7 +187,7 @@ bool Intersection(queue& q, std::vector& a,
int main(int argc, char** argv) {
// parse the command line arguments
-#if defined(FPGA_EMULATOR)
+#if defined(FPGA_EMULATOR) || defined(FPGA_SIMULATOR)
unsigned int a_size = 128;
unsigned int b_size = 256;
#else
@@ -256,14 +256,22 @@ int main(int argc, char** argv) {
auto props = property_list{property::queue::enable_profiling()};
// the device selector
-#ifdef FPGA_EMULATOR
- ext::intel::fpga_emulator_selector device_selector;
-#else
- ext::intel::fpga_selector device_selector;
+#if FPGA_SIMULATOR
+ auto selector = sycl::ext::intel::fpga_simulator_selector_v;
+#elif FPGA_HARDWARE
+ auto selector = sycl::ext::intel::fpga_selector_v;
+#else // #if FPGA_EMULATOR
+ auto selector = sycl::ext::intel::fpga_emulator_selector_v;
#endif
// create the device queue
- queue q(device_selector, props);
+ queue q(selector, props);
+
+ auto device = q.get_device();
+
+ std::cout << "Running on device: "
+ << device.get_info<sycl::info::device::name>().c_str()
+ << std::endl;
bool success = true;
diff --git a/DirectProgramming/C++SYCL_FPGA/Tutorials/DesignPatterns/simple_host_streaming/README.md b/DirectProgramming/C++SYCL_FPGA/Tutorials/DesignPatterns/simple_host_streaming/README.md
index ad712e1a09..8decb023de 100755
--- a/DirectProgramming/C++SYCL_FPGA/Tutorials/DesignPatterns/simple_host_streaming/README.md
+++ b/DirectProgramming/C++SYCL_FPGA/Tutorials/DesignPatterns/simple_host_streaming/README.md
@@ -12,7 +12,7 @@ This tutorial demonstrates how to use SYCL* Universal Shared Memory (USM) to str
> **Note**: Even though the Intel DPC++/C++ OneAPI compiler is enough to compile for emulation, generating reports and generating RTL, there are extra software requirements for the simulation flow and FPGA compiles.
>
-> For using the simulator flow, one of the following simulators must be installed and accessible through your PATH:
+> For using the simulator flow, Intel® Quartus® Prime Pro Edition and one of the following simulators must be installed and accessible through your PATH:
> - Questa*-Intel® FPGA Edition
> - Questa*-Intel® FPGA Starter Edition
> - ModelSim® SE
@@ -23,6 +23,29 @@ This tutorial demonstrates how to use SYCL* Universal Shared Memory (USM) to str
> **Notice**: This tutorial demonstrates an implementation of host streaming that will be supplanted by better techniques in a future release. See the [Drawbacks and Future Work](#drawbacks-and-future-work)*
+## Prerequisites
+
+This sample is part of the FPGA code samples.
+It is categorized as a Tier 3 sample that demonstrates a design pattern.
+
+```mermaid
+flowchart LR
+ tier1("Tier 1: Get Started")
+ tier2("Tier 2: Explore the Fundamentals")
+ tier3("Tier 3: Explore the Advanced Techniques")
+ tier4("Tier 4: Explore the Reference Designs")
+
+ tier1 --> tier2 --> tier3 --> tier4
+
+ style tier1 fill:#0071c1,stroke:#0071c1,stroke-width:1px,color:#fff
+ style tier2 fill:#0071c1,stroke:#0071c1,stroke-width:1px,color:#fff
+ style tier3 fill:#f96,stroke:#333,stroke-width:1px,color:#fff
+ style tier4 fill:#0071c1,stroke:#0071c1,stroke-width:1px,color:#fff
+```
+
+Find more information about how to navigate this part of the code samples in the [FPGA top-level README.md](/DirectProgramming/C++SYCL_FPGA/README.md).
+You can also find more information about [troubleshooting build errors](/DirectProgramming/C++SYCL_FPGA/README.md#troubleshooting), [running the sample on the Intel® DevCloud](/DirectProgramming/C++SYCL_FPGA/README.md#build-and-run-the-samples-on-intel-devcloud-optional), [using Visual Studio Code with the code samples](/DirectProgramming/C++SYCL_FPGA/README.md#use-visual-studio-code-vs-code-optional), [links to selected documentation](/DirectProgramming/C++SYCL_FPGA/README.md#documentation), etc.
+
## Purpose
The purpose of this tutorial is to show you how to take advantage of SYCL USM host allocations and zero-copy host memory to implement a streaming host-device design with low latency and high throughput. Before starting this tutorial, we recommend first reviewing the **Pipes** (pipes) and **Zero-Copy Data Transfer** (zero_copy_data_transfer) FPGA tutorials, which will teach you more about SYCL pipes and SYCL USM and zero-copy data transfers, respectively.
@@ -87,50 +110,26 @@ One method to achieve this signaling is to use the start of a kernel to signal t
We are currently working on an API and tutorial to address both of these drawbacks. This API will decrease the latency to synchronize between the host and device and therefore enable lower latency with maintained throughput. It will also dramatically improve the usability of the programming model to achieve this performance.
-### Additional Documentation
-- [Explore SYCL* Through Intel® FPGA Code Samples](https://software.intel.com/content/www/us/en/develop/articles/explore-dpcpp-through-intel-fpga-code-samples.html) helps you to navigate the samples and build your knowledge of FPGAs and SYCL.
-- [FPGA Optimization Guide for Intel® oneAPI Toolkits](https://software.intel.com/content/www/us/en/develop/documentation/oneapi-fpga-optimization-guide) helps you understand how to target FPGAs using SYCL and Intel® oneAPI Toolkits.
-- [Intel® oneAPI Programming Guide](https://software.intel.com/en-us/oneapi-programming-guide) helps you understand target-independent, SYCL-compliant programming using Intel® oneAPI Toolkits.
-
## Key Concepts
* Runtime kernel management.
* Host-device streaming designs.
## Building the `simple_host_streaming` Tutorial
-> **Note**: If you have not already done so, set up your CLI
-> environment by sourcing the `setvars` script located in
-> the root of your oneAPI installation.
+> **Note**: When working with the command-line interface (CLI), you should configure the oneAPI toolkits using environment variables.
+> Set up your CLI environment by sourcing the `setvars` script located in the root of your oneAPI installation every time you open a new terminal window.
+> This practice ensures that your compiler, libraries, and tools are ready for development.
>
> Linux*:
> - For system wide installations: `. /opt/intel/oneapi/setvars.sh`
-> - For private installations: `. ~/intel/oneapi/setvars.sh`
+> - For private installations: `. ~/intel/oneapi/setvars.sh`
+> - For non-POSIX shells, like csh, use the following command: `bash -c 'source <install-dir>/setvars.sh ; exec csh'`
>
> Windows*:
> - `C:\Program Files (x86)\Intel\oneAPI\setvars.bat`
+> - Windows PowerShell*, use the following command: `cmd.exe "/K" '"C:\Program Files (x86)\Intel\oneAPI\setvars.bat" && powershell'`
>
->For more information on environment variables, see **Use the setvars Script** for [Linux or macOS](https://www.intel.com/content/www/us/en/develop/documentation/oneapi-programming-guide/top/oneapi-development-environment-setup/use-the-setvars-script-with-linux-or-macos.html), or [Windows](https://www.intel.com/content/www/us/en/develop/documentation/oneapi-programming-guide/top/oneapi-development-environment-setup/use-the-setvars-script-with-windows.html).
-
-
-
-### Running Samples in Intel® DevCloud
-If running a sample in the Intel® DevCloud, remember that you must specify the type of compute node and whether to run in batch or interactive mode. Compiles to FPGA are only supported on fpga_compile nodes. Executing programs on FPGA hardware is only supported on fpga_runtime nodes of the appropriate type, such as fpga_runtime:arria10 or fpga_runtime:stratix10. Neither compiling nor executing programs on FPGA hardware are supported on the login nodes. For more information, see the Intel® oneAPI Base Toolkit Get Started Guide ([https://devcloud.intel.com/oneapi/documentation/base-toolkit/](https://devcloud.intel.com/oneapi/documentation/base-toolkit/)).
-
-When compiling for FPGA hardware, it is recommended to increase the job timeout to 12h.
-
-
-### Using Visual Studio Code* (Optional)
-
-You can use Visual Studio Code (VS Code) extensions to set your environment, create launch configurations, and browse and download samples.
-
-The basic steps to build and run a sample using VS Code include:
- - Download a sample using the extension **Code Sample Browser for Intel® oneAPI Toolkits**.
- - Configure the oneAPI environment with the extension **Environment Configurator for Intel® oneAPI Toolkits**.
- - Open a Terminal in VS Code (**Terminal>New Terminal**).
- - Run the sample in the VS Code terminal using the instructions below.
-
-To learn more about the extensions and how to configure the oneAPI environment, see the
-[Using Visual Studio Code with Intel® oneAPI Toolkits User Guide](https://software.intel.com/content/www/us/en/develop/documentation/using-vs-code-with-intel-oneapi/top.html).
+> For more information on configuring environment variables, see [Use the setvars Script with Linux* or macOS*](https://www.intel.com/content/www/us/en/develop/documentation/oneapi-programming-guide/top/oneapi-development-environment-setup/use-the-setvars-script-with-linux-or-macos.html) or [Use the setvars Script with Windows*](https://www.intel.com/content/www/us/en/develop/documentation/oneapi-programming-guide/top/oneapi-development-environment-setup/use-the-setvars-script-with-windows.html).
### On a Linux* System
@@ -206,39 +205,32 @@ To learn more about the extensions and how to configure the oneAPI environment,
> **Note**: If you encounter any issues with long paths when compiling under Windows*, you may have to create your ‘build’ directory in a shorter path, for example c:\samples\build. You can then run cmake from that directory, and provide cmake with the full path to your sample directory.
- ### Troubleshooting
-If an error occurs, you can get more details by running `make` with
-the `VERBOSE=1` argument:
-``make VERBOSE=1``
-For more comprehensive troubleshooting, use the Diagnostics Utility for
-Intel® oneAPI Toolkits, which provides system checks to find missing
-dependencies and permissions errors.
-[Learn more](https://software.intel.com/content/www/us/en/develop/documentation/diagnostic-utility-user-guide/top.html).
-
-### In Third-Party Integrated Development Environments (IDEs)
-
-You can compile and run this tutorial in the Eclipse* IDE (in Linux*) and the Visual Studio* IDE (in Windows*). For instructions, refer to the following link: [FPGA Workflows on Third-Party IDEs for Intel® oneAPI Toolkits](https://www.intel.com/content/www/us/en/developer/articles/technical/intel-oneapi-dpcpp-fpga-workflow-on-ide.html).
-
## Examining the Reports
Locate `report.html` in the `simple_host_streaming_report.prj/reports/` directory. Open the report in any of Chrome*, Firefox*, Edge*, or Internet Explorer*.
## Running the Sample
- 1. Run the sample on the FPGA emulator (the kernel executes on the CPU):
- ```
- ./simple_host_streaming.fpga_emu (Linux)
- simple_host_streaming.fpga_emu.exe (Windows)
- ```
+1. Run the sample on the FPGA emulator (the kernel executes on the CPU):
+ ```
+ ./simple_host_streaming.fpga_emu (Linux)
+ simple_host_streaming.fpga_emu.exe (Windows)
+ ```
2. Run the sample on the FPGA simulator:
- ```
- ./simple_host_streaming.fpga_sim (Linux)
- simple_host_streaming.fpga_sim.exe (Windows)
- ```
+ * On Linux
+ ```
+ CL_CONTEXT_MPSIM_DEVICE_INTELFPGA=1 ./simple_host_streaming.fpga_sim
+ ```
+ * On Windows
+ ```
+ set CL_CONTEXT_MPSIM_DEVICE_INTELFPGA=1
+ simple_host_streaming.fpga_sim.exe
+ set CL_CONTEXT_MPSIM_DEVICE_INTELFPGA=
+ ```
3. Run the sample on the FPGA device:
- ```
- ./simple_host_streaming.fpga (Linux)
- simple_host_streaming.fpga.exe (Windows)
- ```
+ ```
+ ./simple_host_streaming.fpga (Linux)
+ simple_host_streaming.fpga.exe (Windows)
+ ```
### Example of Output
You should see the following output in the console:
@@ -290,6 +282,7 @@ You should see the following output in the console:
> **Note**: The experimentally measured bandwidth of the PCIe is ~11 GB/s (bi-directional, ~22 GB/s total). The FPGA device performance numbers above show that the offload, single-kernel, and multi-kernel designs are all able to saturate the PCIe bandwidth (since this design reads and writes over PCIe, a design throughput of 10.7 GB/s uses 10.7 x 2 = 21.4 GB/s of total PCIe bandwidth). However, the single-kernel and multi-kernel designs saturate the PCIe bandwidth with a latency that is ~473x lower than the offload kernel.
## License
+
Code samples are licensed under the MIT license. See
[License.txt](https://github.com/oneapi-src/oneAPI-samples/blob/master/License.txt) for details.
diff --git a/DirectProgramming/C++SYCL_FPGA/Tutorials/DesignPatterns/simple_host_streaming/src/CMakeLists.txt b/DirectProgramming/C++SYCL_FPGA/Tutorials/DesignPatterns/simple_host_streaming/src/CMakeLists.txt
index 889fe2b718..457b6b5fa5 100755
--- a/DirectProgramming/C++SYCL_FPGA/Tutorials/DesignPatterns/simple_host_streaming/src/CMakeLists.txt
+++ b/DirectProgramming/C++SYCL_FPGA/Tutorials/DesignPatterns/simple_host_streaming/src/CMakeLists.txt
@@ -35,11 +35,11 @@ endif()
# 1. The "compile" stage compiles the device code to an intermediate representation (SPIR-V).
# 2. The "link" stage invokes the compiler's FPGA backend before linking.
# For this reason, FPGA backend flags must be passed as link flags in CMake.
-set(EMULATOR_COMPILE_FLAGS "-Wall ${WIN_FLAG} -fsycl -fintelfpga -Wall -DFPGA_EMULATOR ${DEVICE_FLAG}")
+set(EMULATOR_COMPILE_FLAGS "-fsycl -fintelfpga -Wall ${WIN_FLAG} -DFPGA_EMULATOR ${DEVICE_FLAG}")
set(EMULATOR_LINK_FLAGS "-fsycl -fintelfpga")
-set(SIMULATOR_COMPILE_FLAGS "-Wall ${WIN_FLAG} -fsycl -fintelfpga -Wall -Xssimulation -DFPGA_SIMULATOR ${DEVICE_FLAG}")
+set(SIMULATOR_COMPILE_FLAGS "-fsycl -fintelfpga -Wall ${WIN_FLAG} -Xssimulation -DFPGA_SIMULATOR ${DEVICE_FLAG}")
set(SIMULATOR_LINK_FLAGS "-fsycl -fintelfpga -Wall -Xssimulation -Xstarget=${FPGA_DEVICE} ${DEVICE_FLAG} ${USER_HARDWARE_FLAGS}")
-set(HARDWARE_COMPILE_FLAGS "-Wall ${WIN_FLAG} -fsycl -fintelfpga ${DEVICE_FLAG}")
+set(HARDWARE_COMPILE_FLAGS "-fsycl -fintelfpga -Wall ${WIN_FLAG} ${DEVICE_FLAG} -DFPGA_HARDWARE")
set(HARDWARE_LINK_FLAGS "-fsycl -fintelfpga -Wall -Xshardware -Xshyper-optimized-handshaking=off -Xstarget=${FPGA_DEVICE} ${DEVICE_FLAG} ${USER_HARDWARE_FLAGS}")
# use cmake -D USER_HARDWARE_FLAGS=<flags> to set extra flags for FPGA simulator and backend compilation
diff --git a/DirectProgramming/C++SYCL_FPGA/Tutorials/DesignPatterns/simple_host_streaming/src/simple_host_streaming.cpp b/DirectProgramming/C++SYCL_FPGA/Tutorials/DesignPatterns/simple_host_streaming/src/simple_host_streaming.cpp
index 7cefd6ba3a..558fc69863 100644
--- a/DirectProgramming/C++SYCL_FPGA/Tutorials/DesignPatterns/simple_host_streaming/src/simple_host_streaming.cpp
+++ b/DirectProgramming/C++SYCL_FPGA/Tutorials/DesignPatterns/simple_host_streaming/src/simple_host_streaming.cpp
@@ -140,12 +140,12 @@ int main(int argc, char* argv[]) {
try {
// device selector
-#if defined(FPGA_EMULATOR)
- ext::intel::fpga_emulator_selector selector;
-#elif defined(FPGA_SIMULATOR)
- ext::intel::fpga_simulator_selector selector;
-#else
- ext::intel::fpga_selector selector;
+#if FPGA_SIMULATOR
+ auto selector = sycl::ext::intel::fpga_simulator_selector_v;
+#elif FPGA_HARDWARE
+ auto selector = sycl::ext::intel::fpga_selector_v;
+#else // #if FPGA_EMULATOR
+ auto selector = sycl::ext::intel::fpga_emulator_selector_v;
#endif
// queue properties to enable profiling
@@ -155,13 +155,17 @@ int main(int argc, char* argv[]) {
queue q(selector, fpga_tools::exception_handler, prop_list);
// make sure the device supports USM host allocations
- device d = q.get_device();
- if (!d.get_info()) {
+ auto device = q.get_device();
+ if (!device.get_info()) {
std::cerr << "ERROR: The selected device does not support USM host"
<< " allocations\n";
std::terminate();
}
+ std::cout << "Running on device: "
+ << device.get_info().c_str()
+ << std::endl;
+
// the USM input and output data
Type *in, *out;
if ((in = malloc_host(total_count, q)) == nullptr) {
diff --git a/DirectProgramming/C++SYCL_FPGA/Tutorials/DesignPatterns/triangular_loop/README.md b/DirectProgramming/C++SYCL_FPGA/Tutorials/DesignPatterns/triangular_loop/README.md
index 61f2c11d37..68c7652085 100755
--- a/DirectProgramming/C++SYCL_FPGA/Tutorials/DesignPatterns/triangular_loop/README.md
+++ b/DirectProgramming/C++SYCL_FPGA/Tutorials/DesignPatterns/triangular_loop/README.md
@@ -13,13 +13,36 @@ This FPGA tutorial demonstrates an advanced technique to improve the performance
> **Note**: Even though the Intel DPC++/C++ OneAPI compiler is enough to compile for emulation, generating reports and generating RTL, there are extra software requirements for the simulation flow and FPGA compiles.
>
-> For using the simulator flow, one of the following simulators must be installed and accessible through your PATH:
+> For using the simulator flow, Intel® Quartus® Prime Pro Edition and one of the following simulators must be installed and accessible through your PATH:
> - Questa*-Intel® FPGA Edition
> - Questa*-Intel® FPGA Starter Edition
> - ModelSim® SE
>
> When using the hardware compile flow, Intel® Quartus® Prime Pro Edition must be installed and accessible through your PATH.
+## Prerequisites
+
+This sample is part of the FPGA code samples.
+It is categorized as a Tier 3 sample that demonstrates a design pattern.
+
+```mermaid
+flowchart LR
+ tier1("Tier 1: Get Started")
+ tier2("Tier 2: Explore the Fundamentals")
+ tier3("Tier 3: Explore the Advanced Techniques")
+ tier4("Tier 4: Explore the Reference Designs")
+
+ tier1 --> tier2 --> tier3 --> tier4
+
+ style tier1 fill:#0071c1,stroke:#0071c1,stroke-width:1px,color:#fff
+ style tier2 fill:#0071c1,stroke:#0071c1,stroke-width:1px,color:#fff
+ style tier3 fill:#f96,stroke:#333,stroke-width:1px,color:#fff
+ style tier4 fill:#0071c1,stroke:#0071c1,stroke-width:1px,color:#fff
+```
+
+Find more information about how to navigate this part of the code samples in the [FPGA top-level README.md](/DirectProgramming/DPC++FPGA/README.md).
+You can also find more information about [troubleshooting build errors](/DirectProgramming/DPC++FPGA/README.md#troubleshooting), [running the sample on the Intel® DevCloud](/DirectProgramming/DPC++FPGA/README.md#build-and-run-the-samples-on-intel-devcloud-optional), [using Visual Studio Code with the code samples](/DirectProgramming/DPC++FPGA/README.md#use-visual-studio-code-vs-code-optional), [links to selected documentation](/DirectProgramming/DPC++FPGA/README.md#documentation), etc.
+
## Purpose
This FPGA tutorial introduces an advanced optimization technique to improve the performance of nested triangular loops with loop-carried dependencies. Such structures are challenging to optimize because of the time-varying loop trip count.
@@ -162,53 +185,26 @@ Summing the number of real and dummy iterations gives the total iterations of th
***Use of ivdep***: Since the loop is restructured to ensure that a minimum of M iterations is executed, the `[[intelfpga::ivdep(M)]]` is used to hint to the compiler that at least _M_ iterations always separate any pair of dependent iterations.
-### Additional Documentation
-- [Explore SYCL* Through Intel® FPGA Code Samples](https://software.intel.com/content/www/us/en/develop/articles/explore-dpcpp-through-intel-fpga-code-samples.html) helps you to navigate the samples and build your knowledge of FPGAs and SYCL.
-- [FPGA Optimization Guide for Intel® oneAPI Toolkits](https://software.intel.com/content/www/us/en/develop/documentation/oneapi-fpga-optimization-guide) helps you understand how to target FPGAs using SYCL and Intel® oneAPI Toolkits.
-- [Intel® oneAPI Programming Guide](https://software.intel.com/en-us/oneapi-programming-guide) helps you understand target-independent, SYCL-compliant programming using Intel® oneAPI Toolkits.
-
-
## Key Concepts
* The triangular loop advanced optimization technique and situations in which it is applicable.
* Using `ivdep safelen` to convey the broken loop-carried dependency to the compiler.
## Building the `triangular_loop` Tutorial
-> **Note**: If you have not already done so, set up your CLI
-> environment by sourcing the `setvars` script located in
-> the root of your oneAPI installation.
+
+> **Note**: When working with the command-line interface (CLI), you should configure the oneAPI toolkits using environment variables.
+> Set up your CLI environment by sourcing the `setvars` script located in the root of your oneAPI installation every time you open a new terminal window.
+> This practice ensures that your compiler, libraries, and tools are ready for development.
>
> Linux*:
> - For system wide installations: `. /opt/intel/oneapi/setvars.sh`
-> - For private installations: `. ~/intel/oneapi/setvars.sh`
+> - For private installations: `. ~/intel/oneapi/setvars.sh`
+> - For non-POSIX shells, like csh, use the following command: `bash -c 'source <install-dir>/setvars.sh ; exec csh'`
>
> Windows*:
> - `C:\Program Files (x86)\Intel\oneAPI\setvars.bat`
+> - Windows PowerShell*, use the following command: `cmd.exe "/K" '"C:\Program Files (x86)\Intel\oneAPI\setvars.bat" && powershell'`
>
->For more information on environment variables, see **Use the setvars Script** for [Linux or macOS](https://www.intel.com/content/www/us/en/develop/documentation/oneapi-programming-guide/top/oneapi-development-environment-setup/use-the-setvars-script-with-linux-or-macos.html), or [Windows](https://www.intel.com/content/www/us/en/develop/documentation/oneapi-programming-guide/top/oneapi-development-environment-setup/use-the-setvars-script-with-windows.html).
-
-
-
-### Running Samples in Intel® DevCloud
-If running a sample in the Intel® DevCloud, remember that you must specify the type of compute node and whether to run in batch or interactive mode. Compiles to FPGA are only supported on fpga_compile nodes. Executing programs on FPGA hardware is only supported on fpga_runtime nodes of the appropriate type, such as fpga_runtime:arria10 or fpga_runtime:stratix10. Neither compiling nor executing programs on FPGA hardware are supported on the login nodes. For more information, see the Intel® oneAPI Base Toolkit Get Started Guide ([https://devcloud.intel.com/oneapi/documentation/base-toolkit/](https://devcloud.intel.com/oneapi/documentation/base-toolkit/)).
-
-When compiling for FPGA hardware, it is recommended to increase the job timeout to 12h.
-
-
-### Using Visual Studio Code* (Optional)
-
-You can use Visual Studio Code (VS Code) extensions to set your environment, create launch configurations,
-and browse and download samples.
-
-The basic steps to build and run a sample using VS Code include:
- - Download a sample using the extension **Code Sample Browser for Intel® oneAPI Toolkits**.
- - Configure the oneAPI environment with the extension **Environment Configurator for Intel® oneAPI Toolkits**.
- - Open a Terminal in VS Code (**Terminal>New Terminal**).
- - Run the sample in the VS Code terminal using the instructions below.
- - (Linux only) Debug your GPU application with GDB for Intel® oneAPI toolkits using the Generate Launch Configurations extension.
-
-To learn more about the extensions and how to configure the oneAPI environment, see the
-[Using Visual Studio Code with Intel® oneAPI Toolkits User Guide](https://www.intel.com/content/www/us/en/develop/documentation/using-vs-code-with-intel-oneapi/top.html).
-
+> For more information on configuring environment variables, see [Use the setvars Script with Linux* or macOS*](https://www.intel.com/content/www/us/en/develop/documentation/oneapi-programming-guide/top/oneapi-development-environment-setup/use-the-setvars-script-with-linux-or-macos.html) or [Use the setvars Script with Windows*](https://www.intel.com/content/www/us/en/develop/documentation/oneapi-programming-guide/top/oneapi-development-environment-setup/use-the-setvars-script-with-windows.html).
### On a Linux* System
@@ -295,43 +291,34 @@ To learn more about the extensions and how to configure the oneAPI environment,
> **Note**: If you encounter any issues with long paths when compiling under Windows*, you may have to create your ‘build’ directory in a shorter path, for example c:\samples\build. You can then run cmake from that directory, and provide cmake with the full path to your sample directory.
-### Troubleshooting
-If an error occurs, you can get more details by running `make` with
-the `VERBOSE=1` argument:
-``make VERBOSE=1``
-For more comprehensive troubleshooting, use the Diagnostics Utility for
-Intel® oneAPI Toolkits, which provides system checks to find missing
-dependencies and permissions errors.
-[Learn more](https://software.intel.com/content/www/us/en/develop/documentation/diagnostic-utility-user-guide/top.html).
-
-
- ### In Third-Party Integrated Development Environments (IDEs)
-
-You can compile and run this tutorial in the Eclipse* IDE (in Linux*) and the Visual Studio* IDE (in Windows*). For instructions, refer to the following link: [FPGA Workflows on Third-Party IDEs for Intel® oneAPI Toolkits](https://www.intel.com/content/www/us/en/developer/articles/technical/intel-oneapi-dpcpp-fpga-workflow-on-ide.html)
-
## Examining the Reports
Locate `report.html` in the `triangular_loop_report.prj/reports/` directory. Open the report in any of Chrome*, Firefox*, Edge*, or Internet Explorer*.
Consult the "Loop Analysis" report to compare the optimized and unoptimized versions of the loop.
-
## Running the Sample
- 1. Run the sample on the FPGA emulator (the kernel executes on the CPU):
- ```
- ./triangular_loop.fpga_emu (Linux)
- triangular_loop.fpga_emu.exe (Windows)
- ```
+1. Run the sample on the FPGA emulator (the kernel executes on the CPU):
+ ```
+ ./triangular_loop.fpga_emu (Linux)
+ triangular_loop.fpga_emu.exe (Windows)
+ ```
2. Run the sample on the FPGA simulator device:
- ```
- ./triangular_loop.fpga_sim (Linux)
- triangular_loop.fpga_sim.exe (Windows)
- ```
+ * On Linux
+ ```
+ CL_CONTEXT_MPSIM_DEVICE_INTELFPGA=1 ./triangular_loop.fpga_sim
+ ```
+ * On Windows
+ ```
+ set CL_CONTEXT_MPSIM_DEVICE_INTELFPGA=1
+ triangular_loop.fpga_sim.exe
+ set CL_CONTEXT_MPSIM_DEVICE_INTELFPGA=
+ ```
3. Run the sample on the FPGA device:
- ```
- ./triangular_loop.fpga (Linux)
- triangular_loop.fpga.exe (Windows)
- ```
+ ```
+ ./triangular_loop.fpga (Linux)
+ triangular_loop.fpga.exe (Windows)
+ ```
### Example of Output
@@ -369,6 +356,7 @@ Configuration | Overall Execution Time (ms) | Throughput (MB/s)
Without optimization, the compiler achieved an II of 30 on the inner-loop. With the optimization, the compiler achieves an II of 1, and the throughput increased by approximately 30x.
## License
+
Code samples are licensed under the MIT license. See
[License.txt](https://github.com/oneapi-src/oneAPI-samples/blob/master/License.txt) for details.
diff --git a/DirectProgramming/C++SYCL_FPGA/Tutorials/DesignPatterns/triangular_loop/src/CMakeLists.txt b/DirectProgramming/C++SYCL_FPGA/Tutorials/DesignPatterns/triangular_loop/src/CMakeLists.txt
index b78f6e109e..47cb4fb14f 100644
--- a/DirectProgramming/C++SYCL_FPGA/Tutorials/DesignPatterns/triangular_loop/src/CMakeLists.txt
+++ b/DirectProgramming/C++SYCL_FPGA/Tutorials/DesignPatterns/triangular_loop/src/CMakeLists.txt
@@ -23,11 +23,11 @@ endif()
# 1. The "compile" stage compiles the device code to an intermediate representation (SPIR-V).
# 2. The "link" stage invokes the compiler's FPGA backend before linking.
# For this reason, FPGA backend flags must be passed as link flags in CMake.
-set(EMULATOR_COMPILE_FLAGS "-Wall ${WIN_FLAG} -fsycl -fintelfpga -DFPGA_EMULATOR")
+set(EMULATOR_COMPILE_FLAGS "-fsycl -fintelfpga -Wall ${WIN_FLAG} -DFPGA_EMULATOR")
set(EMULATOR_LINK_FLAGS "-fsycl -fintelfpga")
-set(SIMULATOR_COMPILE_FLAGS "-Wall -fsycl -fintelfpga ${WIN_FLAG} -Xssimulation -DFPGA_SIMULATOR")
+set(SIMULATOR_COMPILE_FLAGS "-fsycl -fintelfpga -Wall ${WIN_FLAG} -Xssimulation -DFPGA_SIMULATOR")
set(SIMULATOR_LINK_FLAGS "-fsycl -fintelfpga -Xssimulation -Xstarget=${FPGA_DEVICE} ${USER_HARDWARE_FLAGS}")
-set(HARDWARE_COMPILE_FLAGS "-Wall ${WIN_FLAG} -fsycl -fintelfpga")
+set(HARDWARE_COMPILE_FLAGS "-fsycl -fintelfpga -Wall ${WIN_FLAG} -DFPGA_HARDWARE")
set(HARDWARE_LINK_FLAGS "-fsycl -fintelfpga -Xshardware -Xstarget=${FPGA_DEVICE} ${USER_HARDWARE_FLAGS}")
# use cmake -D USER_HARDWARE_FLAGS=<flags> to set extra flags for FPGA backend compilation
diff --git a/DirectProgramming/C++SYCL_FPGA/Tutorials/DesignPatterns/triangular_loop/src/triangular_loop.cpp b/DirectProgramming/C++SYCL_FPGA/Tutorials/DesignPatterns/triangular_loop/src/triangular_loop.cpp
index 3a846a5bfa..1ebf3c9486 100644
--- a/DirectProgramming/C++SYCL_FPGA/Tutorials/DesignPatterns/triangular_loop/src/triangular_loop.cpp
+++ b/DirectProgramming/C++SYCL_FPGA/Tutorials/DesignPatterns/triangular_loop/src/triangular_loop.cpp
@@ -126,30 +126,27 @@ int main() {
ulong t1_kernel, t2_kernel;
double time_kernel;
// Create queue, get platform and device
-#if defined(FPGA_EMULATOR)
- ext::intel::fpga_emulator_selector device_selector;
- std::cout << "\nEmulator output does not demonstrate true hardware "
- "performance. The design may need to run on actual hardware "
- "to observe the performance benefit of the optimization "
- "exemplified in this tutorial.\n\n";
-#elif defined(FPGA_SIMULATOR)
- ext::intel::fpga_simulator_selector device_selector;
-#else
- ext::intel::fpga_selector device_selector;
+#if FPGA_SIMULATOR
+ auto selector = sycl::ext::intel::fpga_simulator_selector_v;
+#elif FPGA_HARDWARE
+ auto selector = sycl::ext::intel::fpga_selector_v;
+#else // #if FPGA_EMULATOR
+ auto selector = sycl::ext::intel::fpga_emulator_selector_v;
#endif
try {
auto prop_list =
property_list{property::queue::enable_profiling()};
- sycl::queue q(device_selector, fpga_tools::exception_handler, prop_list);
+ sycl::queue q(selector, fpga_tools::exception_handler, prop_list);
platform platform = q.get_context().get_platform();
device device = q.get_device();
std::cout << "Platform name: "
<< platform.get_info().c_str() << "\n";
- std::cout << "Device name: "
- << device.get_info().c_str() << "\n\n\n";
+ std::cout << "Running on device: "
+ << device.get_info().c_str()
+ << std::endl;
// Create input and output buffers
auto input_buf = buffer(range<1>(kSize));
diff --git a/DirectProgramming/C++SYCL_FPGA/Tutorials/DesignPatterns/zero_copy_data_transfer/README.md b/DirectProgramming/C++SYCL_FPGA/Tutorials/DesignPatterns/zero_copy_data_transfer/README.md
index 3b35af321d..630cdcb4e4 100755
--- a/DirectProgramming/C++SYCL_FPGA/Tutorials/DesignPatterns/zero_copy_data_transfer/README.md
+++ b/DirectProgramming/C++SYCL_FPGA/Tutorials/DesignPatterns/zero_copy_data_transfer/README.md
@@ -11,7 +11,7 @@ This tutorial demonstrates how to use zero-copy host memory via the SYCL Unified
> **Note**: Even though the Intel DPC++/C++ OneAPI compiler is enough to compile for emulation, generating reports and generating RTL, there are extra software requirements for the simulation flow and FPGA compiles.
>
-> For using the simulator flow, one of the following simulators must be installed and accessible through your PATH:
+> For using the simulator flow, Intel® Quartus® Prime Pro Edition and one of the following simulators must be installed and accessible through your PATH:
> - Questa*-Intel® FPGA Edition
> - Questa*-Intel® FPGA Starter Edition
> - ModelSim® SE
@@ -20,6 +20,29 @@ This tutorial demonstrates how to use zero-copy host memory via the SYCL Unified
*Notice: SYCL USM host allocations (and therefore this tutorial) are only supported for the Intel® FPGA PAC D5005 (with Intel Stratix® 10 SX) with USM support (i.e., intel_s10sx_pac:pac_s10_usm)*
+## Prerequisites
+
+This sample is part of the FPGA code samples.
+It is categorized as a Tier 3 sample that demonstrates a design pattern.
+
+```mermaid
+flowchart LR
+ tier1("Tier 1: Get Started")
+ tier2("Tier 2: Explore the Fundamentals")
+ tier3("Tier 3: Explore the Advanced Techniques")
+ tier4("Tier 4: Explore the Reference Designs")
+
+ tier1 --> tier2 --> tier3 --> tier4
+
+ style tier1 fill:#0071c1,stroke:#0071c1,stroke-width:1px,color:#fff
+ style tier2 fill:#0071c1,stroke:#0071c1,stroke-width:1px,color:#fff
+ style tier3 fill:#f96,stroke:#333,stroke-width:1px,color:#fff
+ style tier4 fill:#0071c1,stroke:#0071c1,stroke-width:1px,color:#fff
+```
+
+Find more information about how to navigate this part of the code samples in the [FPGA top-level README.md](/DirectProgramming/DPC++FPGA/README.md).
+You can also find more information about [troubleshooting build errors](/DirectProgramming/DPC++FPGA/README.md#troubleshooting), [running the sample on the Intel® DevCloud](/DirectProgramming/DPC++FPGA/README.md#build-and-run-the-samples-on-intel-devcloud-optional), [using Visual Studio Code with the code samples](/DirectProgramming/DPC++FPGA/README.md#use-visual-studio-code-vs-code-optional), [links to selected documentation](/DirectProgramming/DPC++FPGA/README.md#documentation), etc.
+
## Purpose
The purpose of this tutorial is to show you how to take advantage of zero-copy host memory for the FPGA to improve the performance of your design. On FPGA, SYCL* implements all host and shared allocations as *zero-copy* data in host memory. This means that the FPGA will access the data directly over PCIe, which can improve performance in cases where there is little or no temporal reuse of data in the FPGA kernel. This tutorial includes two different kernels: one using traditional SYCL buffers (`src/buffer_kernel.hpp`) and one using USM host allocations (`src/zero_copy_kernel.hpp`) that takes advantage of zero-copy host memory. Before completing this tutorial, it is suggested you review the **Explicit USM** (explicit_usm) tutorial.
@@ -36,49 +59,26 @@ However, a better approach would simply stream the data from the host memory to
This approach is not considered host streaming since the CPU and FPGA cannot (reliably) access the input/output data simultaneously. In other words, the host must wait until all the FPGA kernels have finished before accessing the output data. However, we did avoid copying the data to and from the FPGA's Device Memory and therefore, we get overall savings in total latency. This savings can be seen by running the sample on FPGA hardware or the example output later in the [Example of Output](#example-of-output) section. Another FPGA tutorial, **Simple Host Streaming** (simple_host_streaming), describes how to achieve true host streaming using USM host allocations.
-### Additional Documentation
-- [Explore SYCL* Through Intel® FPGA Code Samples](https://software.intel.com/content/www/us/en/develop/articles/explore-dpcpp-through-intel-fpga-code-samples.html) helps you to navigate the samples and build your knowledge of FPGAs and SYCL.
-- [FPGA Optimization Guide for Intel® oneAPI Toolkits](https://software.intel.com/content/www/us/en/develop/documentation/oneapi-fpga-optimization-guide) helps you understand how to target FPGAs using SYCL and Intel® oneAPI Toolkits.
-- [Intel® oneAPI Programming Guide](https://software.intel.com/en-us/oneapi-programming-guide) helps you understand target-independent, SYCL-compliant programming using Intel® oneAPI Toolkits.
-
## Key Concepts
* How to use USM host allocations for the FPGA.
* The performance benefits of using host allocations over traditional SYCL buffers or device allocations.
## Building the `zero_copy_data_transfer` Tutorial
-> **Note**: If you have not already done so, set up your CLI
-> environment by sourcing the `setvars` script located in
-> the root of your oneAPI installation.
+
+> **Note**: When working with the command-line interface (CLI), you should configure the oneAPI toolkits using environment variables.
+> Set up your CLI environment by sourcing the `setvars` script located in the root of your oneAPI installation every time you open a new terminal window.
+> This practice ensures that your compiler, libraries, and tools are ready for development.
>
> Linux*:
> - For system wide installations: `. /opt/intel/oneapi/setvars.sh`
-> - For private installations: `. ~/intel/oneapi/setvars.sh`
+> - For private installations: `. ~/intel/oneapi/setvars.sh`
+> - For non-POSIX shells, like csh, use the following command: `bash -c 'source <install-dir>/setvars.sh ; exec csh'`
>
> Windows*:
> - `C:\Program Files (x86)\Intel\oneAPI\setvars.bat`
+> - Windows PowerShell*, use the following command: `cmd.exe "/K" '"C:\Program Files (x86)\Intel\oneAPI\setvars.bat" && powershell'`
>
->For more information on environment variables, see **Use the setvars Script** for [Linux or macOS](https://www.intel.com/content/www/us/en/develop/documentation/oneapi-programming-guide/top/oneapi-development-environment-setup/use-the-setvars-script-with-linux-or-macos.html), or [Windows](https://www.intel.com/content/www/us/en/develop/documentation/oneapi-programming-guide/top/oneapi-development-environment-setup/use-the-setvars-script-with-windows.html).
-
-
-### Running Samples in Intel® DevCloud
-If running a sample in the Intel® DevCloud, remember that you must specify the type of compute node and whether to run in batch or interactive mode. Compiles to FPGA are only supported on fpga_compile nodes. Executing programs on FPGA hardware is only supported on fpga_runtime nodes of the appropriate type, such as fpga_runtime:arria10 or fpga_runtime:stratix10. Neither compiling nor executing programs on FPGA hardware are supported on the login nodes. For more information, see the Intel® oneAPI Base Toolkit Get Started Guide ([https://devcloud.intel.com/oneapi/documentation/base-toolkit/](https://devcloud.intel.com/oneapi/documentation/base-toolkit/)).
-
-When compiling for FPGA hardware, it is recommended to increase the job timeout to 12h.
-
-
-### Using Visual Studio Code* (Optional)
-
-You can use Visual Studio Code (VS Code) extensions to set your environment, create launch configurations,
-and browse and download samples.
-
-The basic steps to build and run a sample using VS Code include:
- - Download a sample using the extension **Code Sample Browser for Intel® oneAPI Toolkits**.
- - Configure the oneAPI environment with the extension **Environment Configurator for Intel® oneAPI Toolkits**.
- - Open a Terminal in VS Code (**Terminal>New Terminal**).
- - Run the sample in the VS Code terminal using the instructions below.
-
-To learn more about the extensions and how to configure the oneAPI environment, see the
-[Using Visual Studio Code with Intel® oneAPI Toolkits User Guide](https://software.intel.com/content/www/us/en/develop/documentation/using-vs-code-with-intel-oneapi/top.html).
+> For more information on configuring environment variables, see [Use the setvars Script with Linux* or macOS*](https://www.intel.com/content/www/us/en/develop/documentation/oneapi-programming-guide/top/oneapi-development-environment-setup/use-the-setvars-script-with-linux-or-macos.html) or [Use the setvars Script with Windows*](https://www.intel.com/content/www/us/en/develop/documentation/oneapi-programming-guide/top/oneapi-development-environment-setup/use-the-setvars-script-with-windows.html).
### On a Linux* System
@@ -157,40 +157,32 @@ To learn more about the extensions and how to configure the oneAPI environment,
> **Note**: If you encounter any issues with long paths when compiling under Windows*, you may have to create your `build` directory in a shorter path, for example `c:\samples\build`. You can then run cmake from that directory, and provide cmake with the full path to your sample directory.
-### Troubleshooting
-If an error occurs, you can get more details by running `make` with
-the `VERBOSE=1` argument:
-``make VERBOSE=1``
-For more comprehensive troubleshooting, use the Diagnostics Utility for
-Intel® oneAPI Toolkits, which provides system checks to find missing
-dependencies and permissions errors.
-[Learn more](https://software.intel.com/content/www/us/en/develop/documentation/diagnostic-utility-user-guide/top.html).
-
-
-### In Third-Party Integrated Development Environments (IDEs)
-
-You can compile and run this tutorial in the Eclipse* IDE (in Linux*) and the Visual Studio* IDE (in Windows*). For instructions, refer to the following link: [FPGA Workflows on Third-Party IDEs for Intel® oneAPI Toolkits](https://www.intel.com/content/www/us/en/developer/articles/technical/intel-oneapi-dpcpp-fpga-workflow-on-ide.html).
-
## Examining the Reports
Locate `report.html` in the `zero_copy_data_transfer_report.prj/reports/` directory. Open the report in any of Chrome*, Firefox*, Edge*, or Internet Explorer*.
## Running the Sample
- 1. Run the sample on the FPGA emulator (the kernel executes on the CPU):
- ```
- ./zero_copy_data_transfer.fpga_emu (Linux)
- zero_copy_data_transfer.fpga_emu.exe (Windows)
- ```
- 2. Run the sample on the FPGA simulator:
- ```
- ./zero_copy_data_transfer.fpga_sim (Linux)
- zero_copy_data_transfer.fpga_sim.exe (Windows)
- ```
- 3. Run the sample on the FPGA device:
- ```
- ./zero_copy_data_transfer.fpga (Linux)
- zero_copy_data_transfer.fpga.exe (Windows)
- ```
+1. Run the sample on the FPGA emulator (the kernel executes on the CPU):
+ ```
+ ./zero_copy_data_transfer.fpga_emu (Linux)
+ zero_copy_data_transfer.fpga_emu.exe (Windows)
+ ```
+2. Run the sample on the FPGA simulator:
+ * On Linux
+ ```
+ CL_CONTEXT_MPSIM_DEVICE_INTELFPGA=1 ./zero_copy_data_transfer.fpga_sim
+ ```
+ * On Windows
+ ```
+ set CL_CONTEXT_MPSIM_DEVICE_INTELFPGA=1
+ zero_copy_data_transfer.fpga_sim.exe
+ set CL_CONTEXT_MPSIM_DEVICE_INTELFPGA=
+ ```
+3. Run the sample on the FPGA device:
+ ```
+ ./zero_copy_data_transfer.fpga (Linux)
+ zero_copy_data_transfer.fpga.exe (Windows)
+ ```
### Example of Output
You should see the following output in the console:
@@ -212,6 +204,7 @@ You should see the following output in the console:
```
## License
+
Code samples are licensed under the MIT license. See
[License.txt](https://github.com/oneapi-src/oneAPI-samples/blob/master/License.txt) for details.
diff --git a/DirectProgramming/C++SYCL_FPGA/Tutorials/DesignPatterns/zero_copy_data_transfer/src/CMakeLists.txt b/DirectProgramming/C++SYCL_FPGA/Tutorials/DesignPatterns/zero_copy_data_transfer/src/CMakeLists.txt
index 80432ce54a..55245b4cc6 100755
--- a/DirectProgramming/C++SYCL_FPGA/Tutorials/DesignPatterns/zero_copy_data_transfer/src/CMakeLists.txt
+++ b/DirectProgramming/C++SYCL_FPGA/Tutorials/DesignPatterns/zero_copy_data_transfer/src/CMakeLists.txt
@@ -35,11 +35,11 @@ endif()
# 1. The "compile" stage compiles the device code to an intermediate representation (SPIR-V).
# 2. The "link" stage invokes the compiler's FPGA backend before linking.
# For this reason, FPGA backend flags must be passed as link flags in CMake.
-set(EMULATOR_COMPILE_FLAGS "-Wall ${WIN_FLAG} -fsycl -fintelfpga -Wall -DFPGA_EMULATOR ${DEVICE_FLAG}")
+set(EMULATOR_COMPILE_FLAGS "-fsycl -fintelfpga -Wall ${WIN_FLAG} -Wall -DFPGA_EMULATOR ${DEVICE_FLAG}")
set(EMULATOR_LINK_FLAGS "-fsycl -fintelfpga")
-set(SIMULATOR_COMPILE_FLAGS "-Wall ${WIN_FLAG} -fsycl -fintelfpga -Xssimulation -Wall -DFPGA_SIMULATOR ${DEVICE_FLAG}")
+set(SIMULATOR_COMPILE_FLAGS "-fsycl -fintelfpga -Wall ${WIN_FLAG} -Xssimulation -Wall -DFPGA_SIMULATOR ${DEVICE_FLAG}")
set(SIMULATOR_LINK_FLAGS "-fsycl -fintelfpga -Xssimulation -Xshyper-optimized-handshaking=off -Xstarget=${FPGA_DEVICE} ${DEVICE_FLAG} ${USER_SIMULATOR_FLAGS}")
-set(HARDWARE_COMPILE_FLAGS "-Wall ${WIN_FLAG} -fsycl -fintelfpga -Wall ${DEVICE_FLAG}")
+set(HARDWARE_COMPILE_FLAGS "-fsycl -fintelfpga -Wall ${WIN_FLAG} -Wall ${DEVICE_FLAG} -DFPGA_HARDWARE")
set(HARDWARE_LINK_FLAGS "-fsycl -fintelfpga -Xshardware -Xshyper-optimized-handshaking=off -Xstarget=${FPGA_DEVICE} ${DEVICE_FLAG} ${USER_HARDWARE_FLAGS}")
# use cmake -D USER_HARDWARE_FLAGS= to set extra flags for FPGA backend compilation
diff --git a/DirectProgramming/C++SYCL_FPGA/Tutorials/DesignPatterns/zero_copy_data_transfer/src/zero_copy_data_transfer.cpp b/DirectProgramming/C++SYCL_FPGA/Tutorials/DesignPatterns/zero_copy_data_transfer/src/zero_copy_data_transfer.cpp
index 7df84b69f1..2da35473d8 100644
--- a/DirectProgramming/C++SYCL_FPGA/Tutorials/DesignPatterns/zero_copy_data_transfer/src/zero_copy_data_transfer.cpp
+++ b/DirectProgramming/C++SYCL_FPGA/Tutorials/DesignPatterns/zero_copy_data_transfer/src/zero_copy_data_transfer.cpp
@@ -48,25 +48,29 @@ int main(int argc, char* argv[]) {
try {
// device selector
-#if defined(FPGA_EMULATOR)
- ext::intel::fpga_emulator_selector selector;
-#elif FPGA_SIMULATOR
- ext::intel::fpga_simulator_selector selector;
-#else
- ext::intel::fpga_selector selector;
+#if FPGA_SIMULATOR
+ auto selector = sycl::ext::intel::fpga_simulator_selector_v;
+#elif FPGA_HARDWARE
+ auto selector = sycl::ext::intel::fpga_selector_v;
+#else // #if FPGA_EMULATOR
+ auto selector = sycl::ext::intel::fpga_emulator_selector_v;
#endif
// create the device queue
queue q(selector, fpga_tools::exception_handler);
// make sure the device supports USM host allocations
- device d = q.get_device();
- if (!d.get_info()) {
+ auto device = q.get_device();
+ if (!device.get_info()) {
std::cerr << "ERROR: The selected device does not support USM host"
<< " allocations\n";
return 1;
}
+ std::cout << "Running on device: "
+ << device.get_info().c_str()
+ << std::endl;
+
// the golden output
std::vector out_gold(size);
diff --git a/DirectProgramming/C++SYCL_FPGA/Tutorials/Features/ac_fixed/README.md b/DirectProgramming/C++SYCL_FPGA/Tutorials/Features/ac_fixed/README.md
index 4f437d9bc3..6be533e935 100755
--- a/DirectProgramming/C++SYCL_FPGA/Tutorials/Features/ac_fixed/README.md
+++ b/DirectProgramming/C++SYCL_FPGA/Tutorials/Features/ac_fixed/README.md
@@ -12,13 +12,36 @@ This FPGA tutorial demonstrates how to use the Algorithmic C (AC) data type `ac_
> **Note**: Even though the Intel DPC++/C++ OneAPI compiler is enough to compile for emulation, generating reports and generating RTL, there are extra software requirements for the simulation flow and FPGA compiles.
>
-> For using the simulator flow, one of the following simulators must be installed and accessible through your PATH:
+> For using the simulator flow, Intel® Quartus® Prime Pro Edition and one of the following simulators must be installed and accessible through your PATH:
> - Questa*-Intel® FPGA Edition
> - Questa*-Intel® FPGA Starter Edition
> - ModelSim® SE
>
> When using the hardware compile flow, Intel® Quartus® Prime Pro Edition must be installed and accessible through your PATH.
+## Prerequisites
+
+This sample is part of the FPGA code samples.
+It is categorized as a Tier 2 sample that demonstrates a compiler feature.
+
+```mermaid
+flowchart LR
+ tier1("Tier 1: Get Started")
+ tier2("Tier 2: Explore the Fundamentals")
+ tier3("Tier 3: Explore the Advanced Techniques")
+ tier4("Tier 4: Explore the Reference Designs")
+
+ tier1 --> tier2 --> tier3 --> tier4
+
+ style tier1 fill:#0071c1,stroke:#0071c1,stroke-width:1px,color:#fff
+ style tier2 fill:#f96,stroke:#333,stroke-width:1px,color:#fff
+ style tier3 fill:#0071c1,stroke:#0071c1,stroke-width:1px,color:#fff
+ style tier4 fill:#0071c1,stroke:#0071c1,stroke-width:1px,color:#fff
+```
+
+Find more information about how to navigate this part of the code samples in the [FPGA top-level README.md](/DirectProgramming/C++SYCL_FPGA/README.md).
+You can also find more information about [troubleshooting build errors](/DirectProgramming/C++SYCL_FPGA/README.md#troubleshooting), [running the sample on the Intel® DevCloud](/DirectProgramming/C++SYCL_FPGA/README.md#build-and-run-the-samples-on-intel-devcloud-optional), [using Visual Studio Code with the code samples](/DirectProgramming/C++SYCL_FPGA/README.md#use-visual-studio-code-vs-code-optional), [links to selected documentation](/DirectProgramming/C++SYCL_FPGA/README.md#documentation), etc.
+
## Purpose
This FPGA tutorial shows you how to use the `ac_fixed` type to perform fixed-point arithmetic and includes some simple examples.
@@ -111,12 +134,6 @@ When you use the `ac_fixed` library, keep the following points in mind:
Due to the differences in the internal math implementations, the results from `ac_fixed` math functions in emulation and FPGA hardware might not always be bit-accurate. This tutorial shows how to build and run the sample for emulation and FPGA hardware so you can observe the difference.
-### Additional Documentation
-
-- [Explore SYCL* Through Intel® FPGA Code Samples](https://software.intel.com/content/www/us/en/develop/articles/explore-dpcpp-through-intel-fpga-code-samples.html) helps you to navigate the samples and build your knowledge of FPGAs and SYCL.
-- [FPGA Optimization Guide for Intel® oneAPI Toolkits](https://software.intel.com/content/www/us/en/develop/documentation/oneapi-fpga-optimization-guide) helps you understand how to target FPGAs using SYCL and Intel® oneAPI Toolkits.
-- [Intel® oneAPI Programming Guide](https://software.intel.com/en-us/oneapi-programming-guide) helps you understand target-independent, SYCL-compliant programming using Intel® oneAPI Toolkits.
-
## Key Concepts
- Constructing an `ac_fixed` from a `float` or `double` value is much more area intensive than constructing one from another `ac_fixed`.
@@ -126,46 +143,20 @@ When you use the `ac_fixed` library, keep the following points in mind:
## Building the `ac_fixed` Tutorial
-> **Note**: If you have not already done so, set up your CLI
-> environment by sourcing the `setvars` script located in
-> the root of your oneAPI installation.
+> **Note**: When working with the command-line interface (CLI), you should configure the oneAPI toolkits using environment variables.
+> Set up your CLI environment by sourcing the `setvars` script located in the root of your oneAPI installation every time you open a new terminal window.
+> This practice ensures that your compiler, libraries, and tools are ready for development.
>
> Linux*:
->
> - For system wide installations: `. /opt/intel/oneapi/setvars.sh`
-> - For private installations: `. ~/intel/oneapi/setvars.sh`
+> - For private installations: `. ~/intel/oneapi/setvars.sh`
+> - For non-POSIX shells, like csh, use the following command: `bash -c 'source <install-dir>/setvars.sh ; exec csh'`
>
> Windows*:
->
> - `C:\Program Files(x86)\Intel\oneAPI\setvars.bat`
+> - Windows PowerShell*, use the following command: `cmd.exe "/K" '"C:\Program Files (x86)\Intel\oneAPI\setvars.bat" && powershell'`
>
->For more information on environment variables, see **Use the setvars Script** for [Linux or macOS](https://www.intel.com/content/www/us/en/develop/documentation/oneapi-programming-guide/top/oneapi-development-environment-setup/use-the-setvars-script-with-linux-or-macos.html), or [Windows](https://www.intel.com/content/www/us/en/develop/documentation/oneapi-programming-guide/top/oneapi-development-environment-setup/use-the-setvars-script-with-windows.html).
-
-### Running Samples in Intel® DevCloud
-
-If you are running a sample in the Intel® DevCloud, remember that you must specify the type of compute node and whether to run in batch or interactive mode:
-
-- Compiles to FPGA are supported only on `fpga_compile` nodes.
-- Executing programs on FPGA hardware is supported only on `fpga_runtime` nodes of the appropriate type, such as `fpga_runtime:arria10` or `fpga_runtime:stratix10`.
-
-On the login nodes, you cannot compile or execute programs on FPGA hardware. For more information, see the Intel® oneAPI Base Toolkit Get Started Guide ([https://devcloud.intel.com/oneapi/documentation/base-toolkit/](https://devcloud.intel.com/oneapi/documentation/base-toolkit/)).
-
-When compiling for FPGA hardware, increase the job timeout to 12h.
-
-### Using Visual Studio Code* (Optional)
-
-You can use Visual Studio Code (VS Code) extensions to set your environment, create launch configurations,
-and browse and download samples.
-
-The basic steps to build and run a sample using VS Code include:
-
-- Download a sample using the extension **Code Sample Browser for Intel® oneAPI Toolkits**.
-- Configure the oneAPI environment with the extension **Environment Configurator for Intel® oneAPI Toolkits**.
-- Open a Terminal in VS Code (**Terminal>New Terminal**).
-- Run the sample in the VS Code terminal using the instructions below.
-
-To learn more about the extensions and how to configure the oneAPI environment, see the
-[Using Visual Studio Code with Intel® oneAPI Toolkits User Guide](https://software.intel.com/content/www/us/en/develop/documentation/using-vs-code-with-intel-oneapi/top.html).
+> For more information on configuring environment variables, see [Use the setvars Script with Linux* or macOS*](https://www.intel.com/content/www/us/en/develop/documentation/oneapi-programming-guide/top/oneapi-development-environment-setup/use-the-setvars-script-with-linux-or-macos.html) or [Use the setvars Script with Windows*](https://www.intel.com/content/www/us/en/develop/documentation/oneapi-programming-guide/top/oneapi-development-environment-setup/use-the-setvars-script-with-windows.html).
### On a Linux* System
@@ -279,11 +270,6 @@ To learn more about the extensions and how to configure the oneAPI environment,
> **Note**: If you encounter any issues with long paths when compiling under Windows*, you might have to create your `build` directory in a shorter path, for example `c:\samples\build`. You can then run `cmake` from that directory, and provide `cmake` with the full path to your sample directory.
-### In Third-Party Integrated Development Environments (IDEs)
-
-You can compile and run this tutorial in the Eclipse*IDE (in Linux*) and the Visual Studio*IDE (in Windows*).
-For instructions, refer to [FPGA Workflows on Third-Party IDEs for Intel® oneAPI Toolkits](https://www.intel.com/content/www/us/en/developer/articles/technical/intel-oneapi-dpcpp-fpga-workflow-on-ide.html).
-
## Examining the Reports
Locate the pair of `report.html` files in either:
@@ -304,10 +290,16 @@ Scroll down on the Summary page of the report and expand the section titled **Co
2. Run the sample of the FPGA simulator device
- ```bash
- ./ac_fixed.fpga_sim (Linux)
- ac_fixed.fpga_sim.exe (Windows)
- ```
+ * On Linux
+ ```bash
+ CL_CONTEXT_MPSIM_DEVICE_INTELFPGA=1 ./ac_fixed.fpga_sim
+ ```
+ * On Windows
+ ```bash
+ set CL_CONTEXT_MPSIM_DEVICE_INTELFPGA=1
+ ac_fixed.fpga_sim.exe
+ set CL_CONTEXT_MPSIM_DEVICE_INTELFPGA=
+ ```
3. Run the sample on the FPGA device
diff --git a/DirectProgramming/C++SYCL_FPGA/Tutorials/Features/ac_fixed/src/CMakeLists.txt b/DirectProgramming/C++SYCL_FPGA/Tutorials/Features/ac_fixed/src/CMakeLists.txt
index 45c4c0209b..06249e736b 100755
--- a/DirectProgramming/C++SYCL_FPGA/Tutorials/Features/ac_fixed/src/CMakeLists.txt
+++ b/DirectProgramming/C++SYCL_FPGA/Tutorials/Features/ac_fixed/src/CMakeLists.txt
@@ -32,7 +32,7 @@ set(EMULATOR_COMPILE_FLAGS "-fsycl -fintelfpga ${AC_TYPES_FLAG} -DFPGA_EMULATOR
set(EMULATOR_LINK_FLAGS "-fsycl -fintelfpga ${AC_TYPES_FLAG}")
set(SIMULATOR_COMPILE_FLAGS "-fsycl -fintelfpga ${AC_TYPES_FLAG} -DFPGA_SIMULATOR -Wall ${WIN_FLAG}")
set(SIMULATOR_LINK_FLAGS "-fsycl -fintelfpga ${AC_TYPES_FLAG} -Xssimulation -Xsghdl -Xstarget=${FPGA_DEVICE} ${USER_HARDWARE_FLAGS}")
-set(HARDWARE_COMPILE_FLAGS "-fsycl -fintelfpga ${AC_TYPES_FLAG} -Wall ${WIN_FLAG}")
+set(HARDWARE_COMPILE_FLAGS "-fsycl -fintelfpga ${AC_TYPES_FLAG} -Wall ${WIN_FLAG} -DFPGA_HARDWARE")
set(HARDWARE_LINK_FLAGS "-fsycl -fintelfpga ${AC_TYPES_FLAG} -Xshardware -Xstarget=${FPGA_DEVICE} ${USER_HARDWARE_FLAGS}")
# use cmake -D USER_HARDWARE_FLAGS= to set extra flags for FPGA backend compilation
diff --git a/DirectProgramming/C++SYCL_FPGA/Tutorials/Features/ac_fixed/src/ac_fixed.cpp b/DirectProgramming/C++SYCL_FPGA/Tutorials/Features/ac_fixed/src/ac_fixed.cpp
index 76b2d11992..2a94fe35b1 100644
--- a/DirectProgramming/C++SYCL_FPGA/Tutorials/Features/ac_fixed/src/ac_fixed.cpp
+++ b/DirectProgramming/C++SYCL_FPGA/Tutorials/Features/ac_fixed/src/ac_fixed.cpp
@@ -109,17 +109,23 @@ void TestCalculateWithACFixed(queue &q, const fixed_10_3_t &x,
}
int main() {
-#if defined(FPGA_EMULATOR)
- ext::intel::fpga_emulator_selector device_selector;
-#elif defined(FPGA_SIMULATOR)
- ext::intel::fpga_simulator_selector device_selector;
-#else
- ext::intel::fpga_selector device_selector;
+#if FPGA_SIMULATOR
+ auto selector = sycl::ext::intel::fpga_simulator_selector_v;
+#elif FPGA_HARDWARE
+ auto selector = sycl::ext::intel::fpga_selector_v;
+#else // #if FPGA_EMULATOR
+ auto selector = sycl::ext::intel::fpga_emulator_selector_v;
#endif
try {
// Create the SYCL device queue
- queue q(device_selector, fpga_tools::exception_handler);
+ queue q(selector, fpga_tools::exception_handler);
+
+ auto device = q.get_device();
+
+ std::cout << "Running on device: "
+ << device.get_info().c_str()
+ << std::endl;
// I. Constructing `ac_fixed` Numbers
std::cout << "1. Testing Constructing ac_fixed from float or ac_fixed:\n";
diff --git a/DirectProgramming/C++SYCL_FPGA/Tutorials/Features/ac_int/README.md b/DirectProgramming/C++SYCL_FPGA/Tutorials/Features/ac_int/README.md
index b93299129e..e2d4fff73f 100755
--- a/DirectProgramming/C++SYCL_FPGA/Tutorials/Features/ac_int/README.md
+++ b/DirectProgramming/C++SYCL_FPGA/Tutorials/Features/ac_int/README.md
@@ -12,13 +12,36 @@ This FPGA tutorial demonstrates how to use the Algorithmic C (AC) data type `ac_
> **Note**: Even though the Intel DPC++/C++ OneAPI compiler is enough to compile for emulation, generating reports and generating RTL, there are extra software requirements for the simulation flow and FPGA compiles.
>
-> For using the simulator flow, one of the following simulators must be installed and accessible through your PATH:
+> For using the simulator flow, Intel® Quartus® Prime Pro Edition and one of the following simulators must be installed and accessible through your PATH:
> - Questa*-Intel® FPGA Edition
> - Questa*-Intel® FPGA Starter Edition
> - ModelSim® SE
>
> When using the hardware compile flow, Intel® Quartus® Prime Pro Edition must be installed and accessible through your PATH.
+## Prerequisites
+
+This sample is part of the FPGA code samples.
+It is categorized as a Tier 2 sample that demonstrates a compiler feature.
+
+```mermaid
+flowchart LR
+ tier1("Tier 1: Get Started")
+ tier2("Tier 2: Explore the Fundamentals")
+ tier3("Tier 3: Explore the Advanced Techniques")
+ tier4("Tier 4: Explore the Reference Designs")
+
+ tier1 --> tier2 --> tier3 --> tier4
+
+ style tier1 fill:#0071c1,stroke:#0071c1,stroke-width:1px,color:#fff
+ style tier2 fill:#f96,stroke:#333,stroke-width:1px,color:#fff
+ style tier3 fill:#0071c1,stroke:#0071c1,stroke-width:1px,color:#fff
+ style tier4 fill:#0071c1,stroke:#0071c1,stroke-width:1px,color:#fff
+```
+
+Find more information about how to navigate this part of the code samples in the [FPGA top-level README.md](/DirectProgramming/C++SYCL_FPGA/README.md).
+You can also find more information about [troubleshooting build errors](/DirectProgramming/C++SYCL_FPGA/README.md#troubleshooting), [running the sample on the Intel® DevCloud](/DirectProgramming/C++SYCL_FPGA/README.md#build-and-run-the-samples-on-intel-devcloud-optional), [using Visual Studio Code with the code samples](/DirectProgramming/C++SYCL_FPGA/README.md#use-visual-studio-code-vs-code-optional), [links to selected documentation](/DirectProgramming/C++SYCL_FPGA/README.md#documentation), etc.
+
## Purpose
This FPGA tutorial shows how to use the `ac_int` data type with some simple examples.
@@ -92,12 +115,6 @@ Kernel `ShiftOps` contains an `ac_int` left-shifter and an `ac_int` right-shifte
Kernel `BitOps` demonstrates bit operations with bit select operator `[]` and bit slice operations `slc` and `set_slc`.
-### Additional Documentation
-
-- [Explore SYCL* Through Intel® FPGA Code Samples](https://software.intel.com/content/www/us/en/develop/articles/explore-dpcpp-through-intel-fpga-code-samples.html) helps you to navigate the samples and build your knowledge of FPGAs and SYCL.
-- [FPGA Optimization Guide for Intel® oneAPI Toolkits](https://software.intel.com/content/www/us/en/develop/documentation/oneapi-fpga-optimization-guide) helps you understand how to target FPGAs using SYCL and Intel® oneAPI Toolkits.
-- [Intel® oneAPI Programming Guide](https://software.intel.com/en-us/oneapi-programming-guide) helps you understand target-independent, SYCL-compliant programming using Intel® oneAPI Toolkits.
-
## Key Concepts
- The `ac_int` data type can be used to generate hardware for only as many bits as are needed by your application. Native integer types must generate hardware for only 8, 16, 32, or 64 bits.
@@ -106,41 +123,20 @@ Kernel `BitOps` demonstrates bit operations with bit select operator `[]` and bi
## Building the `ac_int` Tutorial
-> **Note**: If you have not already done so, set up your CLI
-> environment by sourcing the `setvars` script located in
-> the root of your oneAPI installation.
+> **Note**: When working with the command-line interface (CLI), you should configure the oneAPI toolkits using environment variables.
+> Set up your CLI environment by sourcing the `setvars` script located in the root of your oneAPI installation every time you open a new terminal window.
+> This practice ensures that your compiler, libraries, and tools are ready for development.
>
> Linux*:
->
> - For system wide installations: `. /opt/intel/oneapi/setvars.sh`
-> - For private installations: `. ~/intel/oneapi/setvars.sh`
+> - For private installations: `. ~/intel/oneapi/setvars.sh`
+> - For non-POSIX shells, like csh, use the following command: `bash -c 'source <install-dir>/setvars.sh ; exec csh'`
>
> Windows*:
->
> - `C:\Program Files(x86)\Intel\oneAPI\setvars.bat`
+> - Windows PowerShell*, use the following command: `cmd.exe "/K" '"C:\Program Files (x86)\Intel\oneAPI\setvars.bat" && powershell'`
>
->For more information on environment variables, see **Use the setvars Script** for [Linux or macOS](https://www.intel.com/content/www/us/en/develop/documentation/oneapi-programming-guide/top/oneapi-development-environment-setup/use-the-setvars-script-with-linux-or-macos.html), or [Windows](https://www.intel.com/content/www/us/en/develop/documentation/oneapi-programming-guide/top/oneapi-development-environment-setup/use-the-setvars-script-with-windows.html).
-
-### Running Samples in Intel® DevCloud
-
-If running a sample in the Intel® DevCloud, remember that you must specify the type of compute node and whether to run in batch or interactive mode. Compiles to FPGA are only supported on fpga_compile nodes. Executing programs on FPGA hardware is only supported on fpga_runtime nodes of the appropriate type, such as fpga_runtime:arria10 or fpga_runtime:stratix10. Neither compiling nor executing programs on FPGA hardware are supported on the login nodes. For more information, see the Intel® oneAPI Base Toolkit Get Started Guide ([https://devcloud.intel.com/oneapi/documentation/base-toolkit/](https://devcloud.intel.com/oneapi/documentation/base-toolkit/)).
-
-When compiling for FPGA hardware, it is recommended to increase the job timeout to 12h.
-
-### Using Visual Studio Code* (Optional)
-
-You can use Visual Studio Code (VS Code) extensions to set your environment, create launch configurations,
-and browse and download samples.
-
-The basic steps to build and run a sample using VS Code include:
-
-- Download a sample using the extension **Code Sample Browser for Intel® oneAPI Toolkits**.
-- Configure the oneAPI environment with the extension **Environment Configurator for Intel® oneAPI Toolkits**.
-- Open a Terminal in VS Code (**Terminal>New Terminal**).
-- Run the sample in the VS Code terminal using the instructions below.
-
-To learn more about the extensions and how to configure the oneAPI environment, see the
-[Using Visual Studio Code with Intel® oneAPI Toolkits User Guide](https://software.intel.com/content/www/us/en/develop/documentation/using-vs-code-with-intel-oneapi/top.html).
+> For more information on configuring environment variables, see [Use the setvars Script with Linux* or macOS*](https://www.intel.com/content/www/us/en/develop/documentation/oneapi-programming-guide/top/oneapi-development-environment-setup/use-the-setvars-script-with-linux-or-macos.html) or [Use the setvars Script with Windows*](https://www.intel.com/content/www/us/en/develop/documentation/oneapi-programming-guide/top/oneapi-development-environment-setup/use-the-setvars-script-with-windows.html).
### On a Linux* System
@@ -261,21 +257,6 @@ directory in a shorter path, for example c:\samples\build. You can then run
cmake from that directory, and provide cmake with the full path to your sample
directory.
-### Troubleshooting
-
-If an error occurs, you can get more details by running `make` with
-the `VERBOSE=1` argument:
-``make VERBOSE=1``
-For more comprehensive troubleshooting, use the Diagnostics Utility for
-Intel® oneAPI Toolkits, which provides system checks to find missing
-dependencies and permissions errors.
-[Learn more](https://software.intel.com/content/www/us/en/develop/documentation/diagnostic-utility-user-guide/top.html).
-
-### In Third-Party Integrated Development Environments (IDEs)
-
-You can compile and run this tutorial in the Eclipse*IDE (in Linux*) and the Visual Studio*IDE (in Windows*).
-For instructions, refer to the following link: [FPGA Workflows on Third-Party IDEs for Intel® oneAPI Toolkits](https://www.intel.com/content/www/us/en/developer/articles/technical/intel-oneapi-dpcpp-fpga-workflow-on-ide.html)
-
## Examining the Reports
Locate `report.html` in the `ac_int_report.prj/reports/` directory. Open the report in any of Chrome*, Firefox*, Edge*, or Internet Explorer*.
@@ -295,10 +276,16 @@ Navigate to *System Viewer* (*Views* > *System Viewer*) and find the cluster in
2. Run the sample of the FPGA simulator device
- ```bash
- ./ac_int.fpga_sim (Linux)
- ac_int.fpga_sim.exe (Windows)
- ```
+ * On Linux
+ ```bash
+ CL_CONTEXT_MPSIM_DEVICE_INTELFPGA=1 ./ac_int.fpga_sim
+ ```
+ * On Windows
+ ```bash
+ set CL_CONTEXT_MPSIM_DEVICE_INTELFPGA=1
+ ac_int.fpga_sim.exe
+ set CL_CONTEXT_MPSIM_DEVICE_INTELFPGA=
+ ```
3. Run the sample on the FPGA device
diff --git a/DirectProgramming/C++SYCL_FPGA/Tutorials/Features/ac_int/src/CMakeLists.txt b/DirectProgramming/C++SYCL_FPGA/Tutorials/Features/ac_int/src/CMakeLists.txt
index 361646a93a..0d127d1e79 100755
--- a/DirectProgramming/C++SYCL_FPGA/Tutorials/Features/ac_int/src/CMakeLists.txt
+++ b/DirectProgramming/C++SYCL_FPGA/Tutorials/Features/ac_int/src/CMakeLists.txt
@@ -33,7 +33,7 @@ set(EMULATOR_LINK_FLAGS "-fsycl -fintelfpga ${AC_TYPES_FLAG}")
# simulator compilation
set(SIMULATOR_COMPILE_FLAGS "-fsycl -fintelfpga ${AC_TYPES_FLAG} -DFPGA_SIMULATOR -Wall ${WIN_FLAG}")
set(SIMULATOR_LINK_FLAGS "-fsycl -fintelfpga -Xssimulation -Xsghdl -Xstarget=${FPGA_DEVICE} ${AC_TYPES_FLAG} ${USER_HARDWARE_FLAGS}")
-set(HARDWARE_COMPILE_FLAGS "-fsycl -fintelfpga ${AC_TYPES_FLAG} -Wall ${WIN_FLAG}")
+set(HARDWARE_COMPILE_FLAGS "-fsycl -fintelfpga ${AC_TYPES_FLAG} -Wall ${WIN_FLAG} -DFPGA_HARDWARE")
set(HARDWARE_LINK_FLAGS "-fsycl -fintelfpga ${AC_TYPES_FLAG} -Xshardware -Xstarget=${FPGA_DEVICE} ${USER_HARDWARE_FLAGS}")
# We do not need to supply the AC_TYPES_FLAG for the 'report' target's linking stage.
set(REPORT_LINK_FLAGS "-fsycl -fintelfpga -Xshardware -Xstarget=${FPGA_DEVICE} ${USER_HARDWARE_FLAGS}")
diff --git a/DirectProgramming/C++SYCL_FPGA/Tutorials/Features/ac_int/src/ac_int.cpp b/DirectProgramming/C++SYCL_FPGA/Tutorials/Features/ac_int/src/ac_int.cpp
index 13950552fd..de6e478922 100644
--- a/DirectProgramming/C++SYCL_FPGA/Tutorials/Features/ac_int/src/ac_int.cpp
+++ b/DirectProgramming/C++SYCL_FPGA/Tutorials/Features/ac_int/src/ac_int.cpp
@@ -113,18 +113,24 @@ MyInt14 TestBitAccess(queue &q, const MyInt14 &a) {
}
int main() {
-#if defined(FPGA_EMULATOR)
- ext::intel::fpga_emulator_selector device_selector;
-#elif defined(FPGA_SIMULATOR)
- ext::intel::fpga_simulator_selector device_selector;
-#else
- ext::intel::fpga_selector device_selector;
+#if FPGA_SIMULATOR
+ auto selector = sycl::ext::intel::fpga_simulator_selector_v;
+#elif FPGA_HARDWARE
+ auto selector = sycl::ext::intel::fpga_selector_v;
+#else // #if FPGA_EMULATOR
+ auto selector = sycl::ext::intel::fpga_emulator_selector_v;
#endif
bool passed = true;
try {
- queue q(device_selector, fpga_tools::exception_handler);
+ queue q(selector, fpga_tools::exception_handler);
+
+ auto device = q.get_device();
+
+ std::cout << "Running on device: "
+ << device.get_info().c_str()
+ << std::endl;
constexpr int kVal1 = 1000, kVal2 = 2;
diff --git a/DirectProgramming/C++SYCL_FPGA/Tutorials/Features/dsp_control/README.md b/DirectProgramming/C++SYCL_FPGA/Tutorials/Features/dsp_control/README.md
index dee35b0ff4..fa3aa09284 100644
--- a/DirectProgramming/C++SYCL_FPGA/Tutorials/Features/dsp_control/README.md
+++ b/DirectProgramming/C++SYCL_FPGA/Tutorials/Features/dsp_control/README.md
@@ -12,13 +12,36 @@ This FPGA tutorial demonstrates how to set the implementation preference for cer
> **Note**: Even though the Intel DPC++/C++ OneAPI compiler is enough to compile for emulation, generating reports and generating RTL, there are extra software requirements for the simulation flow and FPGA compiles.
>
-> For using the simulator flow, one of the following simulators must be installed and accessible through your PATH:
+> For using the simulator flow, Intel® Quartus® Prime Pro Edition and one of the following simulators must be installed and accessible through your PATH:
> - Questa*-Intel® FPGA Edition
> - Questa*-Intel® FPGA Starter Edition
> - ModelSim® SE
>
> When using the hardware compile flow, Intel® Quartus® Prime Pro Edition must be installed and accessible through your PATH.
+## Prerequisites
+
+This sample is part of the FPGA code samples.
+It is categorized as a Tier 3 sample that demonstrates a compiler feature.
+
+```mermaid
+flowchart LR
+ tier1("Tier 1: Get Started")
+ tier2("Tier 2: Explore the Fundamentals")
+ tier3("Tier 3: Explore the Advanced Techniques")
+ tier4("Tier 4: Explore the Reference Designs")
+
+ tier1 --> tier2 --> tier3 --> tier4
+
+ style tier1 fill:#0071c1,stroke:#0071c1,stroke-width:1px,color:#fff
+ style tier2 fill:#0071c1,stroke:#0071c1,stroke-width:1px,color:#fff
+ style tier3 fill:#f96,stroke:#333,stroke-width:1px,color:#fff
+ style tier4 fill:#0071c1,stroke:#0071c1,stroke-width:1px,color:#fff
+```
+
+Find more information about how to navigate this part of the code samples in the [FPGA top-level README.md](/DirectProgramming/C++SYCL_FPGA/README.md).
+You can also find more information about [troubleshooting build errors](/DirectProgramming/C++SYCL_FPGA/README.md#troubleshooting), [running the sample on the Intel® DevCloud](/DirectProgramming/C++SYCL_FPGA/README.md#build-and-run-the-samples-on-intel-devcloud-optional), [using Visual Studio Code with the code samples](/DirectProgramming/C++SYCL_FPGA/README.md#use-visual-studio-code-vs-code-optional), [links to selected documentation](/DirectProgramming/C++SYCL_FPGA/README.md#documentation), etc.
+
## Purpose
This tutorial shows how to apply global and local controls to set the implementation preference between DSPs and soft-logic for certain math operations. The global control is applied using a command-line flag and affects applicable math operations in all kernels. The local control is applied as a library function and affects math operations in a block scope in a single kernel. Both global and local controls only affect math operations that support DSP control (see table below).
@@ -64,12 +87,6 @@ The second template argument `Propagate::