From cad9d7fd1687f7d0249bfa8c84d008850ab57e03 Mon Sep 17 00:00:00 2001 From: kevinUTAT Date: Fri, 10 Feb 2023 09:57:09 -0800 Subject: [PATCH 1/7] restore from empty_project --- .../GettingStarted/fpga_compile/README.md | 337 ++++++++++-------- .../fpga_compile/part1-C++/CMakeLists.txt | 20 ++ .../fpga_compile/part1-C++/src/CMakeLists.txt | 25 ++ .../fpga_compile/part1-C++/src/vector_add.cpp | 46 +++ .../part2-dpcpp_functor_usm/CMakeLists.txt | 20 ++ .../src/CMakeLists.txt | 99 +++++ .../src/vector_add.cpp | 97 +++++ .../part3-dpcpp_lambda_usm/CMakeLists.txt | 20 ++ .../part3-dpcpp_lambda_usm/src/CMakeLists.txt | 99 +++++ .../part3-dpcpp_lambda_usm/src/vector_add.cpp | 96 +++++ .../part4-dpcpp_lambda_buffers/CMakeLists.txt | 20 ++ .../src/CMakeLists.txt | 99 +++++ .../src/vector_add.cpp | 105 ++++++ .../CMakeLists.txt | 20 ++ .../src/CMakeLists.txt | 99 +++++ .../src/vector_add.cpp | 116 ++++++ 16 files changed, 1170 insertions(+), 148 deletions(-) create mode 100644 DirectProgramming/C++SYCL_FPGA/Tutorials/GettingStarted/fpga_compile/part1-C++/CMakeLists.txt create mode 100644 DirectProgramming/C++SYCL_FPGA/Tutorials/GettingStarted/fpga_compile/part1-C++/src/CMakeLists.txt create mode 100644 DirectProgramming/C++SYCL_FPGA/Tutorials/GettingStarted/fpga_compile/part1-C++/src/vector_add.cpp create mode 100644 DirectProgramming/C++SYCL_FPGA/Tutorials/GettingStarted/fpga_compile/part2-dpcpp_functor_usm/CMakeLists.txt create mode 100644 DirectProgramming/C++SYCL_FPGA/Tutorials/GettingStarted/fpga_compile/part2-dpcpp_functor_usm/src/CMakeLists.txt create mode 100644 DirectProgramming/C++SYCL_FPGA/Tutorials/GettingStarted/fpga_compile/part2-dpcpp_functor_usm/src/vector_add.cpp create mode 100644 DirectProgramming/C++SYCL_FPGA/Tutorials/GettingStarted/fpga_compile/part3-dpcpp_lambda_usm/CMakeLists.txt create mode 100644 DirectProgramming/C++SYCL_FPGA/Tutorials/GettingStarted/fpga_compile/part3-dpcpp_lambda_usm/src/CMakeLists.txt create mode 100644 DirectProgramming/C++SYCL_FPGA/Tutorials/GettingStarted/fpga_compile/part3-dpcpp_lambda_usm/src/vector_add.cpp create mode 100644 DirectProgramming/C++SYCL_FPGA/Tutorials/GettingStarted/fpga_compile/part4-dpcpp_lambda_buffers/CMakeLists.txt create mode 100644 DirectProgramming/C++SYCL_FPGA/Tutorials/GettingStarted/fpga_compile/part4-dpcpp_lambda_buffers/src/CMakeLists.txt create mode 100644 DirectProgramming/C++SYCL_FPGA/Tutorials/GettingStarted/fpga_compile/part4-dpcpp_lambda_buffers/src/vector_add.cpp create mode 100644 DirectProgramming/C++SYCL_FPGA/Tutorials/GettingStarted/fpga_compile/part5-dpcpp_functor_buffers/CMakeLists.txt create mode 100644 DirectProgramming/C++SYCL_FPGA/Tutorials/GettingStarted/fpga_compile/part5-dpcpp_functor_buffers/src/CMakeLists.txt create mode 100644 DirectProgramming/C++SYCL_FPGA/Tutorials/GettingStarted/fpga_compile/part5-dpcpp_functor_buffers/src/vector_add.cpp diff --git a/DirectProgramming/C++SYCL_FPGA/Tutorials/GettingStarted/fpga_compile/README.md b/DirectProgramming/C++SYCL_FPGA/Tutorials/GettingStarted/fpga_compile/README.md index 41af40b99d..1e3db77510 100755 --- a/DirectProgramming/C++SYCL_FPGA/Tutorials/GettingStarted/fpga_compile/README.md +++ b/DirectProgramming/C++SYCL_FPGA/Tutorials/GettingStarted/fpga_compile/README.md @@ -1,30 +1,32 @@ -# Compiling SYCL* for FPGA -This FPGA tutorial introduces how to compile SYCL*-compliant code for FPGA through a simple vector addition example. If you are new to SYCL* for FPGA, start with this sample. +# `Compiling SYCL* for FPGAs` Sample +This FPGA tutorial introduces how to compile SYCL*-compliant code for FPGAs through a simple vector addition example. If you are new to SYCL* for FPGAs, start with this sample. | Optimized for | Description |:--- |:--- | OS | Linux* Ubuntu* 18.04/20.04
RHEL*/CentOS* 8
SUSE* 15
Windows* 10 | Hardware | Intel® Agilex™, Arria® 10, and Stratix® 10 FPGAs | Software | Intel® oneAPI DPC++/C++ Compiler -| What you will learn | How and why compiling SYCL* code for FPGA differs from CPU or GPU
FPGA device image types and when to use them
The compile options used to target FPGA -| Time to complete | 15 minutes +| What you will learn | How and why compiling SYCL* code for FPGA differs from CPU or GPU
The compile options used to target FPGA devices +| Time to complete | 60 minutes > **Note**: Even though the Intel DPC++/C++ OneAPI compiler is enough to compile for emulation, generating reports and generating RTL, there are extra software requirements for the simulation flow and FPGA compiles. > -> For using the simulator flow, Intel® Quartus® Prime Pro Edition and one of the following simulators must be installed and accessible through your PATH: +> For the simulation flow, one of the following simulators must be installed and accessible through your PATH environment variable: > - Questa*-Intel® FPGA Edition > - Questa*-Intel® FPGA Starter Edition > - ModelSim® SE > > When using the hardware compile flow, Intel® Quartus® Prime Pro Edition must be installed and accessible through your PATH. -> +> +> When using the hardware compile flow, Intel® Quartus® Prime Pro Edition must be installed and accessible through your PATH. +> > :warning: Make sure you add the device files associated with the FPGA that you are targeting to your Intel® Quartus® Prime installation. -## Prerequisites +> **Note**: SYCL USM allocations, used in `part2` and `part5` of this tutorial, are only supported on FPGA boards that have a USM capable BSP (e.g. the Intel® FPGA PAC D5005 with Intel Stratix® 10 SX with USM support: intel_s10sx_pac:pac_s10_usm) or when targeting an FPGA family/part number. +## Prerequisites This sample is part of the FPGA code samples. It is categorized as a Tier 1 sample that helps you getting started. - ```mermaid flowchart LR tier1("Tier 1: Get Started") @@ -39,138 +41,171 @@ flowchart LR style tier3 fill:#0071c1,stroke:#0071c1,stroke-width:1px,color:#fff style tier4 fill:#0071c1,stroke:#0071c1,stroke-width:1px,color:#fff ``` - Find more information about how to navigate this part of the code samples in the [FPGA top-level README.md](/DirectProgramming/DPC++FPGA/README.md). You can also find more information about [troubleshooting build errors](/DirectProgramming/DPC++FPGA/README.md#troubleshooting), [running the sample on the Intel® DevCloud](/DirectProgramming/DPC++FPGA/README.md#build-and-run-the-samples-on-intel-devcloud-optional), [using Visual Studio Code with the code samples](/DirectProgramming/DPC++FPGA/README.md#use-visual-studio-code-vs-code-optional), [links to selected documentation](/DirectProgramming/DPC++FPGA/README.md#documentation), etc. + + +> **Note**: Even though the Intel DPC++/C++ OneAPI compiler is enough to compile for emulation, generating reports and generating RTL, there are extra software requirements for the simulation flow and FPGA compiles. +> +> For using the simulator flow, one of the following simulators must be installed and accessible through your PATH: +> - Questa*-Intel® FPGA Edition +> - Questa*-Intel® FPGA Starter Edition +> - ModelSim® SE +> +> When using the hardware compile flow, Intel® Quartus® Prime Pro Edition must be installed and accessible through your PATH. + ## Purpose Field-programmable gate arrays (FPGAs) are configurable integrated circuits that can be programmed to implement arbitrary circuit topologies. Classified as *spatial* compute architectures, FPGAs differ significantly from fixed Instruction Set Architecture (ISA) devices like CPUs and GPUs. FPGAs offer a different set of optimization trade-offs from these traditional accelerator devices. While SYCL* code can be compiled for CPU, GPU, or FPGA, compiling to FPGA is somewhat different. This tutorial explains these differences and shows how to compile a "Hello World" style vector addition kernel for FPGA, following the recommended workflow. ### Why is compilation different for FPGA? -FPGAs differ from CPUs and GPUs in many interesting ways. However, in this tutorial's scope, there is only one difference that matters: compared to CPU or GPU, generating a device image for FPGA hardware is a computationally intensive and time-consuming process. It is usual for an FPGA compile to take several hours to complete. +FPGAs differ from CPUs and GPUs in many interesting ways. + +Compared to CPU or GPU, generating a device image for FPGA hardware is a computationally intensive and time-consuming process. It is usual for an FPGA compile to take several hours to complete. For this reason, only ahead-of-time (or "offline") kernel compilation mode is supported for FPGA. The long compile time for FPGA hardware makes just-in-time (or "online") compilation impractical. + +Long compile times are detrimental to developer productivity. The Intel® oneAPI DPC++/C++ Compiler provides several mechanisms that enable developers targeting FPGAs to iterate quickly on their designs. By circumventing the time-consuming process of full FPGA compilation wherever possible, developers can enjoy the fast compile times familiar to CPU and GPU developers. -For this reason, only ahead-of-time (or "offline") kernel compilation mode is supported for FPGA. The long compile time for FPGA hardware makes just-in-time (or "online") compilation impractical. +### Multiarchitecture binary vs IP component -Long compile times are detrimental to developer productivity. The Intel® oneAPI DPC++/C++ Compiler provides several mechanisms that enable developers targeting FPGA to iterate quickly on their designs. By circumventing the time-consuming process of full FPGA compilation wherever possible, SYCL for FPGA developers can enjoy the fast compile times familiar to CPU and GPU developers. +In the FPGA multiarchitecture binary generation flow, you can generate an executable host application and accelerator for a PCIe FPGA board if you have a compatible board support package (BSP). Intel provides BSPs for the Intel® PAC with Intel Arria® 10 GX FPGA, and the Intel® FPGA PAC D5005 (with Intel Stratix® 10 SX). If you have a different board, check with your vendor to see if they supply a BSP. +In the FPGA IP component generation flow, you can generate an IP component that you can import into an Intel® Quartus® Prime project. You can generate an IP by targeting your compilation to a supported Intel® FPGA device family or part number (for example, `Agilex` or `AGFA014R24B1E1V`) instead of a named board (for example, `intel_a10gx_pac:pac_a10`). -### Three types of SYCL for FPGA compilation -The three types of FPGA compilation are summarized in the table below. +The FPGA IP component generation flow does not generate any FPGA accelerated executable, only RTL (Register Transfer Level) IP component files. The host application is treated only as a 'testbench' that exercises and validates your IP component in emulation and simulation. -| Device Image Type | Time to Compile | Description ---- |--- |--- -| FPGA Emulator | seconds | The FPGA device code is compiled to the CPU.
This is used to verify the code's functional correctness. -| Optimization Report | minutes | The FPGA device code is partially compiled for hardware.
The compiler generates an optimization report that describes the structures generated on the FPGA, identifies performance bottlenecks, and estimates resource utilization. -| FPGA Hardware | hours | Runs Intel® Quartus® to get accurate resource usage and fmax estimates. If a BSP is targeted, generates the real FPGA bitstream to execute on the target FPGA platform +### Four compilation options +The four types of FPGA compilation are summarized in the table below. -The typical FPGA development workflow is to iterate in each of these stages, refining the code using the feedback provided by that stage. Intel® recommends relying on emulation and the optimization report whenever possible. + | Target | Expected Time | Output | Description + |:--- |:--- |:--- |:--- + |Emulator | Seconds | x86-64 binary | Compiles the FPGA device code to the CPU. Use the Intel® FPGA Emulation Platform for OpenCL™ software to verify your SYCL code’s functional correctness. + | Optimization Report | Minutes | RTL + FPGA reports | Compiles the FPGA device code to RTL and generates an optimization report that describes the structures generated on the FPGA, identifies performance bottlenecks, and estimates resource utilization. This report will include the interfaces defined in your selected Board Support Package. + | Simulator | Minutes | RTL + FPGA reports + x86-64 binary | Compiles the FPGA device code to RTL and generates a simulation testbench. Use the Questa*-Intel® FPGA Edition simulator to verify your design. + | FPGA Hardware | Multiple Hours | Quartus Place & Route (Multiarchitecture binary) + FPGA reports + x86-64 host binary | Compiles the FPGA device code to RTL and compiles the generated RTL using Intel® Quartus® Prime. If you specified a BSP with `FPGA_DEVICE`, this will generate an FPGA image that you can run on the corresponding accelerator board. -Compiling for FPGA emulation or generating the FPGA optimization report requires only the Intel® oneAPI DPC++/C++ Compiler (part of the Intel® oneAPI Base Toolkit). +The typical FPGA development workflow is to iterate in each of these stages, refining the code using the feedback provided by that stage. You can avoid long compile times by relying on emulation and the optimization report whenever possible. #### FPGA Emulator -The FPGA emulator is the fastest method to verify the correctness of your code. The FPGA emulator executes the SYCL* device code on the CPU. The emulator is similar to the SYCL* host device, but unlike the host device, the FPGA emulator device supports FPGA extensions such as FPGA pipes and `fpga_reg`. +The FPGA emulator is the fastest method to verify the correctness of your code. The FPGA emulator executes the SYCL* device code on the CPU. The emulator is similar to the SYCL* host device, but unlike the host device, the FPGA emulator device supports FPGA extensions such as FPGA pipes and `fpga_reg` (although some of these features, such as `fpga_reg` may not affect how your design runs on the emulator). + +There are two important caveats to remember when using the FPGA emulator. +* **Performance is not representative.** _Never_ draw inferences about FPGA performance from the FPGA emulator. The FPGA emulator's timing behavior is uncorrelated to that of the physical FPGA hardware. For example, an optimization that yields a 100x performance improvement on the FPGA may show no impact on the emulator performance. It may show an unrelated increase or even a decrease. +* **Undefined behavior may differ.** If your code produces different results when compiled for the FPGA emulator versus FPGA hardware, your code may exercises undefined behavior. By definition, undefined behavior is not specified by the language specification and may manifest differently on different targets. + +#### Optimization Report (Early Image) + +For this compilation type, your SYCL device code is optimized and converted into an FPGA design specified in Verilog RTL (a low-level, native entry language for FPGAs). This intermediate compilation result is also called the *FPGA early device image*, which is **not** executable. + +The optimization report contains significant information about how the compiler has transformed your device code into an FPGA design. The report includes visualizations of structures generated on the FPGA, performance and expected performance bottleneck information, and estimated resource utilization. Optimization reports are generated for the "optimization report", "simulator" and "hardware" compilation types. + +The [FPGA Optimization Guide for Intel® oneAPI Toolkits Developer Guide][1] contains a chapter about how to analyze the reports generated after the FPGA early image and FPGA image. #### FPGA Simulator +The FPGA simulator allows you to simulate the exact behavior of the synthesized kernel. Like emulation, you can run simulation on a system that does not have a target FPGA board installed. The simulator models a kernel much more accurately than the emulator, but it is much slower than the emulator. -The FPGA simulator is the fastest method to verify the correctness of the gerenated RTL. The FPGA simulator executes the SYCL* device code in an RTL simulator (e.g. Questa*). The host code still runs on the CPU as it would when targetting an FPGA. When using this flow, the generated exectuable will launch the simulator and inject the obtained results in the host execution. +The Intel oneAPI DPC++/C++ Compiler links your design C++ testbench with an RTL-compiled version of your component that runs in an RTL simulator. You do not need to invoke any RTL simulator manually, but you can add the `-Xsghdl` flag to save the simulation waveform for later viewing. -There are two important caveats to remember when using the FPGA emulator and the FPGA simulator. -* **Performance is not representative.** _Never_ draw inferences about FPGA performance from the FPGA emulator. The FPGA emulator's timing behavior is uncorrelated to that of the physical FPGA hardware. For example, an optimization that yields a 100x performance improvement on the FPGA may show no impact on the emulator performance. It may show an unrelated increase or even a decrease. -* **Undefined behavior may differ.** If your code produces different results when compiled for the FPGA emulator versus FPGA hardware, your code most likely exercises undefined behavior. By definition, undefined behavior is not specified by the language specification and may manifest differently on different targets. +> **Note**: Running the simulation executable can take a long time if your device code is complex or if your test inputs are large. To save simulation time, use the smallest possible input. -#### Optimization Report -A full FPGA compilation occurs in two stages: -1. **FPGA early image:** The SYCL device code is optimized and converted into an FPGA design specified in Verilog RTL (a low-level, native entry language for FPGAs). This intermediate compilation result is the FPGA early device image, which is *not* executable. This FPGA early image compilation process takes minutes. -2. **FPGA hardware image:** The Verilog RTL specifying the design's circuit topology is mapped onto the FPGA's sea of primitive hardware resources by the Intel® Quartus® Prime software. Intel® Quartus® Prime is included in the Intel® FPGA Add-On, which is required for this compilation stage. The result is an FPGA hardware binary (also referred to as a bitstream). This compilation process takes hours. +#### FPGA Hardware (Hardware Image) -Optimization reports are generated after both stages. The optimization report generated after the FPGA early device image, sometimes called the "static report," contains significant information about how the compiler has transformed your device code into an FPGA design. The report includes visualizations of structures generated on the FPGA, performance and expected performance bottleneck information, and estimated resource utilization. +The generated Verilog RTL is mapped onto the FPGA hardware resources by the Intel® Quartus® Prime software. The estimated performance and resource utilization is therefore much more accurate than the estimates obtained in the optimization report compilation type. -The [FPGA Optimization Guide for Intel® oneAPI Toolkits Developer Guide](https://software.intel.com/content/www/us/en/develop/documentation/oneapi-fpga-optimization-guide/top/analyze-your-design.html) contains a chapter on how to analyze the reports generated after the FPGA early image and FPGA image. +If you compile a multiarchitecture binary, the resulting binary will include an FPGA hardware image (also referred to as a bitstream) that is executable on an FPGA accelerator card with a supported BSP. The compiler will interface your design with the BSP, and your host application will seamlessly make the system calls to launch kernels on the FPGA. -#### FPGA Hardware -This is a full compile through to the FPGA hardware image. -You can target an FPGA family/part number to get accurate resource usage and fmax estimates. -You can also target a device with a BSP (e.g. for the Intel® PAC with Intel Arria® 10 GX FPGA: intel_a10gx_pac:pac_a10) to get an executable that can be directly executed. +If you compile an IP component, the compilation result is **not** executable. IP components are compiled in isolation and not interfaced with other components on the FPGA. The purpose of this compilation flow is to get accurate resource utilization and performance data for IP components. + +This compilation process takes hours, although it may be faster if you generate a re-usable IP component. ### Device Selectors The following code snippet demonstrates how you can specify the target device in your source code. The selector is used to specify the target device at runtime. +It is recommended to use a preprocessor macro to choose between the emulator and FPGA selectors. This makes it easy to switch between targets using only command-line options. Since the FPGA only supports ahead-of-time compilation, dynamic selectors (such as the `default_selector`) are less useful than explicit selectors when targeting FPGA. + ```c++ // FPGA device selectors are defined in this utility header #include int main() { - // Select either: - // - the FPGA emulator device (CPU emulation of the FPGA) - // - the FPGA simulator - // - the FPGA device (a real FPGA) +// choose a selector based on compiler flags. #if FPGA_SIMULATOR - auto selector = sycl::ext::intel::fpga_simulator_selector_v; + auto selector = sycl::ext::intel::fpga_simulator_selector_v; #elif FPGA_HARDWARE - auto selector = sycl::ext::intel::fpga_selector_v; + auto selector = sycl::ext::intel::fpga_selector_v; #else // #if FPGA_EMULATOR - auto selector = sycl::ext::intel::fpga_emulator_selector_v; + auto selector = sycl::ext::intel::fpga_emulator_selector_v; #endif - - queue q(selector); - ... + sycl::queue q(selector); + ... } ``` -Notice that the FPGA emulator, FPGA simulator and the FPGA are different target devices. It is recommended to use a preprocessor define to choose between the different selectors. This makes it easy to switch between targets using only command-line options. Since the FPGA only supports ahead-of-time compilation, dynamic selectors (such as the default_selector) are less useful than explicit selectors when targeting FPGA. ### Compiler Options -This section includes a helpful list of commands and options to compile this design for the FPGA emulator, generate the FPGA early image optimization reports, and compile for FPGA hardware. +This section includes a helpful list of commands and options to compile this design for the FPGA emulator, generate the FPGA early image optimization reports, and compile for FPGA hardware.\ +>**Note**: In this sample, the compiler is refered to as `icpx`. On Windows, you should use `icx-cl`. -**NOTE:** In this sample, the compiler is refered to as `icpx`. On Windows, you should use `icx-cl`. +FPGA Emulator -**FPGA emulator** +```bash +# FPGA emulator image +icpx -fsycl -fintelfpga -DFPGA_EMULATOR fpga_design.cpp -o fpga_design.fpga_emu +``` -`icpx -fsycl -fintelfpga -DFPGA_EMULATOR fpga_compile.cpp -o fpga_compile.fpga_emu` +Optimization Report -**FPGA simulator** +```bash +# FPGA early image (with optimization report): +icpx -fsycl -fintelfpga -DFPGA_HARDWARE fpga_design.cpp -Xshardware -fsycl-link=early -Xstarget=Agilex -o fpga_design_report.a +``` +Use the`-Xstarget` flag to target a supported board, a device family, or a specific FPGA part number. -`icpx -fsycl -fintelfpga -Xssimulation -DFPGA_SIMULATOR fpga_compile.cpp -o fpga_compile.fpga_sim` +Simulator -**Optimization report (default FPGA device)** +```bash +# FPGA simulator image: +icpx -fsycl -fintelfpga -DFPGA_SIMULATOR fpga_design.cpp -Xssimulation -Xstarget=Agilex -Xsghdl -o fpga_design_sim.a +``` +Through `-Xstarget`, you can target an explicit board, a device family or a FPGA part number. -`icpx -fsycl -fintelfpga -DFPGA_HARDWARE -Xshardware -fsycl-link=early fpga_compile.cpp -o fpga_compile_report.a` +Hardware -**Optimization report (explicit FPGA device)** +```bash +# FPGA hardware image: +icpx -fsycl -fintelfpga -DFPGA_HARDWARE fpga_design.cpp -Xshardware -Xstarget=Agilex -o fpga_design.fpga +``` +Through `-Xstarget`, you can target an explicit board, a device family or a FPGA part number. -`icpx -fsycl -fintelfpga -DFPGA_HARDWARE -Xshardware -fsycl-link=early -Xstarget=intel_s10sx_pac:pac_s10 fpga_compile.cpp -o fpga_compile_report.a` +`-DFPGA_EMULATOR`, `-DFPGA_SIMULATOR`, `-DFPGA_HARDWARE` are options that adds a preprocessor define that invokes the emulator/simulator/FPGA device selector in this sample (see code snippet above). -**FPGA hardware (default FPGA device)** +The [Intel® oneAPI Programming Guide][2] contains a chapter explains the compiler options used here. -`icpx -fsycl -fintelfpga -DFPGA_HARDWARE -Xshardware fpga_compile.cpp -o fpga_compile.fpga` +### Source Code -**FPGA hardware (explicit FPGA device)** +There are 5 parts to this tutorial located in the 3 sub-folders. Together, they demonstrate how you can migrate an algorithm from vanilla C++ code to SYCL for FPGA. Note that you may also choose to use a functor with buffers, or a function with USM. -`icpx -fsycl -fintelfpga -DFPGA_HARDWARE -Xshardware -Xstarget=intel_s10sx_pac:pac_s10 fpga_compile.cpp -o fpga_compile.fpga` +#### Part 1 C++ +Part 1 demonstrates a vector addition program in vanilla C++. Observe how the `VectorAdd` function is separated from the `main()` function, and the `vec_a`, `vec_b`, and `vec_c` vectors are allocated onto the heap. -The compiler options used are explained in the table. -| Flag | Explanation -|:--- |:--- -| `-fsycl` | Instructs the compiler that the code is written in the SYCL language -| `-fintelfpga` | Perform ahead-of-time compilation for FPGA. -| `-DFPGA_EMULATOR` | Adds a preprocessor define that invokes the emulator device selector in this sample (see code snippet above). -| `-DFPGA_SIMULATOR` | Adds a preprocessor define that invokes the simulator device selector in this sample (see code snippet above). -| `-DFPGA_HARDWARE` | Adds a preprocessor define that invokes the FPGA hardware device selector in this sample (see code snippet above). -| `-Xshardware` | `-Xs` is used to pass arguments to the FPGA backend.
Since the emulator is the default FPGA target, you must pass `Xshardware` to instruct the compiler to target FPGA hardware. -| `-Xstarget` | Optional argument to specify the FPGA target.
If omitted, a default FPGA board is chosen. -| `-fsycl-link=early`| Instructs the compiler to stop after creating the FPGA early image (and associated optimization report). +#### Part 2 SYCL* (functor and USM) +Part 2 shows the same vector addition from part 1, but in SYCL* C++ with a 'functor' coding style using a unified shared memory (USM) interface. Compare with the source code in part 1 to see SYCL*-specific code changes. Observe how the `VectorAdd` functor is called using `q.single_task<...>(VectorAdd{...});`. This tells the DPC++ compiler to convert `VectorAdd` into RTL. Also observe how `vec_a`, `vec_b`, and `vec_c` are allocated into a shared memory space using `malloc_shared`; this tells the DPC++ compiler that `vec_a`, `vec_b`, and `vec_c` should be visible both to your kernel and your host code (or if you are creating an IP component, the testbench code). -Notice that whether you target the FPGA emulator, FPGA simulator or FPGA hardware must be specified twice: through compiler options for the ahead-of-time compilation and through the runtime device selector. +#### Part 3 SYCL* (lambda function and USM) +Part 3 demonstrates vector addition in SYCL* C++ with a 'function' coding style using unified shared memory (USM). This code style will be familiar to users who are already experienced with SYCL*. Observe how the `VectorAdd` function is called using a lambda expression: +```c++ +h.single_task<...>([=]() { + VectorAdd(...); +}); +``` +#### Part 4 SYCL* (lambda function and buffer) +Part 4 shows the vector addition in SYCL* C++ with a 'function' coding style and buffer & accessor interface. This code style will be familiar to users who are already experienced with SYCL*. Observe how `vec_a`, `vec_b`, and `vec_c` are copied into buffers before the `VectorAdd` function is called. -## Key Concepts -* How and why compiling SYCL*-compliant code to FPGA differs from CPU or GPU -* FPGA device image types and when to use them -* The compile options used to target FPGA +#### Part 5 SYCL* (functor and buffer) +Part 5 demonstrates the vector addition in SYCL* C++ with a 'functor' coding style using buffer & accessor interface. Observe how template parameters are used in the functor to pass in the accessors. ## Building the `fpga_compile` Tutorial - > **Note**: When working with the command-line interface (CLI), you should configure the oneAPI toolkits using environment variables. > Set up your CLI environment by sourcing the `setvars` script located in the root of your oneAPI installation every time you open a new terminal window. > This practice ensures that your compiler, libraries, and tools are ready for development. @@ -184,11 +219,23 @@ Notice that whether you target the FPGA emulator, FPGA simulator or FPGA hardwar > - `C:\Program Files(x86)\Intel\oneAPI\setvars.bat` > - Windows PowerShell*, use the following command: `cmd.exe "/K" '"C:\Program Files (x86)\Intel\oneAPI\setvars.bat" && powershell'` > -> For more information on configuring environment variables, see [Use the setvars Script with Linux* or macOS*](https://www.intel.com/content/www/us/en/develop/documentation/oneapi-programming-guide/top/oneapi-development-environment-setup/use-the-setvars-script-with-linux-or-macos.html) or [Use the setvars Script with Windows*](https://www.intel.com/content/www/us/en/develop/documentation/oneapi-programming-guide/top/oneapi-development-environment-setup/use-the-setvars-script-with-windows.html). +> For more information on configuring environment variables, see [Use the setvars Script with Linux* or macOS*][3] or [Use the setvars Script with Windows*][4]. -### On a Linux* System +### Run CMake to generate the Makefiles -1. Generate the `Makefile` by running `cmake`. +#### On a Linux* System +For different parts of this tutorial, navigate to the appropriate sub-folder. +```bash +cd +``` +`` can be: +- `part1-C++` +- `part2-dpcpp_functor_usm` +- `part3-dpcpp_lambda_usm` +- `part4-dpcpp_lambda_buffers` +- `part5-dpcpp_functor_buffers` + +Generate the `Makefile` by running `cmake`. ``` mkdir build cd build @@ -197,7 +244,6 @@ Notice that whether you target the FPGA emulator, FPGA simulator or FPGA hardwar ``` cmake .. ``` - > **Note**: You can change the default target by using the command: > ``` > cmake .. -DFPGA_DEVICE= @@ -210,28 +256,19 @@ Notice that whether you target the FPGA emulator, FPGA simulator or FPGA hardwar > > You will only be able to run an executable on the FPGA if you specified a BSP. -2. Compile the design through the generated `Makefile`. The following build targets are provided, matching the recommended development flow: - - * Compile for [emulation](#fpga-emulator) (compiles quickly, targets emulated FPGA device): - ``` - make fpga_emu - ``` - * Compile for [simulation](#fpga-simulator) (fast compile time, targets simulator FPGA device): - ``` - make fpga_sim - ``` - * Generate the [optimization report](#optimization-report): - ``` - make report - ``` - * Compile for [FPGA hardware](#fpga-hardware) (takes longer to compile, targets FPGA device): - ``` - make fpga - ``` - -### On a Windows* System - -1. Generate the `Makefile` by running `cmake`. +#### On a Windows* System +For different parts of this tutorial, navigate to the appropriate sub-folder. +```cmd +cd +``` +`` can be: +- `part1-C++` +- `part2-dpcpp_functor_usm` +- `part3-dpcpp_lambda_usm` +- `part4-dpcpp_lambda_buffers` +- `part5-dpcpp_functor_buffers` + +Generate the `Makefile` by running `cmake`. ``` mkdir build cd build @@ -252,61 +289,65 @@ Notice that whether you target the FPGA emulator, FPGA simulator or FPGA hardwar > > You will only be able to run an executable on the FPGA if you specified a BSP. -2. Compile the design through the generated `Makefile`. The following build targets are provided, matching the recommended development flow: - - * Compile for [emulation](#fpga-emulator) (compiles quickly, targets emulated FPGA device): - ``` - nmake fpga_emu - ``` - * Compile for [simulation](#fpga-simulator) (fast compile time, targets simulator FPGA device): - ``` - nmake fpga_sim - ``` - * Generate the [optimization report](#optimization-report): - ``` - nmake report - ``` - * Compile for [FPGA hardware](#fpga-hardware) (takes longer to compile, targets FPGA device): - ``` - nmake fpga - ``` +### Build using `make`/`nmake` +After using CMake to generate build artifacts, you can then build with specific targets. This project can build 4 targets. -## Examining the Reports -Locate `report.html` in the `fpga_compile_report.prj/reports/` directory. Open the report in any of Chrome*, Firefox*, Edge*, or Internet Explorer*. +| Compilation Type | Command (Linux) | Command (Windows) +|:--- |:--- |:--- +| FPGA Emulator | `make fpga_emu` | `nmake fpga_emu` +| Optimization Report | `make report` | `nmake report` +| FPGA Simulator | `make fpga_sim` | `nmake fpga_sim` +| FPGA Hardware | `make fpga` | `nmake fpga` + +The `fpga_emu`, `fpga_sim` and `fpga` targets produce binaries that you can run. The executables will be called `vector_add.fpga_emu`, `vector_add.fpga_sim`, and `vector_add.fpga`. The `fpga` target will produce an executable binary if you create a multiarchitecture binary kernel. -Browse the reports that were generated for the `VectorAdd` kernel's FPGA early image. You may also wish to examine the reports generated by the full FPGA hardware compile and compare their contents. +For part 1 of this tutorial, only the `fpga_emu` target is available as this regular C++ code only target a CPU. + +## Examining the Reports +In *part2*, *part3*, *part4* and *part5*, after running the `report` target, the optimization report can be viewed using the `fpga_report` application: +``` +fpga_report vector_add.report.prj/reports/vector_add_report.zip +``` +Browse the reports that were generated for the `VectorAdd` kernel's FPGA early image. You may also wish to examine the reports generated by the simulation compile and full FPGA hardware compile and compare their contents. ## Running the Sample -1. Run the sample on the FPGA emulator (the kernel executes on the CPU): - ``` - ./fpga_compile.fpga_emu (Linux) - fpga_compile.fpga_emu.exe (Windows) - ``` -2. Run the sample on the FPGA simulator device (the kernel executes in the simulator): - * On Linux - ```bash - CL_CONTEXT_MPSIM_DEVICE_INTELFPGA=1 ./fpga_compile.fpga_sim - ``` - * On Windows - ```bash - set CL_CONTEXT_MPSIM_DEVICE_INTELFPGA=1 - fpga_compile.fpga_sim.exe - set CL_CONTEXT_MPSIM_DEVICE_INTELFPGA= - ``` + 1. Run the sample on the FPGA emulator (the kernel executes on the CPU): + ``` + ./vector_add.fpga_emu (Linux) + vector_add.fpga_emu.exe (Windows) + ``` +2. Run the sample on the FPGA simulator device (the kernel executes in a simulator): + * On Linux + ``` + CL_CONTEXT_MPSIM_DEVICE_INTELFPGA=1 ./vector_add.fpga_sim + ``` + * On Windows + ``` + set CL_CONTEXT_MPSIM_DEVICE_INTELFPGA=1 + vector_add.fpga_sim.exe + set CL_CONTEXT_MPSIM_DEVICE_INTELFPGA= + ``` 3. Run the sample on the FPGA device (only if you ran `cmake` with `-DFPGA_DEVICE=:`): - ``` - ./fpga_compile.fpga (Linux) - fpga_compile.fpga.exe (Windows) - ``` + ``` + ./vector_add.fpga (Linux) + vector_add.fpga.exe (Windows) + ``` ### Example of Output ``` -PASSED: results are correct +using FPGA Simulator. +add two vectors of size 256 +PASSED ``` ## License +Code samples are licensed under the MIT license. See +[License.txt](https://github.com/oneapi-src/oneAPI-samples/blob/master/License.txt) for details. -Code samples are licensed under the MIT license. See [License.txt](https://github.com/oneapi-src/oneAPI-samples/blob/master/License.txt) for details. +Third party program Licenses can be found here: [third-party-programs.txt](https://github.com/oneapi-src/oneAPI-samples/blob/master/third-party-programs.txt). -Third-party program Licenses can be found here: [third-party-programs.txt](https://github.com/oneapi-src/oneAPI-samples/blob/master/third-party-programs.txt). +[1]: +[2]: +[3]: +[4]: diff --git a/DirectProgramming/C++SYCL_FPGA/Tutorials/GettingStarted/fpga_compile/part1-C++/CMakeLists.txt b/DirectProgramming/C++SYCL_FPGA/Tutorials/GettingStarted/fpga_compile/part1-C++/CMakeLists.txt new file mode 100644 index 0000000000..61f35423a8 --- /dev/null +++ b/DirectProgramming/C++SYCL_FPGA/Tutorials/GettingStarted/fpga_compile/part1-C++/CMakeLists.txt @@ -0,0 +1,20 @@ +if(UNIX) + # Direct CMake to use icpx rather than the default C++ compiler/linker + set(CMAKE_CXX_COMPILER icpx) +else() # Windows + # Force CMake to use icx-cl rather than the default C++ compiler/linker + # (needed on Windows only) + include (CMakeForceCompiler) + CMAKE_FORCE_CXX_COMPILER (icx-cl IntelDPCPP) + include (Platform/Windows-Clang) +endif() + +cmake_minimum_required (VERSION 3.7.2) + +project(FPGACompile CXX) + +set(CMAKE_ARCHIVE_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}) +set(CMAKE_LIBRARY_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}) +set(CMAKE_RUNTIME_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}) + +add_subdirectory (src) \ No newline at end of file diff --git a/DirectProgramming/C++SYCL_FPGA/Tutorials/GettingStarted/fpga_compile/part1-C++/src/CMakeLists.txt b/DirectProgramming/C++SYCL_FPGA/Tutorials/GettingStarted/fpga_compile/part1-C++/src/CMakeLists.txt new file mode 100644 index 0000000000..6d69ac3db4 --- /dev/null +++ b/DirectProgramming/C++SYCL_FPGA/Tutorials/GettingStarted/fpga_compile/part1-C++/src/CMakeLists.txt @@ -0,0 +1,25 @@ + +set(SOURCE_FILE vector_add.cpp) +set(TARGET_NAME vector_add) + +# FPGA device selection +if(DEFINED FPGA_DEVICE) + message(STATUS "Ignoring FPGA_DEVICE: ${FPGA_DEVICE}, not applicable") +endif() + +set(EMULATOR_TARGET ${TARGET_NAME}.fpga_emu) + +add_executable(${EMULATOR_TARGET} ${SOURCE_FILE}) +add_custom_target(fpga_emu DEPENDS ${EMULATOR_TARGET}) + +# This code sample do not support simulator and fpga. +# following targets are added to be compatiable with reg-tests +set(SIMULATOR_TARGET ${TARGET_NAME}.fpga_sim) + +add_executable(${SIMULATOR_TARGET} ${SOURCE_FILE}) +add_custom_target(fpga_sim DEPENDS ${SIMULATOR_TARGET}) + +set(FPGA_TARGET ${TARGET_NAME}.fpga) + +add_executable(${FPGA_TARGET} ${SOURCE_FILE}) +add_custom_target(fpga DEPENDS ${FPGA_TARGET}) \ No newline at end of file diff --git a/DirectProgramming/C++SYCL_FPGA/Tutorials/GettingStarted/fpga_compile/part1-C++/src/vector_add.cpp b/DirectProgramming/C++SYCL_FPGA/Tutorials/GettingStarted/fpga_compile/part1-C++/src/vector_add.cpp new file mode 100644 index 0000000000..616af1ef86 --- /dev/null +++ b/DirectProgramming/C++SYCL_FPGA/Tutorials/GettingStarted/fpga_compile/part1-C++/src/vector_add.cpp @@ -0,0 +1,46 @@ +#include + +void VectorAdd(const int *a_in, const int *b_in, int *c_out, int len) { + for (int idx = 0; idx < len; idx++) { + int a_val = a_in[idx]; + int b_val = b_in[idx]; + int sum = a_val + b_val; + c_out[idx] = sum; + } +} + +constexpr int kVectSize = 256; + +int main() { + + // declare arrays and fill them + int *vec_a = new int[kVectSize]; + int *vec_b = new int[kVectSize]; + int *vec_c = new int[kVectSize]; + for (int i = 0; i < kVectSize; i++) { + vec_a[i] = i; + vec_b[i] = (kVectSize - i); + } + + std::cout << "add two vectors of size " << kVectSize << std::endl; + + VectorAdd(vec_a, vec_b, vec_c, kVectSize); + + // verify that vector C is correct + bool passed = true; + for (int i = 0; i < kVectSize; i++) { + int expected = vec_a[i] + vec_b[i]; + if (vec_c[i] != expected) { + std::cout << "idx=" << i << ": result " << vec_c[i] << ", expected (" << expected << ") A=" << vec_a[i] << " + B=" << vec_b[i] << std::endl; + passed = false; + } + } + + std::cout << (passed ? "PASSED" : "FAILED") << std::endl; + + delete[] vec_a; + delete[] vec_b; + delete[] vec_c; + + return passed ? EXIT_SUCCESS : EXIT_FAILURE; +} \ No newline at end of file diff --git a/DirectProgramming/C++SYCL_FPGA/Tutorials/GettingStarted/fpga_compile/part2-dpcpp_functor_usm/CMakeLists.txt b/DirectProgramming/C++SYCL_FPGA/Tutorials/GettingStarted/fpga_compile/part2-dpcpp_functor_usm/CMakeLists.txt new file mode 100644 index 0000000000..d865aee631 --- /dev/null +++ b/DirectProgramming/C++SYCL_FPGA/Tutorials/GettingStarted/fpga_compile/part2-dpcpp_functor_usm/CMakeLists.txt @@ -0,0 +1,20 @@ +if(UNIX) + # Direct CMake to use dpcpp rather than the default C++ compiler/linker + set(CMAKE_CXX_COMPILER icpx) +else() # Windows + # Force CMake to use dpcpp rather than the default C++ compiler/linker + # (needed on Windows only) + include (CMakeForceCompiler) + CMAKE_FORCE_CXX_COMPILER (icx-cl IntelDPCPP) + include (Platform/Windows-Clang) +endif() + +cmake_minimum_required (VERSION 3.7.2) + +project(FPGACompile CXX) + +set(CMAKE_ARCHIVE_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}) +set(CMAKE_LIBRARY_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}) +set(CMAKE_RUNTIME_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}) + +add_subdirectory (src) \ No newline at end of file diff --git a/DirectProgramming/C++SYCL_FPGA/Tutorials/GettingStarted/fpga_compile/part2-dpcpp_functor_usm/src/CMakeLists.txt b/DirectProgramming/C++SYCL_FPGA/Tutorials/GettingStarted/fpga_compile/part2-dpcpp_functor_usm/src/CMakeLists.txt new file mode 100644 index 0000000000..e940daa564 --- /dev/null +++ b/DirectProgramming/C++SYCL_FPGA/Tutorials/GettingStarted/fpga_compile/part2-dpcpp_functor_usm/src/CMakeLists.txt @@ -0,0 +1,99 @@ +set(SOURCE_FILE vector_add.cpp) +set(TARGET_NAME vector_add) +set(EMULATOR_TARGET ${TARGET_NAME}.fpga_emu) +set(SIMULATOR_TARGET ${TARGET_NAME}.fpga_sim) +set(FPGA_TARGET ${TARGET_NAME}.fpga) + +# FPGA board selection +if(NOT DEFINED FPGA_DEVICE) + set(FPGA_DEVICE "Agilex") + message(STATUS "FPGA_DEVICE was not specified.\ + \nConfiguring the design to the default FPGA family: ${FPGA_DEVICE}\ + \nPlease refer to the README for information on target selection.") +else() + message(STATUS "Configuring the design with the following target: ${FPGA_DEVICE}") +endif() + +# These are Windows-specific flags: +# 1. /EHsc This is a Windows-specific flag that enables exception handling in host code +# 2. /Qactypes Include ac_types headers and link against ac_types emulation libraries +if(WIN32) + set(WIN_FLAG "/EHsc") + set(AC_TYPES_FLAG "/Qactypes") +else() + set(AC_TYPES_FLAG "-qactypes") +endif() + +# A SYCL ahead-of-time (AoT) compile processes the device code in two stages. +# 1. The "compile" stage compiles the device code to an intermediate representation (SPIR-V). +# 2. The "link" stage invokes the compiler's FPGA backend before linking. +# For this reason, FPGA backend flags must be passed as link flags in CMake. +set(EMULATOR_COMPILE_FLAGS "-fsycl -fintelfpga ${AC_TYPES_FLAG} -DFPGA_EMULATOR -Wall ${WIN_FLAG}") +set(EMULATOR_LINK_FLAGS "-fsycl -fintelfpga ${AC_TYPES_FLAG}") +set(SIMULATOR_COMPILE_FLAGS "-fsycl -fintelfpga ${AC_TYPES_FLAG} -DFPGA_SIMULATOR -Wall ${WIN_FLAG}") +set(SIMULATOR_LINK_FLAGS "-fsycl -fintelfpga ${AC_TYPES_FLAG} -Xssimulation -Xsghdl -Xstarget=${FPGA_DEVICE} ${USER_HARDWARE_FLAGS}") +set(REPORT_COMPILE_FLAGS "-fsycl -fintelfpga ${AC_TYPES_FLAG} -Wall ${WIN_FLAG} -DFPGA_REPORT") +set(REPORT_LINK_FLAGS "-fsycl -fintelfpga -Xshardware -Xstarget=${FPGA_DEVICE} ${USER_HARDWARE_FLAGS}") +set(HARDWARE_COMPILE_FLAGS "-fsycl -fintelfpga ${AC_TYPES_FLAG} -Wall ${WIN_FLAG} -DFPGA_HARDWARE") +set(HARDWARE_LINK_FLAGS "-fsycl -fintelfpga ${AC_TYPES_FLAG} -Xshardware -Xstarget=${FPGA_DEVICE} ${USER_HARDWARE_FLAGS}") +# use cmake -D USER_HARDWARE_FLAGS= to set extra flags for FPGA backend compilation + +############################################################################### +### FPGA Emulator +############################################################################### +# To compile in a single command: +# icpx -fsycl -fintelfpga ${AC_TYPES_FLAG} -DFPGA_EMULATOR fpga_compile.cpp -o fpga_compile.fpga_emu +# CMake executes: +# [compile] icpx -fsycl -fintelfpga ${AC_TYPES_FLAG} -DFPGA_EMULATOR -o fpga_compile.cpp.o -c fpga_compile.cpp +# [link] icpx -fsycl -fintelfpga ${AC_TYPES_FLAG} fpga_compile.cpp.o -o fpga_compile.fpga_emu +add_executable(${EMULATOR_TARGET} ${SOURCE_FILE}) +target_include_directories(${EMULATOR_TARGET} PRIVATE ../../../../include) +set_target_properties(${EMULATOR_TARGET} PROPERTIES COMPILE_FLAGS "${EMULATOR_COMPILE_FLAGS}") +set_target_properties(${EMULATOR_TARGET} PROPERTIES LINK_FLAGS "${EMULATOR_LINK_FLAGS}") +add_custom_target(fpga_emu DEPENDS ${EMULATOR_TARGET}) + +############################################################################### +### Generate Report +############################################################################### +# To compile manually: +# icpx -fsycl -fintelfpga ${AC_TYPES_FLAG} -Xshardware -Xstarget= -fsycl-link=early ac_fixed.cpp -o ac_fixed_report.a +set(FPGA_EARLY_IMAGE ${TARGET_NAME}_report.a) +# The compile output is not an executable, but an intermediate compilation result unique to SYCL. +add_executable(${FPGA_EARLY_IMAGE} ${SOURCE_FILE}) +target_include_directories(${FPGA_EARLY_IMAGE} PRIVATE ../../../../include) +add_custom_target(report DEPENDS ${FPGA_EARLY_IMAGE}) +set_target_properties(${FPGA_EARLY_IMAGE} PROPERTIES COMPILE_FLAGS "${REPORT_COMPILE_FLAGS}") +set_target_properties(${FPGA_EARLY_IMAGE} PROPERTIES LINK_FLAGS "${REPORT_LINK_FLAGS} -fsycl-link=early") +# fsycl-link=early stops the compiler after RTL generation, before invoking Quartus® + +############################################################################### +### FPGA Simulator +############################################################################### +# To compile in a single command: +# icpx -fsycl -fintelfpga -DFPGA_SIMULATOR ${AC_TYPES_FLAG} -Xssimulation -Xsghdl -Xstarget= ac_fixed.cpp -o ac_fixed.fpga +# CMake executes: +# [compile] icpx -fsycl -fintelfpga -DFPGA_SIMULATOR ${AC_TYPES_FLAG} -o ac_fixed.cpp.o -c ac_fixed.cpp +# [link] icpx -fsycl -fintelfpga ${AC_TYPES_FLAG} -Xssimulation -Xsghdl -Xstarget= ac_fixed.cpp.o -o ac_fixed.fpga +add_executable(${SIMULATOR_TARGET} EXCLUDE_FROM_ALL ${SOURCE_FILE}) +target_include_directories(${SIMULATOR_TARGET} PRIVATE ../../../../include) +add_custom_target(fpga_sim DEPENDS ${SIMULATOR_TARGET}) +set_target_properties(${SIMULATOR_TARGET} PROPERTIES COMPILE_FLAGS "${SIMULATOR_COMPILE_FLAGS}") +set_target_properties(${SIMULATOR_TARGET} PROPERTIES LINK_FLAGS "${SIMULATOR_LINK_FLAGS} -reuse-exe=${CMAKE_BINARY_DIR}/${SIMULATOR_TARGET}") +# The -reuse-exe flag enables rapid recompilation of host-only code changes. +# See C++SYCL_FPGA/GettingStarted/fast_recompile for details. + +############################################################################### +### FPGA Hardware +############################################################################### +# To compile in a single command: +# icpx -fsycl -fintelfpga ${AC_TYPES_FLAG} -Xshardware -Xstarget= ac_fixed.cpp -o ac_fixed.fpga +# CMake executes: +# [compile] icpx -fsycl -fintelfpga ${AC_TYPES_FLAG} -o ac_fixed.cpp.o -c ac_fixed.cpp +# [link] icpx -fsycl -fintelfpga ${AC_TYPES_FLAG} -Xshardware -Xstarget= ac_fixed.cpp.o -o ac_fixed.fpga +add_executable(${FPGA_TARGET} EXCLUDE_FROM_ALL ${SOURCE_FILE}) +target_include_directories(${FPGA_TARGET} PRIVATE ../../../../include) +add_custom_target(fpga DEPENDS ${FPGA_TARGET}) +set_target_properties(${FPGA_TARGET} PROPERTIES COMPILE_FLAGS "${HARDWARE_COMPILE_FLAGS}") +set_target_properties(${FPGA_TARGET} PROPERTIES LINK_FLAGS "${HARDWARE_LINK_FLAGS} -reuse-exe=${CMAKE_BINARY_DIR}/${FPGA_TARGET}") +# The -reuse-exe flag enables rapid recompilation of host-only code changes. +# See C++SYCL_FPGA/GettingStarted/fast_recompile for details. \ No newline at end of file diff --git a/DirectProgramming/C++SYCL_FPGA/Tutorials/GettingStarted/fpga_compile/part2-dpcpp_functor_usm/src/vector_add.cpp b/DirectProgramming/C++SYCL_FPGA/Tutorials/GettingStarted/fpga_compile/part2-dpcpp_functor_usm/src/vector_add.cpp new file mode 100644 index 0000000000..e79e112528 --- /dev/null +++ b/DirectProgramming/C++SYCL_FPGA/Tutorials/GettingStarted/fpga_compile/part2-dpcpp_functor_usm/src/vector_add.cpp @@ -0,0 +1,97 @@ +#include + +// oneAPI headers +#include +#include + +// Forward declare the kernel name in the global scope. This is an FPGA best +// practice that reduces name mangling in the optimization reports. +class VectorAddID; + +struct VectorAdd { + public: + int *const vec_a_in; + int *const vec_b_in; + int *const vec_c_out; + int len; + + void operator()() const { + for (int idx = 0; idx < len; idx++) { + int a_val = vec_a_in[idx]; + int b_val = vec_b_in[idx]; + int sum = a_val + b_val; + vec_c_out[idx] = sum; + } + } +}; + +constexpr int kVectSize = 256; + +int main() { + bool passed = true; + try { + // Use compile-time macros to select either: + // - the FPGA emulator device (CPU emulation of the FPGA) + // - the FPGA device (a real FPGA) + // - the simulator device +#if FPGA_SIMULATOR + auto selector = sycl::ext::intel::fpga_simulator_selector_v; +#elif FPGA_HARDWARE + auto selector = sycl::ext::intel::fpga_selector_v; +#else // #if FPGA_EMULATOR + auto selector = sycl::ext::intel::fpga_emulator_selector_v; +#endif + + // create the device queue + sycl::queue q(selector); + + auto device = q.get_device(); + + std::cout << "Running on device: " + << device.get_info().c_str() + << std::endl; + + // declare arrays and fill them + // allocate in shared memory so the kernel can see them + int *vec_a = sycl::malloc_shared(kVectSize, q); + int *vec_b = sycl::malloc_shared(kVectSize, q); + int *vec_c = sycl::malloc_shared(kVectSize, q); + for (int i = 0; i < kVectSize; i++) { + vec_a[i] = i; + vec_b[i] = (kVectSize - i); + } + + std::cout << "add two vectors of size " << kVectSize << std::endl; + + q.single_task(VectorAdd{vec_a, vec_b, vec_c, kVectSize}).wait(); + + // verify that vec_c is correct + for (int i = 0; i < kVectSize; i++) { + int expected = vec_a[i] + vec_b[i]; + if (vec_c[i] != expected) { + std::cout << "idx=" << i << ": result " << vec_c[i] << ", expected (" << expected << ") A=" << vec_a[i] << " + B=" << vec_b[i] << std::endl; + passed = false; + } + } + + std::cout << (passed ? "PASSED" : "FAILED") << std::endl; + + sycl::free(vec_a, q); + sycl::free(vec_b, q); + sycl::free(vec_c, q); + } catch (sycl::exception const &e) { + // Catches exceptions in the host code. + std::cerr << "Caught a SYCL host exception:\n" << e.what() << "\n"; + + // Most likely the runtime couldn't find FPGA hardware! + if (e.code().value() == CL_DEVICE_NOT_FOUND) { + std::cerr << "If you are targeting an FPGA, please ensure that your " + "system has a correctly configured FPGA board.\n"; + std::cerr << "Run sys_check in the oneAPI root directory to verify.\n"; + std::cerr << "If you are targeting the FPGA emulator, compile with " + "-DFPGA_EMULATOR.\n"; + } + std::terminate(); + } + return passed ? EXIT_SUCCESS : EXIT_FAILURE; +} \ No newline at end of file diff --git a/DirectProgramming/C++SYCL_FPGA/Tutorials/GettingStarted/fpga_compile/part3-dpcpp_lambda_usm/CMakeLists.txt b/DirectProgramming/C++SYCL_FPGA/Tutorials/GettingStarted/fpga_compile/part3-dpcpp_lambda_usm/CMakeLists.txt new file mode 100644 index 0000000000..d865aee631 --- /dev/null +++ b/DirectProgramming/C++SYCL_FPGA/Tutorials/GettingStarted/fpga_compile/part3-dpcpp_lambda_usm/CMakeLists.txt @@ -0,0 +1,20 @@ +if(UNIX) + # Direct CMake to use dpcpp rather than the default C++ compiler/linker + set(CMAKE_CXX_COMPILER icpx) +else() # Windows + # Force CMake to use dpcpp rather than the default C++ compiler/linker + # (needed on Windows only) + include (CMakeForceCompiler) + CMAKE_FORCE_CXX_COMPILER (icx-cl IntelDPCPP) + include (Platform/Windows-Clang) +endif() + +cmake_minimum_required (VERSION 3.7.2) + +project(FPGACompile CXX) + +set(CMAKE_ARCHIVE_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}) +set(CMAKE_LIBRARY_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}) +set(CMAKE_RUNTIME_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}) + +add_subdirectory (src) \ No newline at end of file diff --git a/DirectProgramming/C++SYCL_FPGA/Tutorials/GettingStarted/fpga_compile/part3-dpcpp_lambda_usm/src/CMakeLists.txt b/DirectProgramming/C++SYCL_FPGA/Tutorials/GettingStarted/fpga_compile/part3-dpcpp_lambda_usm/src/CMakeLists.txt new file mode 100644 index 0000000000..e940daa564 --- /dev/null +++ b/DirectProgramming/C++SYCL_FPGA/Tutorials/GettingStarted/fpga_compile/part3-dpcpp_lambda_usm/src/CMakeLists.txt @@ -0,0 +1,99 @@ +set(SOURCE_FILE vector_add.cpp) +set(TARGET_NAME vector_add) +set(EMULATOR_TARGET ${TARGET_NAME}.fpga_emu) +set(SIMULATOR_TARGET ${TARGET_NAME}.fpga_sim) +set(FPGA_TARGET ${TARGET_NAME}.fpga) + +# FPGA board selection +if(NOT DEFINED FPGA_DEVICE) + set(FPGA_DEVICE "Agilex") + message(STATUS "FPGA_DEVICE was not specified.\ + \nConfiguring the design to the default FPGA family: ${FPGA_DEVICE}\ + \nPlease refer to the README for information on target selection.") +else() + message(STATUS "Configuring the design with the following target: ${FPGA_DEVICE}") +endif() + +# These are Windows-specific flags: +# 1. /EHsc This is a Windows-specific flag that enables exception handling in host code +# 2. /Qactypes Include ac_types headers and link against ac_types emulation libraries +if(WIN32) + set(WIN_FLAG "/EHsc") + set(AC_TYPES_FLAG "/Qactypes") +else() + set(AC_TYPES_FLAG "-qactypes") +endif() + +# A SYCL ahead-of-time (AoT) compile processes the device code in two stages. +# 1. The "compile" stage compiles the device code to an intermediate representation (SPIR-V). +# 2. The "link" stage invokes the compiler's FPGA backend before linking. +# For this reason, FPGA backend flags must be passed as link flags in CMake. +set(EMULATOR_COMPILE_FLAGS "-fsycl -fintelfpga ${AC_TYPES_FLAG} -DFPGA_EMULATOR -Wall ${WIN_FLAG}") +set(EMULATOR_LINK_FLAGS "-fsycl -fintelfpga ${AC_TYPES_FLAG}") +set(SIMULATOR_COMPILE_FLAGS "-fsycl -fintelfpga ${AC_TYPES_FLAG} -DFPGA_SIMULATOR -Wall ${WIN_FLAG}") +set(SIMULATOR_LINK_FLAGS "-fsycl -fintelfpga ${AC_TYPES_FLAG} -Xssimulation -Xsghdl -Xstarget=${FPGA_DEVICE} ${USER_HARDWARE_FLAGS}") +set(REPORT_COMPILE_FLAGS "-fsycl -fintelfpga ${AC_TYPES_FLAG} -Wall ${WIN_FLAG} -DFPGA_REPORT") +set(REPORT_LINK_FLAGS "-fsycl -fintelfpga -Xshardware -Xstarget=${FPGA_DEVICE} ${USER_HARDWARE_FLAGS}") +set(HARDWARE_COMPILE_FLAGS "-fsycl -fintelfpga ${AC_TYPES_FLAG} -Wall ${WIN_FLAG} -DFPGA_HARDWARE") +set(HARDWARE_LINK_FLAGS "-fsycl -fintelfpga ${AC_TYPES_FLAG} -Xshardware -Xstarget=${FPGA_DEVICE} ${USER_HARDWARE_FLAGS}") +# use cmake -D USER_HARDWARE_FLAGS= to set extra flags for FPGA backend compilation + +############################################################################### +### FPGA Emulator +############################################################################### +# To compile in a single command: +# icpx -fsycl -fintelfpga ${AC_TYPES_FLAG} -DFPGA_EMULATOR fpga_compile.cpp -o fpga_compile.fpga_emu +# CMake executes: +# [compile] icpx -fsycl -fintelfpga ${AC_TYPES_FLAG} -DFPGA_EMULATOR -o fpga_compile.cpp.o -c fpga_compile.cpp +# [link] icpx -fsycl -fintelfpga ${AC_TYPES_FLAG} fpga_compile.cpp.o -o fpga_compile.fpga_emu +add_executable(${EMULATOR_TARGET} ${SOURCE_FILE}) +target_include_directories(${EMULATOR_TARGET} PRIVATE ../../../../include) +set_target_properties(${EMULATOR_TARGET} PROPERTIES COMPILE_FLAGS "${EMULATOR_COMPILE_FLAGS}") +set_target_properties(${EMULATOR_TARGET} PROPERTIES LINK_FLAGS "${EMULATOR_LINK_FLAGS}") +add_custom_target(fpga_emu DEPENDS ${EMULATOR_TARGET}) + +############################################################################### +### Generate Report +############################################################################### +# To compile manually: +# icpx -fsycl -fintelfpga ${AC_TYPES_FLAG} -Xshardware -Xstarget= -fsycl-link=early ac_fixed.cpp -o ac_fixed_report.a +set(FPGA_EARLY_IMAGE ${TARGET_NAME}_report.a) +# The compile output is not an executable, but an intermediate compilation result unique to SYCL. +add_executable(${FPGA_EARLY_IMAGE} ${SOURCE_FILE}) +target_include_directories(${FPGA_EARLY_IMAGE} PRIVATE ../../../../include) +add_custom_target(report DEPENDS ${FPGA_EARLY_IMAGE}) +set_target_properties(${FPGA_EARLY_IMAGE} PROPERTIES COMPILE_FLAGS "${REPORT_COMPILE_FLAGS}") +set_target_properties(${FPGA_EARLY_IMAGE} PROPERTIES LINK_FLAGS "${REPORT_LINK_FLAGS} -fsycl-link=early") +# fsycl-link=early stops the compiler after RTL generation, before invoking Quartus® + +############################################################################### +### FPGA Simulator +############################################################################### +# To compile in a single command: +# icpx -fsycl -fintelfpga -DFPGA_SIMULATOR ${AC_TYPES_FLAG} -Xssimulation -Xsghdl -Xstarget= ac_fixed.cpp -o ac_fixed.fpga +# CMake executes: +# [compile] icpx -fsycl -fintelfpga -DFPGA_SIMULATOR ${AC_TYPES_FLAG} -o ac_fixed.cpp.o -c ac_fixed.cpp +# [link] icpx -fsycl -fintelfpga ${AC_TYPES_FLAG} -Xssimulation -Xsghdl -Xstarget= ac_fixed.cpp.o -o ac_fixed.fpga +add_executable(${SIMULATOR_TARGET} EXCLUDE_FROM_ALL ${SOURCE_FILE}) +target_include_directories(${SIMULATOR_TARGET} PRIVATE ../../../../include) +add_custom_target(fpga_sim DEPENDS ${SIMULATOR_TARGET}) +set_target_properties(${SIMULATOR_TARGET} PROPERTIES COMPILE_FLAGS "${SIMULATOR_COMPILE_FLAGS}") +set_target_properties(${SIMULATOR_TARGET} PROPERTIES LINK_FLAGS "${SIMULATOR_LINK_FLAGS} -reuse-exe=${CMAKE_BINARY_DIR}/${SIMULATOR_TARGET}") +# The -reuse-exe flag enables rapid recompilation of host-only code changes. +# See C++SYCL_FPGA/GettingStarted/fast_recompile for details. + +############################################################################### +### FPGA Hardware +############################################################################### +# To compile in a single command: +# icpx -fsycl -fintelfpga ${AC_TYPES_FLAG} -Xshardware -Xstarget= ac_fixed.cpp -o ac_fixed.fpga +# CMake executes: +# [compile] icpx -fsycl -fintelfpga ${AC_TYPES_FLAG} -o ac_fixed.cpp.o -c ac_fixed.cpp +# [link] icpx -fsycl -fintelfpga ${AC_TYPES_FLAG} -Xshardware -Xstarget= ac_fixed.cpp.o -o ac_fixed.fpga +add_executable(${FPGA_TARGET} EXCLUDE_FROM_ALL ${SOURCE_FILE}) +target_include_directories(${FPGA_TARGET} PRIVATE ../../../../include) +add_custom_target(fpga DEPENDS ${FPGA_TARGET}) +set_target_properties(${FPGA_TARGET} PROPERTIES COMPILE_FLAGS "${HARDWARE_COMPILE_FLAGS}") +set_target_properties(${FPGA_TARGET} PROPERTIES LINK_FLAGS "${HARDWARE_LINK_FLAGS} -reuse-exe=${CMAKE_BINARY_DIR}/${FPGA_TARGET}") +# The -reuse-exe flag enables rapid recompilation of host-only code changes. +# See C++SYCL_FPGA/GettingStarted/fast_recompile for details. \ No newline at end of file diff --git a/DirectProgramming/C++SYCL_FPGA/Tutorials/GettingStarted/fpga_compile/part3-dpcpp_lambda_usm/src/vector_add.cpp b/DirectProgramming/C++SYCL_FPGA/Tutorials/GettingStarted/fpga_compile/part3-dpcpp_lambda_usm/src/vector_add.cpp new file mode 100644 index 0000000000..1ddcabcec1 --- /dev/null +++ b/DirectProgramming/C++SYCL_FPGA/Tutorials/GettingStarted/fpga_compile/part3-dpcpp_lambda_usm/src/vector_add.cpp @@ -0,0 +1,96 @@ +#include +#include + +// oneAPI headers +#include +#include + +using namespace sycl; + +// Forward declare the kernel name in the global scope. This is an FPGA best +// practice that reduces name mangling in the optimization reports. +class VectorAddID; + +void VectorAdd(const int *vec_a_in, const int *vec_b_in, int *vec_c_out, int len) { + for (int idx = 0; idx < len; idx++) { + int a_val = vec_a_in[idx]; + int b_val = vec_b_in[idx]; + int sum = a_val + b_val; + vec_c_out[idx] = sum; + } +} + +constexpr int kVectSize = 256; + +int main() { + bool passed = true; + try { + // Use compile-time macros to select either: + // - the FPGA emulator device (CPU emulation of the FPGA) + // - the FPGA device (a real FPGA) + // - the simulator device +#if FPGA_SIMULATOR + auto selector = sycl::ext::intel::fpga_simulator_selector_v; +#elif FPGA_HARDWARE + auto selector = sycl::ext::intel::fpga_selector_v; +#else // #if FPGA_EMULATOR + auto selector = sycl::ext::intel::fpga_emulator_selector_v; +#endif + + // create the device queue + sycl::queue q(selector); + + // make sure the device supports USM host allocations + auto device = q.get_device(); + + std::cout << "Running on device: " + << device.get_info().c_str() + << std::endl; + + // declare arrays and fill them + // allocate in shared memory so the kernel can see them + int *vec_a = malloc_shared(kVectSize, q); + int *vec_b = malloc_shared(kVectSize, q); + int *vec_c = malloc_shared(kVectSize, q); + for (int i = 0; i < kVectSize; i++) { + vec_a[i] = i; + vec_b[i] = (kVectSize - i); + } + + std::cout << "add two vectors of size " << kVectSize << std::endl; + + q.single_task([=]() { + VectorAdd(vec_a, vec_b, vec_c, kVectSize); + }) + .wait(); + + // verify that vec_c is correct + for (int i = 0; i < kVectSize; i++) { + int expected = vec_a[i] + vec_b[i]; + if (vec_c[i] != expected) { + std::cout << "idx=" << i << ": result " << vec_c[i] << ", expected (" << expected << ") A=" << vec_a[i] << " + B=" << vec_b[i] << std::endl; + passed = false; + } + } + + std::cout << (passed ? "PASSED" : "FAILED") << std::endl; + + free(vec_a, q); + free(vec_b, q); + free(vec_c, q); + } catch (sycl::exception const &e) { + // Catches exceptions in the host code. + std::cerr << "Caught a SYCL host exception:\n" << e.what() << "\n"; + + // Most likely the runtime couldn't find FPGA hardware! + if (e.code().value() == CL_DEVICE_NOT_FOUND) { + std::cerr << "If you are targeting an FPGA, please ensure that your " + "system has a correctly configured FPGA board.\n"; + std::cerr << "Run sys_check in the oneAPI root directory to verify.\n"; + std::cerr << "If you are targeting the FPGA emulator, compile with " + "-DFPGA_EMULATOR.\n"; + } + std::terminate(); + } + return passed ? EXIT_SUCCESS : EXIT_FAILURE; +} \ No newline at end of file diff --git a/DirectProgramming/C++SYCL_FPGA/Tutorials/GettingStarted/fpga_compile/part4-dpcpp_lambda_buffers/CMakeLists.txt b/DirectProgramming/C++SYCL_FPGA/Tutorials/GettingStarted/fpga_compile/part4-dpcpp_lambda_buffers/CMakeLists.txt new file mode 100644 index 0000000000..d865aee631 --- /dev/null +++ b/DirectProgramming/C++SYCL_FPGA/Tutorials/GettingStarted/fpga_compile/part4-dpcpp_lambda_buffers/CMakeLists.txt @@ -0,0 +1,20 @@ +if(UNIX) + # Direct CMake to use dpcpp rather than the default C++ compiler/linker + set(CMAKE_CXX_COMPILER icpx) +else() # Windows + # Force CMake to use dpcpp rather than the default C++ compiler/linker + # (needed on Windows only) + include (CMakeForceCompiler) + CMAKE_FORCE_CXX_COMPILER (icx-cl IntelDPCPP) + include (Platform/Windows-Clang) +endif() + +cmake_minimum_required (VERSION 3.7.2) + +project(FPGACompile CXX) + +set(CMAKE_ARCHIVE_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}) +set(CMAKE_LIBRARY_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}) +set(CMAKE_RUNTIME_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}) + +add_subdirectory (src) \ No newline at end of file diff --git a/DirectProgramming/C++SYCL_FPGA/Tutorials/GettingStarted/fpga_compile/part4-dpcpp_lambda_buffers/src/CMakeLists.txt b/DirectProgramming/C++SYCL_FPGA/Tutorials/GettingStarted/fpga_compile/part4-dpcpp_lambda_buffers/src/CMakeLists.txt new file mode 100644 index 0000000000..e940daa564 --- /dev/null +++ b/DirectProgramming/C++SYCL_FPGA/Tutorials/GettingStarted/fpga_compile/part4-dpcpp_lambda_buffers/src/CMakeLists.txt @@ -0,0 +1,99 @@ +set(SOURCE_FILE vector_add.cpp) +set(TARGET_NAME vector_add) +set(EMULATOR_TARGET ${TARGET_NAME}.fpga_emu) +set(SIMULATOR_TARGET ${TARGET_NAME}.fpga_sim) +set(FPGA_TARGET ${TARGET_NAME}.fpga) + +# FPGA board selection +if(NOT DEFINED FPGA_DEVICE) + set(FPGA_DEVICE "Agilex") + message(STATUS "FPGA_DEVICE was not specified.\ + \nConfiguring the design to the default FPGA family: ${FPGA_DEVICE}\ + \nPlease refer to the README for information on target selection.") +else() + message(STATUS "Configuring the design with the following target: ${FPGA_DEVICE}") +endif() + +# These are Windows-specific flags: +# 1. /EHsc This is a Windows-specific flag that enables exception handling in host code +# 2. /Qactypes Include ac_types headers and link against ac_types emulation libraries +if(WIN32) + set(WIN_FLAG "/EHsc") + set(AC_TYPES_FLAG "/Qactypes") +else() + set(AC_TYPES_FLAG "-qactypes") +endif() + +# A SYCL ahead-of-time (AoT) compile processes the device code in two stages. +# 1. The "compile" stage compiles the device code to an intermediate representation (SPIR-V). +# 2. The "link" stage invokes the compiler's FPGA backend before linking. +# For this reason, FPGA backend flags must be passed as link flags in CMake. +set(EMULATOR_COMPILE_FLAGS "-fsycl -fintelfpga ${AC_TYPES_FLAG} -DFPGA_EMULATOR -Wall ${WIN_FLAG}") +set(EMULATOR_LINK_FLAGS "-fsycl -fintelfpga ${AC_TYPES_FLAG}") +set(SIMULATOR_COMPILE_FLAGS "-fsycl -fintelfpga ${AC_TYPES_FLAG} -DFPGA_SIMULATOR -Wall ${WIN_FLAG}") +set(SIMULATOR_LINK_FLAGS "-fsycl -fintelfpga ${AC_TYPES_FLAG} -Xssimulation -Xsghdl -Xstarget=${FPGA_DEVICE} ${USER_HARDWARE_FLAGS}") +set(REPORT_COMPILE_FLAGS "-fsycl -fintelfpga ${AC_TYPES_FLAG} -Wall ${WIN_FLAG} -DFPGA_REPORT") +set(REPORT_LINK_FLAGS "-fsycl -fintelfpga -Xshardware -Xstarget=${FPGA_DEVICE} ${USER_HARDWARE_FLAGS}") +set(HARDWARE_COMPILE_FLAGS "-fsycl -fintelfpga ${AC_TYPES_FLAG} -Wall ${WIN_FLAG} -DFPGA_HARDWARE") +set(HARDWARE_LINK_FLAGS "-fsycl -fintelfpga ${AC_TYPES_FLAG} -Xshardware -Xstarget=${FPGA_DEVICE} ${USER_HARDWARE_FLAGS}") +# use cmake -D USER_HARDWARE_FLAGS= to set extra flags for FPGA backend compilation + +############################################################################### +### FPGA Emulator +############################################################################### +# To compile in a single command: +# icpx -fsycl -fintelfpga ${AC_TYPES_FLAG} -DFPGA_EMULATOR fpga_compile.cpp -o fpga_compile.fpga_emu +# CMake executes: +# [compile] icpx -fsycl -fintelfpga ${AC_TYPES_FLAG} -DFPGA_EMULATOR -o fpga_compile.cpp.o -c fpga_compile.cpp +# [link] icpx -fsycl -fintelfpga ${AC_TYPES_FLAG} fpga_compile.cpp.o -o fpga_compile.fpga_emu +add_executable(${EMULATOR_TARGET} ${SOURCE_FILE}) +target_include_directories(${EMULATOR_TARGET} PRIVATE ../../../../include) +set_target_properties(${EMULATOR_TARGET} PROPERTIES COMPILE_FLAGS "${EMULATOR_COMPILE_FLAGS}") +set_target_properties(${EMULATOR_TARGET} PROPERTIES LINK_FLAGS "${EMULATOR_LINK_FLAGS}") +add_custom_target(fpga_emu DEPENDS ${EMULATOR_TARGET}) + +############################################################################### +### Generate Report +############################################################################### +# To compile manually: +# icpx -fsycl -fintelfpga ${AC_TYPES_FLAG} -Xshardware -Xstarget= -fsycl-link=early ac_fixed.cpp -o ac_fixed_report.a +set(FPGA_EARLY_IMAGE ${TARGET_NAME}_report.a) +# The compile output is not an executable, but an intermediate compilation result unique to SYCL. +add_executable(${FPGA_EARLY_IMAGE} ${SOURCE_FILE}) +target_include_directories(${FPGA_EARLY_IMAGE} PRIVATE ../../../../include) +add_custom_target(report DEPENDS ${FPGA_EARLY_IMAGE}) +set_target_properties(${FPGA_EARLY_IMAGE} PROPERTIES COMPILE_FLAGS "${REPORT_COMPILE_FLAGS}") +set_target_properties(${FPGA_EARLY_IMAGE} PROPERTIES LINK_FLAGS "${REPORT_LINK_FLAGS} -fsycl-link=early") +# fsycl-link=early stops the compiler after RTL generation, before invoking Quartus® + +############################################################################### +### FPGA Simulator +############################################################################### +# To compile in a single command: +# icpx -fsycl -fintelfpga -DFPGA_SIMULATOR ${AC_TYPES_FLAG} -Xssimulation -Xsghdl -Xstarget= ac_fixed.cpp -o ac_fixed.fpga +# CMake executes: +# [compile] icpx -fsycl -fintelfpga -DFPGA_SIMULATOR ${AC_TYPES_FLAG} -o ac_fixed.cpp.o -c ac_fixed.cpp +# [link] icpx -fsycl -fintelfpga ${AC_TYPES_FLAG} -Xssimulation -Xsghdl -Xstarget= ac_fixed.cpp.o -o ac_fixed.fpga +add_executable(${SIMULATOR_TARGET} EXCLUDE_FROM_ALL ${SOURCE_FILE}) +target_include_directories(${SIMULATOR_TARGET} PRIVATE ../../../../include) +add_custom_target(fpga_sim DEPENDS ${SIMULATOR_TARGET}) +set_target_properties(${SIMULATOR_TARGET} PROPERTIES COMPILE_FLAGS "${SIMULATOR_COMPILE_FLAGS}") +set_target_properties(${SIMULATOR_TARGET} PROPERTIES LINK_FLAGS "${SIMULATOR_LINK_FLAGS} -reuse-exe=${CMAKE_BINARY_DIR}/${SIMULATOR_TARGET}") +# The -reuse-exe flag enables rapid recompilation of host-only code changes. +# See C++SYCL_FPGA/GettingStarted/fast_recompile for details. + +############################################################################### +### FPGA Hardware +############################################################################### +# To compile in a single command: +# icpx -fsycl -fintelfpga ${AC_TYPES_FLAG} -Xshardware -Xstarget= ac_fixed.cpp -o ac_fixed.fpga +# CMake executes: +# [compile] icpx -fsycl -fintelfpga ${AC_TYPES_FLAG} -o ac_fixed.cpp.o -c ac_fixed.cpp +# [link] icpx -fsycl -fintelfpga ${AC_TYPES_FLAG} -Xshardware -Xstarget= ac_fixed.cpp.o -o ac_fixed.fpga +add_executable(${FPGA_TARGET} EXCLUDE_FROM_ALL ${SOURCE_FILE}) +target_include_directories(${FPGA_TARGET} PRIVATE ../../../../include) +add_custom_target(fpga DEPENDS ${FPGA_TARGET}) +set_target_properties(${FPGA_TARGET} PROPERTIES COMPILE_FLAGS "${HARDWARE_COMPILE_FLAGS}") +set_target_properties(${FPGA_TARGET} PROPERTIES LINK_FLAGS "${HARDWARE_LINK_FLAGS} -reuse-exe=${CMAKE_BINARY_DIR}/${FPGA_TARGET}") +# The -reuse-exe flag enables rapid recompilation of host-only code changes. +# See C++SYCL_FPGA/GettingStarted/fast_recompile for details. \ No newline at end of file diff --git a/DirectProgramming/C++SYCL_FPGA/Tutorials/GettingStarted/fpga_compile/part4-dpcpp_lambda_buffers/src/vector_add.cpp b/DirectProgramming/C++SYCL_FPGA/Tutorials/GettingStarted/fpga_compile/part4-dpcpp_lambda_buffers/src/vector_add.cpp new file mode 100644 index 0000000000..1e70e8a46d --- /dev/null +++ b/DirectProgramming/C++SYCL_FPGA/Tutorials/GettingStarted/fpga_compile/part4-dpcpp_lambda_buffers/src/vector_add.cpp @@ -0,0 +1,105 @@ +#include + +// oneAPI headers +#include +#include + +// Forward declare the kernel name in the global scope. This is an FPGA best +// practice that reduces name mangling in the optimization reports. +class VectorAddID; + +void VectorAdd(const int *vec_a_in, const int *vec_b_in, int *vec_c_out, int len) { + for (int idx = 0; idx < len; idx++) { + int a_val = vec_a_in[idx]; + int b_val = vec_b_in[idx]; + int sum = a_val + b_val; + vec_c_out[idx] = sum; + } +} + +constexpr int kVectSize = 256; + +int main() { + bool passed = true; + try { + // Use compile-time macros to select either: + // - the FPGA emulator device (CPU emulation of the FPGA) + // - the FPGA device (a real FPGA) + // - the simulator device +#if FPGA_SIMULATOR + auto selector = sycl::ext::intel::fpga_simulator_selector_v; +#elif FPGA_HARDWARE + auto selector = sycl::ext::intel::fpga_selector_v; +#else // #if FPGA_EMULATOR + auto selector = sycl::ext::intel::fpga_emulator_selector_v; +#endif + + // create the device queue + sycl::queue q(selector); + + // make sure the device supports USM host allocations + auto device = q.get_device(); + + std::cout << "Running on device: " + << device.get_info().c_str() + << std::endl; + + // declare arrays and fill them + int *vec_a = new int[kVectSize]; + int *vec_b = new int[kVectSize]; + int *vec_c = new int[kVectSize]; + for (int i = 0; i < kVectSize; i++) { + vec_a[i] = i; + vec_b[i] = (kVectSize - i); + } + + std::cout << "add two vectors of size " << kVectSize << std::endl; + { + // copy the input arrays to buffers to share with kernel + sycl::buffer buffer_a{vec_a, sycl::range(kVectSize)}; + sycl::buffer buffer_b{vec_b, sycl::range(kVectSize)}; + sycl::buffer buffer_c{vec_c, sycl::range(kVectSize)}; + + q.submit([&](sycl::handler &h) { + // use accessors to interact with buffers from device code + sycl::accessor accessor_a{buffer_a, h, sycl::read_only}; + sycl::accessor accessor_b{buffer_b, h, sycl::read_only}; + sycl::accessor accessor_c{buffer_c, h, sycl::read_write, sycl::no_init}; + + h.single_task([=]() { + VectorAdd(&accessor_a[0], &accessor_b[0], &accessor_c[0], kVectSize); + }); + }); + } + // result is copied back to host automatically when accessors go out of scope. + + // verify that VC is correct + for (int i = 0; i < kVectSize; i++) { + int expected = vec_a[i] + vec_b[i]; + if (vec_c[i] != expected) { + std::cout << "idx=" << i << ": result " << vec_c[i] << ", expected (" << expected << ") A=" << vec_a[i] << " + B=" << vec_b[i] << std::endl; + passed = false; + } + } + + std::cout << (passed ? "PASSED" : "FAILED") << std::endl; + + delete[] vec_a; + delete[] vec_b; + delete[] vec_c; + } catch (sycl::exception const &e) { + // Catches exceptions in the host code. + std::cerr << "Caught a SYCL host exception:\n" << e.what() << "\n"; + + // Most likely the runtime couldn't find FPGA hardware! + if (e.code().value() == CL_DEVICE_NOT_FOUND) { + std::cerr << "If you are targeting an FPGA, please ensure that your " + "system has a correctly configured FPGA board.\n"; + std::cerr << "Run sys_check in the oneAPI root directory to verify.\n"; + std::cerr << "If you are targeting the FPGA emulator, compile with " + "-DFPGA_EMULATOR.\n"; + } + std::terminate(); + } + return passed ? EXIT_SUCCESS : EXIT_FAILURE; +} \ No newline at end of file diff --git a/DirectProgramming/C++SYCL_FPGA/Tutorials/GettingStarted/fpga_compile/part5-dpcpp_functor_buffers/CMakeLists.txt b/DirectProgramming/C++SYCL_FPGA/Tutorials/GettingStarted/fpga_compile/part5-dpcpp_functor_buffers/CMakeLists.txt new file mode 100644 index 0000000000..d865aee631 --- /dev/null +++ b/DirectProgramming/C++SYCL_FPGA/Tutorials/GettingStarted/fpga_compile/part5-dpcpp_functor_buffers/CMakeLists.txt @@ -0,0 +1,20 @@ +if(UNIX) + # Direct CMake to use dpcpp rather than the default C++ compiler/linker + set(CMAKE_CXX_COMPILER icpx) +else() # Windows + # Force CMake to use dpcpp rather than the default C++ compiler/linker + # (needed on Windows only) + include (CMakeForceCompiler) + CMAKE_FORCE_CXX_COMPILER (icx-cl IntelDPCPP) + include (Platform/Windows-Clang) +endif() + +cmake_minimum_required (VERSION 3.7.2) + +project(FPGACompile CXX) + +set(CMAKE_ARCHIVE_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}) +set(CMAKE_LIBRARY_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}) +set(CMAKE_RUNTIME_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}) + +add_subdirectory (src) \ No newline at end of file diff --git a/DirectProgramming/C++SYCL_FPGA/Tutorials/GettingStarted/fpga_compile/part5-dpcpp_functor_buffers/src/CMakeLists.txt b/DirectProgramming/C++SYCL_FPGA/Tutorials/GettingStarted/fpga_compile/part5-dpcpp_functor_buffers/src/CMakeLists.txt new file mode 100644 index 0000000000..e940daa564 --- /dev/null +++ b/DirectProgramming/C++SYCL_FPGA/Tutorials/GettingStarted/fpga_compile/part5-dpcpp_functor_buffers/src/CMakeLists.txt @@ -0,0 +1,99 @@ +set(SOURCE_FILE vector_add.cpp) +set(TARGET_NAME vector_add) +set(EMULATOR_TARGET ${TARGET_NAME}.fpga_emu) +set(SIMULATOR_TARGET ${TARGET_NAME}.fpga_sim) +set(FPGA_TARGET ${TARGET_NAME}.fpga) + +# FPGA board selection +if(NOT DEFINED FPGA_DEVICE) + set(FPGA_DEVICE "Agilex") + message(STATUS "FPGA_DEVICE was not specified.\ + \nConfiguring the design to the default FPGA family: ${FPGA_DEVICE}\ + \nPlease refer to the README for information on target selection.") +else() + message(STATUS "Configuring the design with the following target: ${FPGA_DEVICE}") +endif() + +# These are Windows-specific flags: +# 1. /EHsc This is a Windows-specific flag that enables exception handling in host code +# 2. /Qactypes Include ac_types headers and link against ac_types emulation libraries +if(WIN32) + set(WIN_FLAG "/EHsc") + set(AC_TYPES_FLAG "/Qactypes") +else() + set(AC_TYPES_FLAG "-qactypes") +endif() + +# A SYCL ahead-of-time (AoT) compile processes the device code in two stages. +# 1. The "compile" stage compiles the device code to an intermediate representation (SPIR-V). +# 2. The "link" stage invokes the compiler's FPGA backend before linking. +# For this reason, FPGA backend flags must be passed as link flags in CMake. +set(EMULATOR_COMPILE_FLAGS "-fsycl -fintelfpga ${AC_TYPES_FLAG} -DFPGA_EMULATOR -Wall ${WIN_FLAG}") +set(EMULATOR_LINK_FLAGS "-fsycl -fintelfpga ${AC_TYPES_FLAG}") +set(SIMULATOR_COMPILE_FLAGS "-fsycl -fintelfpga ${AC_TYPES_FLAG} -DFPGA_SIMULATOR -Wall ${WIN_FLAG}") +set(SIMULATOR_LINK_FLAGS "-fsycl -fintelfpga ${AC_TYPES_FLAG} -Xssimulation -Xsghdl -Xstarget=${FPGA_DEVICE} ${USER_HARDWARE_FLAGS}") +set(REPORT_COMPILE_FLAGS "-fsycl -fintelfpga ${AC_TYPES_FLAG} -Wall ${WIN_FLAG} -DFPGA_REPORT") +set(REPORT_LINK_FLAGS "-fsycl -fintelfpga -Xshardware -Xstarget=${FPGA_DEVICE} ${USER_HARDWARE_FLAGS}") +set(HARDWARE_COMPILE_FLAGS "-fsycl -fintelfpga ${AC_TYPES_FLAG} -Wall ${WIN_FLAG} -DFPGA_HARDWARE") +set(HARDWARE_LINK_FLAGS "-fsycl -fintelfpga ${AC_TYPES_FLAG} -Xshardware -Xstarget=${FPGA_DEVICE} ${USER_HARDWARE_FLAGS}") +# use cmake -D USER_HARDWARE_FLAGS= to set extra flags for FPGA backend compilation + +############################################################################### +### FPGA Emulator +############################################################################### +# To compile in a single command: +# icpx -fsycl -fintelfpga ${AC_TYPES_FLAG} -DFPGA_EMULATOR fpga_compile.cpp -o fpga_compile.fpga_emu +# CMake executes: +# [compile] icpx -fsycl -fintelfpga ${AC_TYPES_FLAG} -DFPGA_EMULATOR -o fpga_compile.cpp.o -c fpga_compile.cpp +# [link] icpx -fsycl -fintelfpga ${AC_TYPES_FLAG} fpga_compile.cpp.o -o fpga_compile.fpga_emu +add_executable(${EMULATOR_TARGET} ${SOURCE_FILE}) +target_include_directories(${EMULATOR_TARGET} PRIVATE ../../../../include) +set_target_properties(${EMULATOR_TARGET} PROPERTIES COMPILE_FLAGS "${EMULATOR_COMPILE_FLAGS}") +set_target_properties(${EMULATOR_TARGET} PROPERTIES LINK_FLAGS "${EMULATOR_LINK_FLAGS}") +add_custom_target(fpga_emu DEPENDS ${EMULATOR_TARGET}) + +############################################################################### +### Generate Report +############################################################################### +# To compile manually: +# icpx -fsycl -fintelfpga ${AC_TYPES_FLAG} -Xshardware -Xstarget= -fsycl-link=early ac_fixed.cpp -o ac_fixed_report.a +set(FPGA_EARLY_IMAGE ${TARGET_NAME}_report.a) +# The compile output is not an executable, but an intermediate compilation result unique to SYCL. +add_executable(${FPGA_EARLY_IMAGE} ${SOURCE_FILE}) +target_include_directories(${FPGA_EARLY_IMAGE} PRIVATE ../../../../include) +add_custom_target(report DEPENDS ${FPGA_EARLY_IMAGE}) +set_target_properties(${FPGA_EARLY_IMAGE} PROPERTIES COMPILE_FLAGS "${REPORT_COMPILE_FLAGS}") +set_target_properties(${FPGA_EARLY_IMAGE} PROPERTIES LINK_FLAGS "${REPORT_LINK_FLAGS} -fsycl-link=early") +# fsycl-link=early stops the compiler after RTL generation, before invoking Quartus® + +############################################################################### +### FPGA Simulator +############################################################################### +# To compile in a single command: +# icpx -fsycl -fintelfpga -DFPGA_SIMULATOR ${AC_TYPES_FLAG} -Xssimulation -Xsghdl -Xstarget= ac_fixed.cpp -o ac_fixed.fpga +# CMake executes: +# [compile] icpx -fsycl -fintelfpga -DFPGA_SIMULATOR ${AC_TYPES_FLAG} -o ac_fixed.cpp.o -c ac_fixed.cpp +# [link] icpx -fsycl -fintelfpga ${AC_TYPES_FLAG} -Xssimulation -Xsghdl -Xstarget= ac_fixed.cpp.o -o ac_fixed.fpga +add_executable(${SIMULATOR_TARGET} EXCLUDE_FROM_ALL ${SOURCE_FILE}) +target_include_directories(${SIMULATOR_TARGET} PRIVATE ../../../../include) +add_custom_target(fpga_sim DEPENDS ${SIMULATOR_TARGET}) +set_target_properties(${SIMULATOR_TARGET} PROPERTIES COMPILE_FLAGS "${SIMULATOR_COMPILE_FLAGS}") +set_target_properties(${SIMULATOR_TARGET} PROPERTIES LINK_FLAGS "${SIMULATOR_LINK_FLAGS} -reuse-exe=${CMAKE_BINARY_DIR}/${SIMULATOR_TARGET}") +# The -reuse-exe flag enables rapid recompilation of host-only code changes. +# See C++SYCL_FPGA/GettingStarted/fast_recompile for details. + +############################################################################### +### FPGA Hardware +############################################################################### +# To compile in a single command: +# icpx -fsycl -fintelfpga ${AC_TYPES_FLAG} -Xshardware -Xstarget= ac_fixed.cpp -o ac_fixed.fpga +# CMake executes: +# [compile] icpx -fsycl -fintelfpga ${AC_TYPES_FLAG} -o ac_fixed.cpp.o -c ac_fixed.cpp +# [link] icpx -fsycl -fintelfpga ${AC_TYPES_FLAG} -Xshardware -Xstarget= ac_fixed.cpp.o -o ac_fixed.fpga +add_executable(${FPGA_TARGET} EXCLUDE_FROM_ALL ${SOURCE_FILE}) +target_include_directories(${FPGA_TARGET} PRIVATE ../../../../include) +add_custom_target(fpga DEPENDS ${FPGA_TARGET}) +set_target_properties(${FPGA_TARGET} PROPERTIES COMPILE_FLAGS "${HARDWARE_COMPILE_FLAGS}") +set_target_properties(${FPGA_TARGET} PROPERTIES LINK_FLAGS "${HARDWARE_LINK_FLAGS} -reuse-exe=${CMAKE_BINARY_DIR}/${FPGA_TARGET}") +# The -reuse-exe flag enables rapid recompilation of host-only code changes. +# See C++SYCL_FPGA/GettingStarted/fast_recompile for details. \ No newline at end of file diff --git a/DirectProgramming/C++SYCL_FPGA/Tutorials/GettingStarted/fpga_compile/part5-dpcpp_functor_buffers/src/vector_add.cpp b/DirectProgramming/C++SYCL_FPGA/Tutorials/GettingStarted/fpga_compile/part5-dpcpp_functor_buffers/src/vector_add.cpp new file mode 100644 index 0000000000..5d2c280f29 --- /dev/null +++ b/DirectProgramming/C++SYCL_FPGA/Tutorials/GettingStarted/fpga_compile/part5-dpcpp_functor_buffers/src/vector_add.cpp @@ -0,0 +1,116 @@ +#include + +// oneAPI headers +#include +#include + +using namespace sycl; + +// Forward declare the kernel name in the global scope. This is an FPGA best +// practice that reduces name mangling in the optimization reports. +class VectorAddID; + +template +struct VectorAdd { + public: + AccA vec_a_in; + AccB vec_b_in; + AccC vec_c_out; + int len; + + void operator()() const { + for (int idx = 0; idx < len; idx++) { + int a_val = vec_a_in[idx]; + int b_val = vec_b_in[idx]; + int sum = a_val + b_val; + vec_c_out[idx] = sum; + } + } +}; + +constexpr int kVectSize = 256; + +int main() { + bool passed = true; + try { + // Use compile-time macros to select either: + // - the FPGA emulator device (CPU emulation of the FPGA) + // - the FPGA device (a real FPGA) + // - the simulator device +#if FPGA_SIMULATOR + auto selector = sycl::ext::intel::fpga_simulator_selector_v; +#elif FPGA_HARDWARE + auto selector = sycl::ext::intel::fpga_selector_v; +#else // #if FPGA_EMULATOR + auto selector = sycl::ext::intel::fpga_emulator_selector_v; +#endif + + // create the device queue + sycl::queue q(selector); + + auto device = q.get_device(); + + std::cout << "Running on device: " + << device.get_info().c_str() + << std::endl; + + // declare arrays and fill them + int *vec_a = new int[kVectSize]; + int *vec_b = new int[kVectSize]; + int *vec_c = new int[kVectSize]; + for (int i = 0; i < kVectSize; i++) { + vec_a[i] = i; + vec_b[i] = (kVectSize - i); + } + + std::cout << "add two vectors of size " << kVectSize << std::endl; + { + // copy the input arrays to buffers to share with kernel + buffer buffer_a{vec_a, range(kVectSize)}; + buffer buffer_b{vec_b, range(kVectSize)}; + buffer buffer_c{vec_c, range(kVectSize)}; + + q.submit([&](handler &h) { + // use accessors to interact with buffers from device code + accessor accessor_a{buffer_a, h, read_only}; + accessor accessor_b{buffer_b, h, read_only}; + accessor accessor_c{buffer_c, h, read_write, no_init}; + + h.single_task(VectorAdd{ + accessor_a, accessor_b, accessor_c, kVectSize}); + }); + } + // result is copied back to host automatically when accessors go out of scope. + + // verify that vec_c is correct + for (int i = 0; i < kVectSize; i++) { + int expected = vec_a[i] + vec_b[i]; + if (vec_c[i] != expected) { + std::cout << "idx=" << i << ": result " << vec_c[i] << ", expected (" << expected << ") A=" << vec_a[i] << " + B=" << vec_b[i] << std::endl; + passed = false; + } + } + + std::cout << (passed ? "PASSED" : "FAILED") << std::endl; + + delete[] vec_a; + delete[] vec_b; + delete[] vec_c; + } catch (sycl::exception const &e) { + // Catches exceptions in the host code. + std::cerr << "Caught a SYCL host exception:\n" << e.what() << "\n"; + + // Most likely the runtime couldn't find FPGA hardware! + if (e.code().value() == CL_DEVICE_NOT_FOUND) { + std::cerr << "If you are targeting an FPGA, please ensure that your " + "system has a correctly configured FPGA board.\n"; + std::cerr << "Run sys_check in the oneAPI root directory to verify.\n"; + std::cerr << "If you are targeting the FPGA emulator, compile with " + "-DFPGA_EMULATOR.\n"; + } + std::terminate(); + } + return passed ? EXIT_SUCCESS : EXIT_FAILURE; +} \ No newline at end of file From 54f8c6fc718404e12fc5ec9d01082c7636393cf1 Mon Sep 17 00:00:00 2001 From: kevinUTAT Date: Mon, 13 Feb 2023 09:00:51 -0800 Subject: [PATCH 2/7] remove old files --- .../fpga_compile/CMakeLists.txt | 20 --- .../fpga_compile/src/CMakeLists.txt | 90 ------------ .../fpga_compile/src/fpga_compile.cpp | 129 ------------------ 3 files changed, 239 deletions(-) delete mode 100755 DirectProgramming/C++SYCL_FPGA/Tutorials/GettingStarted/fpga_compile/CMakeLists.txt delete mode 100755 DirectProgramming/C++SYCL_FPGA/Tutorials/GettingStarted/fpga_compile/src/CMakeLists.txt delete mode 100644 DirectProgramming/C++SYCL_FPGA/Tutorials/GettingStarted/fpga_compile/src/fpga_compile.cpp diff --git a/DirectProgramming/C++SYCL_FPGA/Tutorials/GettingStarted/fpga_compile/CMakeLists.txt b/DirectProgramming/C++SYCL_FPGA/Tutorials/GettingStarted/fpga_compile/CMakeLists.txt deleted file mode 100755 index a64c26639a..0000000000 --- a/DirectProgramming/C++SYCL_FPGA/Tutorials/GettingStarted/fpga_compile/CMakeLists.txt +++ /dev/null @@ -1,20 +0,0 @@ -if(UNIX) - # Direct CMake to use icpx rather than the default C++ compiler/linker - set(CMAKE_CXX_COMPILER icpx) -else() # Windows - # Force CMake to use icx-cl rather than the default C++ compiler/linker - # (needed on Windows only) - include (CMakeForceCompiler) - CMAKE_FORCE_CXX_COMPILER (icx-cl IntelDPCPP) - include (Platform/Windows-Clang) -endif() - -cmake_minimum_required (VERSION 3.4) - -project(FPGACompile CXX) - -set(CMAKE_ARCHIVE_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}) -set(CMAKE_LIBRARY_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}) -set(CMAKE_RUNTIME_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}) - -add_subdirectory (src) diff --git a/DirectProgramming/C++SYCL_FPGA/Tutorials/GettingStarted/fpga_compile/src/CMakeLists.txt b/DirectProgramming/C++SYCL_FPGA/Tutorials/GettingStarted/fpga_compile/src/CMakeLists.txt deleted file mode 100755 index 373504a091..0000000000 --- a/DirectProgramming/C++SYCL_FPGA/Tutorials/GettingStarted/fpga_compile/src/CMakeLists.txt +++ /dev/null @@ -1,90 +0,0 @@ -set(SOURCE_FILE fpga_compile.cpp) -set(TARGET_NAME fpga_compile) -set(EMULATOR_TARGET ${TARGET_NAME}.fpga_emu) -set(SIMULATOR_TARGET ${TARGET_NAME}.fpga_sim) -set(FPGA_TARGET ${TARGET_NAME}.fpga) - -# FPGA board selection -if(NOT DEFINED FPGA_DEVICE) - set(FPGA_DEVICE "Agilex") - message(STATUS "FPGA_DEVICE was not specified.\ - \nConfiguring the design to the default FPGA family: ${FPGA_DEVICE}\ - \nPlease refer to the README for information on target selection.") -else() - message(STATUS "Configuring the design with the following target: ${FPGA_DEVICE}") -endif() - -# This is a Windows-specific flag that enables exception handling in host code -if(WIN32) - set(WIN_FLAG "/EHsc") -endif() - -# A SYCL ahead-of-time (AoT) compile processes the device code in two stages. -# 1. The "compile" stage compiles the device code to an intermediate representation (SPIR-V). -# 2. The "link" stage invokes the compiler's FPGA backend before linking. -# For this reason, FPGA backend flags must be passed as link flags in CMake. -set(EMULATOR_COMPILE_FLAGS "-fsycl -fintelfpga -Wall ${WIN_FLAG} -DFPGA_EMULATOR") -set(EMULATOR_LINK_FLAGS "-fsycl -fintelfpga") -set(SIMULATOR_COMPILE_FLAGS "-fsycl -fintelfpga -Wall ${WIN_FLAG} -Xssimulation -DFPGA_SIMULATOR") -set(SIMULATOR_LINK_FLAGS "-fsycl -fintelfpga -Xssimulation -Xsghdl -Xstarget=${FPGA_DEVICE} ${USER_HARDWARE_FLAGS}") -set(HARDWARE_COMPILE_FLAGS "-fsycl -fintelfpga -Wall ${WIN_FLAG} -DFPGA_HARDWARE") -set(HARDWARE_LINK_FLAGS "-fsycl -fintelfpga -Xshardware -Xstarget=${FPGA_DEVICE} ${USER_HARDWARE_FLAGS}") -# use cmake -D USER_HARDWARE_FLAGS= to set extra flags for FPGA backend compilation - -############################################################################### -### FPGA Emulator -############################################################################### -# To compile in a single command: -# icpx -fsycl -fintelfpga -DFPGA_EMULATOR fpga_compile.cpp -o fpga_compile.fpga_emu -# CMake executes: -# [compile] icpx -fsycl -fintelfpga -DFPGA_EMULATOR -o fpga_compile.cpp.o -c fpga_compile.cpp -# [link] icpx -fsycl -fintelfpga fpga_compile.cpp.o -o fpga_compile.fpga_emu -add_executable(${EMULATOR_TARGET} ${SOURCE_FILE}) -target_include_directories(${EMULATOR_TARGET} PRIVATE ../../../../include) -set_target_properties(${EMULATOR_TARGET} PROPERTIES COMPILE_FLAGS "${EMULATOR_COMPILE_FLAGS}") -set_target_properties(${EMULATOR_TARGET} PROPERTIES LINK_FLAGS "${EMULATOR_LINK_FLAGS}") -add_custom_target(fpga_emu DEPENDS ${EMULATOR_TARGET}) - -############################################################################### -### FPGA Simulator -############################################################################### -# To compile in a single command: -# icpx -fsycl -fintelfpga -Xssimulation -Xsghdl -Xstarget= -DFPGA_SIMULATOR .cpp -o .fpga_sim -# CMake executes: -# [compile] icpx -fsycl -fintelfpga -Xssimulation -DFPGA_SIMULATOR -o .cpp.o -c .cpp -# [link] icpx -fsycl -fintelfpga -Xssimulation -Xsghdl -Xstarget= .cpp.o -o .fpga_sim -add_executable(${SIMULATOR_TARGET} ${SOURCE_FILE}) -target_include_directories(${SIMULATOR_TARGET} PRIVATE ../../../../include) -set_target_properties(${SIMULATOR_TARGET} PROPERTIES COMPILE_FLAGS "${SIMULATOR_COMPILE_FLAGS}") -set_target_properties(${SIMULATOR_TARGET} PROPERTIES LINK_FLAGS "${SIMULATOR_LINK_FLAGS}") -add_custom_target(fpga_sim DEPENDS ${SIMULATOR_TARGET}) - -############################################################################### -### Generate Report -############################################################################### -# To compile manually: -# icpx -fsycl -fintelfpga -Xshardware -Xstarget= -fsycl-link=early fpga_compile.cpp -o fpga_compile_report.a -set(FPGA_EARLY_IMAGE ${TARGET_NAME}_report.a) -# The compile output is not an executable, but an intermediate compilation result unique to SYCL. -add_executable(${FPGA_EARLY_IMAGE} ${SOURCE_FILE}) -target_include_directories(${FPGA_EARLY_IMAGE} PRIVATE ../../../../include) -add_custom_target(report DEPENDS ${FPGA_EARLY_IMAGE}) -set_target_properties(${FPGA_EARLY_IMAGE} PROPERTIES COMPILE_FLAGS "${HARDWARE_COMPILE_FLAGS}") -set_target_properties(${FPGA_EARLY_IMAGE} PROPERTIES LINK_FLAGS "${HARDWARE_LINK_FLAGS} -fsycl-link=early") -# fsycl-link=early stops the compiler after RTL generation, before invoking Quartus® - -############################################################################### -### FPGA Hardware -############################################################################### -# To compile in a single command: -# icpx -fsycl -fintelfpga -Xshardware -Xstarget= fpga_compile.cpp -o fpga_compile.fpga -# CMake executes: -# [compile] icpx -fsycl -fintelfpga -o fpga_compile.cpp.o -c fpga_compile.cpp -# [link] icpx -fsycl -fintelfpga -Xshardware -Xstarget= fpga_compile.cpp.o -o fpga_compile.fpga -add_executable(${FPGA_TARGET} EXCLUDE_FROM_ALL ${SOURCE_FILE}) -target_include_directories(${FPGA_TARGET} PRIVATE ../../../../include) -add_custom_target(fpga DEPENDS ${FPGA_TARGET}) -set_target_properties(${FPGA_TARGET} PROPERTIES COMPILE_FLAGS "${HARDWARE_COMPILE_FLAGS}") -set_target_properties(${FPGA_TARGET} PROPERTIES LINK_FLAGS "${HARDWARE_LINK_FLAGS}") - - diff --git a/DirectProgramming/C++SYCL_FPGA/Tutorials/GettingStarted/fpga_compile/src/fpga_compile.cpp b/DirectProgramming/C++SYCL_FPGA/Tutorials/GettingStarted/fpga_compile/src/fpga_compile.cpp deleted file mode 100644 index ecabfca003..0000000000 --- a/DirectProgramming/C++SYCL_FPGA/Tutorials/GettingStarted/fpga_compile/src/fpga_compile.cpp +++ /dev/null @@ -1,129 +0,0 @@ -//============================================================== -// Copyright Intel Corporation -// -// SPDX-License-Identifier: MIT -// ============================================================= -#include -#include -#include -#include - -#include "exception_handler.hpp" - -using namespace sycl; - -// Vector size for this example -constexpr size_t kSize = 1024; - -// Forward declare the kernel name in the global scope to reduce name mangling. -// This is an FPGA best practice that makes it easier to identify the kernel in -// the optimization reports. -class VectorAdd; - - -int main() { - - // Set up three vectors and fill two with random values. - std::vector vec_a(kSize), vec_b(kSize), vec_r(kSize); - for (int i = 0; i < kSize; i++) { - vec_a[i] = rand(); - vec_b[i] = rand(); - } - - // Select either: - // - the FPGA emulator device (CPU emulation of the FPGA) - // - the FPGA simulator - // - the FPGA device (a real FPGA) -#if FPGA_SIMULATOR - auto selector = sycl::ext::intel::fpga_simulator_selector_v; -#elif FPGA_HARDWARE - auto selector = sycl::ext::intel::fpga_selector_v; -#else // #if FPGA_EMULATOR - auto selector = sycl::ext::intel::fpga_emulator_selector_v; -#endif - - try { - - // Create a queue bound to the chosen device. - // If the device is unavailable, a SYCL runtime exception is thrown. - queue q(selector, fpga_tools::exception_handler); - - // Print out the device information. - auto device = q.get_device(); - - std::cout << "Running on device: " - << device.get_info().c_str() - << std::endl; - - { - // Create buffers to share data between host and device. - // The runtime will copy the necessary data to the FPGA device memory - // when the kernel is launched. - buffer buf_a(vec_a); - buffer buf_b(vec_b); - buffer buf_r(vec_r); - - - // Submit a command group to the device queue. - q.submit([&](handler& h) { - - // The SYCL runtime uses the accessors to infer data dependencies. - // A "read" accessor must wait for data to be copied to the device - // before the kernel can start. A "write no_init" accessor does not. - accessor a(buf_a, h, read_only); - accessor b(buf_b, h, read_only); - accessor r(buf_r, h, write_only, no_init); - - // The kernel uses single_task rather than parallel_for. - // The task's for loop is executed in pipeline parallel on the FPGA, - // exploiting the same parallelism as an equivalent parallel_for. - // - // The "kernel_args_restrict" tells the compiler that a, b, and r - // do not alias. For a full explanation, see: - // C++SYCL_FPGA/Tutorials/Features/kernel_args_restrict - h.single_task([=]() [[intel::kernel_args_restrict]] { - for (int i = 0; i < kSize; ++i) { - r[i] = a[i] + b[i]; - } - }); - }); - - // The buffer destructor is invoked when the buffers pass out of scope. - // buf_r's destructor updates the content of vec_r on the host. - } - - // The queue destructor is invoked when q passes out of scope. - // q's destructor invokes q's exception handler on any device exceptions. - } - catch (sycl::exception const& e) { - // Catches exceptions in the host code - std::cerr << "Caught a SYCL host exception:\n" << e.what() << "\n"; - - // Most likely the runtime couldn't find FPGA hardware! - if (e.code().value() == CL_DEVICE_NOT_FOUND) { - std::cerr << "If you are targeting an FPGA, please ensure that your " - "system has a correctly configured FPGA board.\n"; - std::cerr << "Run sys_check in the oneAPI root directory to verify.\n"; - std::cerr << "If you are targeting the FPGA emulator, compile with " - "-DFPGA_EMULATOR.\n"; - } - std::terminate(); - } - - // Check the results. - int correct = 0; - for (int i = 0; i < kSize; i++) { - if ( vec_r[i] == vec_a[i] + vec_b[i] ) { - correct++; - } - } - - // Summarize and return. - if (correct == kSize) { - std::cout << "PASSED: results are correct\n"; - } else { - std::cout << "FAILED: results are incorrect\n"; - } - - return !(correct == kSize); -} From 032db08eef7fc2e82d4d6efdbcc4e74eb32b9451 Mon Sep 17 00:00:00 2001 From: kevinUTAT Date: Tue, 14 Feb 2023 08:29:12 -0800 Subject: [PATCH 3/7] change link style to inline --- .../Tutorials/GettingStarted/fpga_compile/README.md | 11 +++-------- 1 file changed, 3 insertions(+), 8 deletions(-) diff --git a/DirectProgramming/C++SYCL_FPGA/Tutorials/GettingStarted/fpga_compile/README.md b/DirectProgramming/C++SYCL_FPGA/Tutorials/GettingStarted/fpga_compile/README.md index 1e3db77510..8a363fc1df 100755 --- a/DirectProgramming/C++SYCL_FPGA/Tutorials/GettingStarted/fpga_compile/README.md +++ b/DirectProgramming/C++SYCL_FPGA/Tutorials/GettingStarted/fpga_compile/README.md @@ -101,7 +101,7 @@ For this compilation type, your SYCL device code is optimized and converted into The optimization report contains significant information about how the compiler has transformed your device code into an FPGA design. The report includes visualizations of structures generated on the FPGA, performance and expected performance bottleneck information, and estimated resource utilization. Optimization reports are generated for the "optimization report", "simulator" and "hardware" compilation types. -The [FPGA Optimization Guide for Intel® oneAPI Toolkits Developer Guide][1] contains a chapter about how to analyze the reports generated after the FPGA early image and FPGA image. +The [FPGA Optimization Guide for Intel® oneAPI Toolkits Developer Guide](https://software.intel.com/content/www/us/en/develop/documentation/oneapi-fpga-optimization-guide/top/analyze-your-design.html) contains a chapter about how to analyze the reports generated after the FPGA early image and FPGA image. #### FPGA Simulator The FPGA simulator allows you to simulate the exact behavior of the synthesized kernel. Like emulation, you can run simulation on a system that does not have a target FPGA board installed. The simulator models a kernel much more accurately than the emulator, but it is much slower than the emulator. @@ -180,7 +180,7 @@ Through `-Xstarget`, you can target an explicit board, a device family or a FPGA `-DFPGA_EMULATOR`, `-DFPGA_SIMULATOR`, `-DFPGA_HARDWARE` are options that adds a preprocessor define that invokes the emulator/simulator/FPGA device selector in this sample (see code snippet above). -The [Intel® oneAPI Programming Guide][2] contains a chapter explains the compiler options used here. +The [Intel® oneAPI Programming Guide](https://www.intel.com/content/www/us/en/develop/documentation/oneapi-programming-guide/top/programming-interface/fpga-flow/fpga-compilation-flags.html) contains a chapter explains the compiler options used here. ### Source Code @@ -219,7 +219,7 @@ Part 5 demonstrates the vector addition in SYCL* C++ with a 'functor' coding st > - `C:\Program Files(x86)\Intel\oneAPI\setvars.bat` > - Windows PowerShell*, use the following command: `cmd.exe "/K" '"C:\Program Files (x86)\Intel\oneAPI\setvars.bat" && powershell'` > -> For more information on configuring environment variables, see [Use the setvars Script with Linux* or macOS*][3] or [Use the setvars Script with Windows*][4]. +> For more information on configuring environment variables, see [Use the setvars Script with Linux* or macOS*](https://www.intel.com/content/www/us/en/develop/documentation/oneapi-programming-guide/top/oneapi-development-environment-setup/use-the-setvars-script-with-linux-or-macos.html) or [Use the setvars Script with Windows*](https://www.intel.com/content/www/us/en/develop/documentation/oneapi-programming-guide/top/oneapi-development-environment-setup/use-the-setvars-script-with-windows.html). ### Run CMake to generate the Makefiles @@ -346,8 +346,3 @@ Code samples are licensed under the MIT license. See [License.txt](https://github.com/oneapi-src/oneAPI-samples/blob/master/License.txt) for details. Third party program Licenses can be found here: [third-party-programs.txt](https://github.com/oneapi-src/oneAPI-samples/blob/master/third-party-programs.txt). - -[1]: -[2]: -[3]: -[4]: From 3335e4e3006b4b5a5fa64ef114918633a6ae8e85 Mon Sep 17 00:00:00 2001 From: kevinUTAT Date: Thu, 23 Feb 2023 12:07:48 -0800 Subject: [PATCH 4/7] remove part5 --- .../GettingStarted/fpga_compile/README.md | 11 +- .../CMakeLists.txt | 20 --- .../src/CMakeLists.txt | 99 --------------- .../src/vector_add.cpp | 116 ------------------ 4 files changed, 3 insertions(+), 243 deletions(-) delete mode 100644 DirectProgramming/C++SYCL_FPGA/Tutorials/GettingStarted/fpga_compile/part5-dpcpp_functor_buffers/CMakeLists.txt delete mode 100644 DirectProgramming/C++SYCL_FPGA/Tutorials/GettingStarted/fpga_compile/part5-dpcpp_functor_buffers/src/CMakeLists.txt delete mode 100644 DirectProgramming/C++SYCL_FPGA/Tutorials/GettingStarted/fpga_compile/part5-dpcpp_functor_buffers/src/vector_add.cpp diff --git a/DirectProgramming/C++SYCL_FPGA/Tutorials/GettingStarted/fpga_compile/README.md b/DirectProgramming/C++SYCL_FPGA/Tutorials/GettingStarted/fpga_compile/README.md index 8a363fc1df..bbdd603965 100755 --- a/DirectProgramming/C++SYCL_FPGA/Tutorials/GettingStarted/fpga_compile/README.md +++ b/DirectProgramming/C++SYCL_FPGA/Tutorials/GettingStarted/fpga_compile/README.md @@ -22,7 +22,7 @@ This FPGA tutorial introduces how to compile SYCL*-compliant code for FPGAs thro > > :warning: Make sure you add the device files associated with the FPGA that you are targeting to your Intel® Quartus® Prime installation. -> **Note**: SYCL USM allocations, used in `part2` and `part5` of this tutorial, are only supported on FPGA boards that have a USM capable BSP (e.g. the Intel® FPGA PAC D5005 with Intel Stratix® 10 SX with USM support: intel_s10sx_pac:pac_s10_usm) or when targeting an FPGA family/part number. +> **Note**: SYCL USM allocations, used in `part2` and `part3` of this tutorial, are only supported on FPGA boards that have a USM capable BSP (e.g. the Intel® FPGA PAC D5005 with Intel Stratix® 10 SX with USM support: intel_s10sx_pac:pac_s10_usm) or when targeting an FPGA family/part number. ## Prerequisites This sample is part of the FPGA code samples. @@ -184,7 +184,7 @@ The [Intel® oneAPI Programming Guide](https://www.intel.com/content/www/us/en/d ### Source Code -There are 5 parts to this tutorial located in the 3 sub-folders. Together, they demonstrate how you can migrate an algorithm from vanilla C++ code to SYCL for FPGA. Note that you may also choose to use a functor with buffers, or a function with USM. +There are 4 parts to this tutorial located in the 3 sub-folders. Together, they demonstrate how you can migrate an algorithm from vanilla C++ code to SYCL for FPGA. Note that you may also choose to use a functor with buffers, or a function with USM. #### Part 1 C++ Part 1 demonstrates a vector addition program in vanilla C++. Observe how the `VectorAdd` function is separated from the `main()` function, and the `vec_a`, `vec_b`, and `vec_c` vectors are allocated onto the heap. @@ -202,9 +202,6 @@ h.single_task<...>([=]() { #### Part 4 SYCL* (lambda function and buffer) Part 4 shows the vector addition in SYCL* C++ with a 'function' coding style and buffer & accessor interface. This code style will be familiar to users who are already experienced with SYCL*. Observe how `vec_a`, `vec_b`, and `vec_c` are copied into buffers before the `VectorAdd` function is called. -#### Part 5 SYCL* (functor and buffer) -Part 5 demonstrates the vector addition in SYCL* C++ with a 'functor' coding style using buffer & accessor interface. Observe how template parameters are used in the functor to pass in the accessors. - ## Building the `fpga_compile` Tutorial > **Note**: When working with the command-line interface (CLI), you should configure the oneAPI toolkits using environment variables. > Set up your CLI environment by sourcing the `setvars` script located in the root of your oneAPI installation every time you open a new terminal window. @@ -233,7 +230,6 @@ cd - `part2-dpcpp_functor_usm` - `part3-dpcpp_lambda_usm` - `part4-dpcpp_lambda_buffers` -- `part5-dpcpp_functor_buffers` Generate the `Makefile` by running `cmake`. ``` @@ -266,7 +262,6 @@ cd - `part2-dpcpp_functor_usm` - `part3-dpcpp_lambda_usm` - `part4-dpcpp_lambda_buffers` -- `part5-dpcpp_functor_buffers` Generate the `Makefile` by running `cmake`. ``` @@ -304,7 +299,7 @@ The `fpga_emu`, `fpga_sim` and `fpga` targets produce binaries that you can run. For part 1 of this tutorial, only the `fpga_emu` target is available as this regular C++ code only target a CPU. ## Examining the Reports -In *part2*, *part3*, *part4* and *part5*, after running the `report` target, the optimization report can be viewed using the `fpga_report` application: +In *part2*, *part3* and *part4*, after running the `report` target, the optimization report can be viewed using the `fpga_report` application: ``` fpga_report vector_add.report.prj/reports/vector_add_report.zip ``` diff --git a/DirectProgramming/C++SYCL_FPGA/Tutorials/GettingStarted/fpga_compile/part5-dpcpp_functor_buffers/CMakeLists.txt b/DirectProgramming/C++SYCL_FPGA/Tutorials/GettingStarted/fpga_compile/part5-dpcpp_functor_buffers/CMakeLists.txt deleted file mode 100644 index d865aee631..0000000000 --- a/DirectProgramming/C++SYCL_FPGA/Tutorials/GettingStarted/fpga_compile/part5-dpcpp_functor_buffers/CMakeLists.txt +++ /dev/null @@ -1,20 +0,0 @@ -if(UNIX) - # Direct CMake to use dpcpp rather than the default C++ compiler/linker - set(CMAKE_CXX_COMPILER icpx) -else() # Windows - # Force CMake to use dpcpp rather than the default C++ compiler/linker - # (needed on Windows only) - include (CMakeForceCompiler) - CMAKE_FORCE_CXX_COMPILER (icx-cl IntelDPCPP) - include (Platform/Windows-Clang) -endif() - -cmake_minimum_required (VERSION 3.7.2) - -project(FPGACompile CXX) - -set(CMAKE_ARCHIVE_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}) -set(CMAKE_LIBRARY_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}) -set(CMAKE_RUNTIME_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}) - -add_subdirectory (src) \ No newline at end of file diff --git a/DirectProgramming/C++SYCL_FPGA/Tutorials/GettingStarted/fpga_compile/part5-dpcpp_functor_buffers/src/CMakeLists.txt b/DirectProgramming/C++SYCL_FPGA/Tutorials/GettingStarted/fpga_compile/part5-dpcpp_functor_buffers/src/CMakeLists.txt deleted file mode 100644 index e940daa564..0000000000 --- a/DirectProgramming/C++SYCL_FPGA/Tutorials/GettingStarted/fpga_compile/part5-dpcpp_functor_buffers/src/CMakeLists.txt +++ /dev/null @@ -1,99 +0,0 @@ -set(SOURCE_FILE vector_add.cpp) -set(TARGET_NAME vector_add) -set(EMULATOR_TARGET ${TARGET_NAME}.fpga_emu) -set(SIMULATOR_TARGET ${TARGET_NAME}.fpga_sim) -set(FPGA_TARGET ${TARGET_NAME}.fpga) - -# FPGA board selection -if(NOT DEFINED FPGA_DEVICE) - set(FPGA_DEVICE "Agilex") - message(STATUS "FPGA_DEVICE was not specified.\ - \nConfiguring the design to the default FPGA family: ${FPGA_DEVICE}\ - \nPlease refer to the README for information on target selection.") -else() - message(STATUS "Configuring the design with the following target: ${FPGA_DEVICE}") -endif() - -# These are Windows-specific flags: -# 1. /EHsc This is a Windows-specific flag that enables exception handling in host code -# 2. /Qactypes Include ac_types headers and link against ac_types emulation libraries -if(WIN32) - set(WIN_FLAG "/EHsc") - set(AC_TYPES_FLAG "/Qactypes") -else() - set(AC_TYPES_FLAG "-qactypes") -endif() - -# A SYCL ahead-of-time (AoT) compile processes the device code in two stages. -# 1. The "compile" stage compiles the device code to an intermediate representation (SPIR-V). -# 2. The "link" stage invokes the compiler's FPGA backend before linking. -# For this reason, FPGA backend flags must be passed as link flags in CMake. -set(EMULATOR_COMPILE_FLAGS "-fsycl -fintelfpga ${AC_TYPES_FLAG} -DFPGA_EMULATOR -Wall ${WIN_FLAG}") -set(EMULATOR_LINK_FLAGS "-fsycl -fintelfpga ${AC_TYPES_FLAG}") -set(SIMULATOR_COMPILE_FLAGS "-fsycl -fintelfpga ${AC_TYPES_FLAG} -DFPGA_SIMULATOR -Wall ${WIN_FLAG}") -set(SIMULATOR_LINK_FLAGS "-fsycl -fintelfpga ${AC_TYPES_FLAG} -Xssimulation -Xsghdl -Xstarget=${FPGA_DEVICE} ${USER_HARDWARE_FLAGS}") -set(REPORT_COMPILE_FLAGS "-fsycl -fintelfpga ${AC_TYPES_FLAG} -Wall ${WIN_FLAG} -DFPGA_REPORT") -set(REPORT_LINK_FLAGS "-fsycl -fintelfpga -Xshardware -Xstarget=${FPGA_DEVICE} ${USER_HARDWARE_FLAGS}") -set(HARDWARE_COMPILE_FLAGS "-fsycl -fintelfpga ${AC_TYPES_FLAG} -Wall ${WIN_FLAG} -DFPGA_HARDWARE") -set(HARDWARE_LINK_FLAGS "-fsycl -fintelfpga ${AC_TYPES_FLAG} -Xshardware -Xstarget=${FPGA_DEVICE} ${USER_HARDWARE_FLAGS}") -# use cmake -D USER_HARDWARE_FLAGS= to set extra flags for FPGA backend compilation - -############################################################################### -### FPGA Emulator -############################################################################### -# To compile in a single command: -# icpx -fsycl -fintelfpga ${AC_TYPES_FLAG} -DFPGA_EMULATOR fpga_compile.cpp -o fpga_compile.fpga_emu -# CMake executes: -# [compile] icpx -fsycl -fintelfpga ${AC_TYPES_FLAG} -DFPGA_EMULATOR -o fpga_compile.cpp.o -c fpga_compile.cpp -# [link] icpx -fsycl -fintelfpga ${AC_TYPES_FLAG} fpga_compile.cpp.o -o fpga_compile.fpga_emu -add_executable(${EMULATOR_TARGET} ${SOURCE_FILE}) -target_include_directories(${EMULATOR_TARGET} PRIVATE ../../../../include) -set_target_properties(${EMULATOR_TARGET} PROPERTIES COMPILE_FLAGS "${EMULATOR_COMPILE_FLAGS}") -set_target_properties(${EMULATOR_TARGET} PROPERTIES LINK_FLAGS "${EMULATOR_LINK_FLAGS}") -add_custom_target(fpga_emu DEPENDS ${EMULATOR_TARGET}) - -############################################################################### -### Generate Report -############################################################################### -# To compile manually: -# icpx -fsycl -fintelfpga ${AC_TYPES_FLAG} -Xshardware -Xstarget= -fsycl-link=early ac_fixed.cpp -o ac_fixed_report.a -set(FPGA_EARLY_IMAGE ${TARGET_NAME}_report.a) -# The compile output is not an executable, but an intermediate compilation result unique to SYCL. -add_executable(${FPGA_EARLY_IMAGE} ${SOURCE_FILE}) -target_include_directories(${FPGA_EARLY_IMAGE} PRIVATE ../../../../include) -add_custom_target(report DEPENDS ${FPGA_EARLY_IMAGE}) -set_target_properties(${FPGA_EARLY_IMAGE} PROPERTIES COMPILE_FLAGS "${REPORT_COMPILE_FLAGS}") -set_target_properties(${FPGA_EARLY_IMAGE} PROPERTIES LINK_FLAGS "${REPORT_LINK_FLAGS} -fsycl-link=early") -# fsycl-link=early stops the compiler after RTL generation, before invoking Quartus® - -############################################################################### -### FPGA Simulator -############################################################################### -# To compile in a single command: -# icpx -fsycl -fintelfpga -DFPGA_SIMULATOR ${AC_TYPES_FLAG} -Xssimulation -Xsghdl -Xstarget= ac_fixed.cpp -o ac_fixed.fpga -# CMake executes: -# [compile] icpx -fsycl -fintelfpga -DFPGA_SIMULATOR ${AC_TYPES_FLAG} -o ac_fixed.cpp.o -c ac_fixed.cpp -# [link] icpx -fsycl -fintelfpga ${AC_TYPES_FLAG} -Xssimulation -Xsghdl -Xstarget= ac_fixed.cpp.o -o ac_fixed.fpga -add_executable(${SIMULATOR_TARGET} EXCLUDE_FROM_ALL ${SOURCE_FILE}) -target_include_directories(${SIMULATOR_TARGET} PRIVATE ../../../../include) -add_custom_target(fpga_sim DEPENDS ${SIMULATOR_TARGET}) -set_target_properties(${SIMULATOR_TARGET} PROPERTIES COMPILE_FLAGS "${SIMULATOR_COMPILE_FLAGS}") -set_target_properties(${SIMULATOR_TARGET} PROPERTIES LINK_FLAGS "${SIMULATOR_LINK_FLAGS} -reuse-exe=${CMAKE_BINARY_DIR}/${SIMULATOR_TARGET}") -# The -reuse-exe flag enables rapid recompilation of host-only code changes. -# See C++SYCL_FPGA/GettingStarted/fast_recompile for details. - -############################################################################### -### FPGA Hardware -############################################################################### -# To compile in a single command: -# icpx -fsycl -fintelfpga ${AC_TYPES_FLAG} -Xshardware -Xstarget= ac_fixed.cpp -o ac_fixed.fpga -# CMake executes: -# [compile] icpx -fsycl -fintelfpga ${AC_TYPES_FLAG} -o ac_fixed.cpp.o -c ac_fixed.cpp -# [link] icpx -fsycl -fintelfpga ${AC_TYPES_FLAG} -Xshardware -Xstarget= ac_fixed.cpp.o -o ac_fixed.fpga -add_executable(${FPGA_TARGET} EXCLUDE_FROM_ALL ${SOURCE_FILE}) -target_include_directories(${FPGA_TARGET} PRIVATE ../../../../include) -add_custom_target(fpga DEPENDS ${FPGA_TARGET}) -set_target_properties(${FPGA_TARGET} PROPERTIES COMPILE_FLAGS "${HARDWARE_COMPILE_FLAGS}") -set_target_properties(${FPGA_TARGET} PROPERTIES LINK_FLAGS "${HARDWARE_LINK_FLAGS} -reuse-exe=${CMAKE_BINARY_DIR}/${FPGA_TARGET}") -# The -reuse-exe flag enables rapid recompilation of host-only code changes. -# See C++SYCL_FPGA/GettingStarted/fast_recompile for details. \ No newline at end of file diff --git a/DirectProgramming/C++SYCL_FPGA/Tutorials/GettingStarted/fpga_compile/part5-dpcpp_functor_buffers/src/vector_add.cpp b/DirectProgramming/C++SYCL_FPGA/Tutorials/GettingStarted/fpga_compile/part5-dpcpp_functor_buffers/src/vector_add.cpp deleted file mode 100644 index 5d2c280f29..0000000000 --- a/DirectProgramming/C++SYCL_FPGA/Tutorials/GettingStarted/fpga_compile/part5-dpcpp_functor_buffers/src/vector_add.cpp +++ /dev/null @@ -1,116 +0,0 @@ -#include - -// oneAPI headers -#include -#include - -using namespace sycl; - -// Forward declare the kernel name in the global scope. This is an FPGA best -// practice that reduces name mangling in the optimization reports. -class VectorAddID; - -template -struct VectorAdd { - public: - AccA vec_a_in; - AccB vec_b_in; - AccC vec_c_out; - int len; - - void operator()() const { - for (int idx = 0; idx < len; idx++) { - int a_val = vec_a_in[idx]; - int b_val = vec_b_in[idx]; - int sum = a_val + b_val; - vec_c_out[idx] = sum; - } - } -}; - -constexpr int kVectSize = 256; - -int main() { - bool passed = true; - try { - // Use compile-time macros to select either: - // - the FPGA emulator device (CPU emulation of the FPGA) - // - the FPGA device (a real FPGA) - // - the simulator device -#if FPGA_SIMULATOR - auto selector = sycl::ext::intel::fpga_simulator_selector_v; -#elif FPGA_HARDWARE - auto selector = sycl::ext::intel::fpga_selector_v; -#else // #if FPGA_EMULATOR - auto selector = sycl::ext::intel::fpga_emulator_selector_v; -#endif - - // create the device queue - sycl::queue q(selector); - - auto device = q.get_device(); - - std::cout << "Running on device: " - << device.get_info().c_str() - << std::endl; - - // declare arrays and fill them - int *vec_a = new int[kVectSize]; - int *vec_b = new int[kVectSize]; - int *vec_c = new int[kVectSize]; - for (int i = 0; i < kVectSize; i++) { - vec_a[i] = i; - vec_b[i] = (kVectSize - i); - } - - std::cout << "add two vectors of size " << kVectSize << std::endl; - { - // copy the input arrays to buffers to share with kernel - buffer buffer_a{vec_a, range(kVectSize)}; - buffer buffer_b{vec_b, range(kVectSize)}; - buffer buffer_c{vec_c, range(kVectSize)}; - - q.submit([&](handler &h) { - // use accessors to interact with buffers from device code - accessor accessor_a{buffer_a, h, read_only}; - accessor accessor_b{buffer_b, h, read_only}; - accessor accessor_c{buffer_c, h, read_write, no_init}; - - h.single_task(VectorAdd{ - accessor_a, accessor_b, accessor_c, kVectSize}); - }); - } - // result is copied back to host automatically when accessors go out of scope. - - // verify that vec_c is correct - for (int i = 0; i < kVectSize; i++) { - int expected = vec_a[i] + vec_b[i]; - if (vec_c[i] != expected) { - std::cout << "idx=" << i << ": result " << vec_c[i] << ", expected (" << expected << ") A=" << vec_a[i] << " + B=" << vec_b[i] << std::endl; - passed = false; - } - } - - std::cout << (passed ? "PASSED" : "FAILED") << std::endl; - - delete[] vec_a; - delete[] vec_b; - delete[] vec_c; - } catch (sycl::exception const &e) { - // Catches exceptions in the host code. - std::cerr << "Caught a SYCL host exception:\n" << e.what() << "\n"; - - // Most likely the runtime couldn't find FPGA hardware! - if (e.code().value() == CL_DEVICE_NOT_FOUND) { - std::cerr << "If you are targeting an FPGA, please ensure that your " - "system has a correctly configured FPGA board.\n"; - std::cerr << "Run sys_check in the oneAPI root directory to verify.\n"; - std::cerr << "If you are targeting the FPGA emulator, compile with " - "-DFPGA_EMULATOR.\n"; - } - std::terminate(); - } - return passed ? EXIT_SUCCESS : EXIT_FAILURE; -} \ No newline at end of file From b6cea2f29f98589e0228331b75c81365994b9d52 Mon Sep 17 00:00:00 2001 From: Kevin Xu Date: Fri, 24 Feb 2023 09:08:49 -0500 Subject: [PATCH 5/7] Update DirectProgramming/C++SYCL_FPGA/Tutorials/GettingStarted/fpga_compile/part2-dpcpp_functor_usm/src/vector_add.cpp remove public in struct Co-authored-by: Paul White --- .../fpga_compile/part2-dpcpp_functor_usm/src/vector_add.cpp | 1 - 1 file changed, 1 deletion(-) diff --git a/DirectProgramming/C++SYCL_FPGA/Tutorials/GettingStarted/fpga_compile/part2-dpcpp_functor_usm/src/vector_add.cpp b/DirectProgramming/C++SYCL_FPGA/Tutorials/GettingStarted/fpga_compile/part2-dpcpp_functor_usm/src/vector_add.cpp index e79e112528..2366fff581 100644 --- a/DirectProgramming/C++SYCL_FPGA/Tutorials/GettingStarted/fpga_compile/part2-dpcpp_functor_usm/src/vector_add.cpp +++ b/DirectProgramming/C++SYCL_FPGA/Tutorials/GettingStarted/fpga_compile/part2-dpcpp_functor_usm/src/vector_add.cpp @@ -9,7 +9,6 @@ class VectorAddID; struct VectorAdd { - public: int *const vec_a_in; int *const vec_b_in; int *const vec_c_out; From 1836ae7fe981bcb6b9ba629d7400ab147883656b Mon Sep 17 00:00:00 2001 From: Kevin Xu Date: Mon, 27 Feb 2023 09:04:41 -0500 Subject: [PATCH 6/7] Update DirectProgramming/C++SYCL_FPGA/Tutorials/GettingStarted/fpga_compile/part2-dpcpp_functor_usm/src/vector_add.cpp add checking for usm support Co-authored-by: yuguen-intel --- .../fpga_compile/part2-dpcpp_functor_usm/src/vector_add.cpp | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/DirectProgramming/C++SYCL_FPGA/Tutorials/GettingStarted/fpga_compile/part2-dpcpp_functor_usm/src/vector_add.cpp b/DirectProgramming/C++SYCL_FPGA/Tutorials/GettingStarted/fpga_compile/part2-dpcpp_functor_usm/src/vector_add.cpp index 2366fff581..e39ceb2a31 100644 --- a/DirectProgramming/C++SYCL_FPGA/Tutorials/GettingStarted/fpga_compile/part2-dpcpp_functor_usm/src/vector_add.cpp +++ b/DirectProgramming/C++SYCL_FPGA/Tutorials/GettingStarted/fpga_compile/part2-dpcpp_functor_usm/src/vector_add.cpp @@ -50,6 +50,11 @@ int main() { << device.get_info().c_str() << std::endl; +if (!device.has(aspect::usm_host_allocations)) { + std::terminate(); +} + + // declare arrays and fill them // allocate in shared memory so the kernel can see them int *vec_a = sycl::malloc_shared(kVectSize, q); From 57279c57b6a65ffd30da3fe6560f7865bca30b36 Mon Sep 17 00:00:00 2001 From: kevinUTAT Date: Fri, 3 Mar 2023 06:22:18 -0800 Subject: [PATCH 7/7] fix USM check --- .../fpga_compile/part2-dpcpp_functor_usm/src/vector_add.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/DirectProgramming/C++SYCL_FPGA/Tutorials/GettingStarted/fpga_compile/part2-dpcpp_functor_usm/src/vector_add.cpp b/DirectProgramming/C++SYCL_FPGA/Tutorials/GettingStarted/fpga_compile/part2-dpcpp_functor_usm/src/vector_add.cpp index e39ceb2a31..8c024f1209 100644 --- a/DirectProgramming/C++SYCL_FPGA/Tutorials/GettingStarted/fpga_compile/part2-dpcpp_functor_usm/src/vector_add.cpp +++ b/DirectProgramming/C++SYCL_FPGA/Tutorials/GettingStarted/fpga_compile/part2-dpcpp_functor_usm/src/vector_add.cpp @@ -50,7 +50,7 @@ int main() { << device.get_info().c_str() << std::endl; -if (!device.has(aspect::usm_host_allocations)) { +if (!device.has(sycl::aspect::usm_host_allocations)) { std::terminate(); }